From c28af94028507c4a66854372b9c102210516443c Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 16 Sep 2021 20:51:19 +0800 Subject: [PATCH] upgarde ascend 0916 --- inc/external/acl/OWNERS | 1 + inc/external/acl/acl_mdl.h | 6 +- inc/external/acl/acl_rt.h | 39 +- inc/external/acl/acl_tdt_queue.h | 426 +++++++++++++++++++++ inc/external/acl/error_codes/rt_error_codes.h | 4 + inc/external/acl/ops/acl_dvpp.h | 2 +- inc/external/ge/ge_api_error_codes.h | 20 +- inc/external/ge/ge_api_types.h | 1 + inc/external/runtime/rt_error_codes.h | 3 + inc/framework/common/debug/ge_log.h | 88 +++-- inc/framework/common/debug/log.h | 140 +++---- inc/framework/common/ge_inner_error_codes.h | 42 +- inc/framework/common/ge_types.h | 17 +- inc/framework/common/op/ge_op_utils.h | 2 +- inc/framework/common/string_util.h | 4 +- inc/framework/common/types.h | 2 + inc/framework/common/util.h | 204 +++++----- inc/framework/engine/dnnengine.h | 23 +- inc/framework/generator/ge_generator.h | 13 +- inc/framework/omg/version.h | 2 +- metadef | 2 +- .../inc/external/runtime/rt_error_codes.h | 219 +++++------ third_party/fwkacllib/inc/ops/array_ops.h | 8 +- third_party/fwkacllib/inc/ops/cluster.h | 58 +++ .../fwkacllib/inc/ops/elewise_calculation_ops.h | 54 +-- third_party/fwkacllib/inc/ops/linalg_ops.h | 8 +- third_party/fwkacllib/inc/ops/math_ops.h | 51 +++ .../fwkacllib/inc/ops/matrix_calculation_ops.h | 50 ++- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 249 ++++++------ third_party/fwkacllib/inc/ops/nn_detect_ops.h | 4 +- third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 42 ++ third_party/fwkacllib/inc/ops/pad_ops.h | 32 ++ third_party/fwkacllib/inc/ops/random_ops.h | 20 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 11 +- third_party/fwkacllib/inc/ops/rnn.h | 56 ++- third_party/fwkacllib/inc/ops/selection_ops.h | 36 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 4 +- third_party/fwkacllib/inc/ops/vector_search.h | 83 +++- third_party/fwkacllib/inc/runtime/base.h | 16 +- third_party/fwkacllib/inc/runtime/config.h | 59 +-- third_party/fwkacllib/inc/runtime/context.h | 6 +- third_party/fwkacllib/inc/runtime/dev.h | 39 +- third_party/fwkacllib/inc/runtime/dvfsprofile.h | 6 +- third_party/fwkacllib/inc/runtime/event.h | 18 +- third_party/fwkacllib/inc/runtime/kernel.h | 55 +-- third_party/fwkacllib/inc/runtime/mem.h | 119 +++--- third_party/fwkacllib/inc/runtime/rt.h | 6 +- third_party/fwkacllib/inc/runtime/rt_ffts.h | 10 +- third_party/fwkacllib/inc/runtime/rt_ffts_plus.h | 10 +- .../fwkacllib/inc/runtime/rt_ffts_plus_define.h | 14 +- third_party/fwkacllib/inc/runtime/rt_mem_queue.h | 416 ++++++++++++++++++++ third_party/fwkacllib/inc/runtime/rt_model.h | 24 +- third_party/fwkacllib/inc/runtime/rt_stars.h | 8 +- .../fwkacllib/inc/runtime/rt_stars_define.h | 6 +- third_party/fwkacllib/inc/runtime/stream.h | 30 +- third_party/fwkacllib/inc/toolchain/prof_acl_api.h | 48 +-- 56 files changed, 2141 insertions(+), 775 deletions(-) create mode 100644 inc/external/acl/acl_tdt_queue.h create mode 100644 third_party/fwkacllib/inc/ops/cluster.h create mode 100644 third_party/fwkacllib/inc/runtime/rt_mem_queue.h diff --git a/inc/external/acl/OWNERS b/inc/external/acl/OWNERS index 8552e853..b4b22068 100755 --- a/inc/external/acl/OWNERS +++ b/inc/external/acl/OWNERS @@ -5,5 +5,6 @@ approvers: reviewers: - justin_zhao - zhangyongfeng88 +- w00267184 options: no_parent_owners: true \ No newline at end of file diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 522dbd38..778fa519 100644 --- 
a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -869,7 +869,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, * * @see aclmdlCreateAIPP */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t cscSwitch, int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, @@ -1106,7 +1106,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a * * @param modelId [IN] model id * @param index [IN] index of tensor - * @param aippinfo [OUT] Pointer for static aipp info + * @param aippInfo [OUT] Pointer for static aipp info * * @retval ACL_SUCCESS The function is successfully executed. * @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp @@ -1115,7 +1115,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName */ -ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); +ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippInfo); /** * @ingroup AscendCL diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 50dbc34d..7ea27cba 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -541,7 +541,7 @@ ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); * * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream */ -ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); +ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent startEvent, aclrtEvent endEvent); /** * @ingroup AscendCL @@ -733,6 +733,43 @@ ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const v /** * @ingroup AscendCL + * @brief synchronous memory replication of two-dimensional matrix between host and device + * + * @param dst [IN] destination address pointer + * @param dpitch [IN] pitch of destination memory + * @param src [IN] source address pointer + * @param spitch [IN] pitch of source memory + * @param width [IN] width of matrix transfer + * @param height [IN] height of matrix transfer + * @param kind [IN] memcpy type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy2d(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, aclrtMemcpyKind kind); + +/** + * @ingroup AscendCL + * @brief asynchronous memory replication of two-dimensional matrix between host and device + * + * @param dst [IN] destination address pointer + * @param dpitch [IN] pitch of destination memory + * @param src [IN] source address pointer + * @param spitch [IN] pitch of source memory + * @param width [IN] width of matrix transfer + * @param height [IN] height of matrix transfer + * @param kind [IN] memcpy type + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy2dAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, aclrtMemcpyKind kind, aclrtStream stream); + +/** + * @ingroup AscendCL * @brief Asynchronous initialize memory * and set contents of memory to specified value async * diff --git a/inc/external/acl/acl_tdt_queue.h b/inc/external/acl/acl_tdt_queue.h new file mode 100644 index 00000000..d47213d2 --- /dev/null +++ b/inc/external/acl/acl_tdt_queue.h @@ -0,0 +1,426 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ +#define INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ + +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_TDT_QUEUE_PERMISSION_MANAGE 1 +#define ACL_TDT_QUEUE_PERMISSION_DEQUEUE 2 +#define ACL_TDT_QUEUE_PERMISSION_ENQUEUE 4 + +typedef void *acltdtBuf; +typedef struct tagMemQueueAttr acltdtQueueAttr; +typedef struct acltdtQueueRouteList acltdtQueueRouteList; +typedef struct acltdtQueueRouteQueryInfo acltdtQueueRouteQueryInfo; +typedef struct acltdtQueueRoute acltdtQueueRoute; + +typedef enum { ACL_TDT_QUEUE_NAME_PTR = 0, ACL_TDT_QUEUE_DEPTH_UINT32 } acltdtQueueAttrType; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_SRC_UINT32 = 0, + ACL_TDT_QUEUE_ROUTE_DST_UINT32, + ACL_TDT_QUEUE_ROUTE_STATUS_INT32 +} acltdtQueueRouteParamType; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_QUERY_SRC = 0, + ACL_TDT_QUEUE_ROUTE_QUERY_DST, + ACL_TDT_QUEUE_ROUTE_QUERY_SRC_AND_DST +} acltdtQueueRouteQueryMode; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM = 0, + ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32, + ACL_TDT_QUEUE_ROUTE_QUERY_DST_ID_UINT32 +} acltdtQueueRouteQueryInfoParamType; + +/** + * @ingroup AscendCL + * @brief create queue + * + * @param attr [IN] pointer to the queue attr + * @param qid [OUT] pointer to the qid + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtDestroyQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtCreateQueue(const acltdtQueueAttr *attr, uint32_t *qid); + +/** + * @ingroup AscendCL + * @brief destroy queue + * + * @param qid [IN] qid which to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueue(uint32_t qid); + +/** + * @ingroup AscendCL + * @brief enqueue function + * + * @param qid [IN] qid + * @param buf [IN] acltdtBuf + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. 
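The aclrtMemcpy2d / aclrtMemcpy2dAsync declarations added to acl_rt.h above mirror the existing aclrtMemcpy / aclrtMemcpyAsync pair but add row pitches for two-dimensional transfers. A minimal usage sketch (not part of the patch), assuming the usual ACL_MEMCPY_HOST_TO_DEVICE value from the existing aclrtMemcpyKind enum in acl_rt.h:

#include "acl/acl_rt.h"

// Copy a widthBytes x height tile from a pitched host buffer into a pitched
// device buffer with the new synchronous 2D copy; dpitch/spitch are the byte
// strides between consecutive rows of the destination and source buffers.
aclError CopyTileToDevice(void *devDst, size_t devPitch, const void *hostSrc, size_t hostPitch,
                          size_t widthBytes, size_t height) {
  return aclrtMemcpy2d(devDst, devPitch, hostSrc, hostPitch, widthBytes, height, ACL_MEMCPY_HOST_TO_DEVICE);
}

The asynchronous variant takes the same arguments plus an aclrtStream and returns before the copy completes, so the stream has to be synchronized before the destination is read.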
+ * @retval OtherValues Failure + * + * @see acltdtDequeue + */ +ACL_FUNC_VISIBILITY aclError acltdtEnqueue(uint32_t qid, acltdtBuf buf, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief dequeue function + * + * @param qid [IN] qid + * @param buf [OUT] pointer to the acltdtBuf + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtEnqueue + */ +ACL_FUNC_VISIBILITY aclError acltdtDequeue(uint32_t qid, acltdtBuf *buf, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief grant queue to other process + * + * @param qid [IN] qid + * @param pid [IN] pid of dst process + * @param permission [IN] permission of queue + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see ACL_TDT_QUEUE_PERMISSION_MANAGE | ACL_TDT_QUEUE_PERMISSION_DEQUEUE | ACL_TDT_QUEUE_PERMISSION_ENQUEUE + */ +ACL_FUNC_VISIBILITY aclError acltdtGrantQueue(uint32_t qid, int32_t pid, uint32_t permission, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief attach queue in current process + * + * @param qid [IN] qid + * @param timeout [IN] timeout + * @param permission [OUT] permission of queue + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtGrantQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtAttachQueue(uint32_t qid, int32_t timeout, uint32_t *permission); + +/** + * @ingroup AscendCL + * @brief bind queue routes + * + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtBindQueueRoutes(acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief unbind queue routes + * + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtUnbindQueueRoutes(acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief query queue routes according to query mode + * + * @param queryInfo [IN] pointer to the queue route query info + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryInfo *queryInfo, + acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief alloc acltdtBuf + * + * @param size [IN] alloc buf size + * @param buf [OUT] pointer to the acltdtBuf + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtFreeBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, acltdtBuf *buf); + +/** + * @ingroup AscendCL + * @brief free acltdtBuf + * + * @param buf [IN] pointer to the acltdtBuf + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtAllocBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtFreeBuf(acltdtBuf buf); + +/** + * @ingroup AscendCL + * @brief get data buf address + * + * @param buf [IN] acltdtBuf + * @param dataPtr [OUT] pointer to the data ptr which is acquired from acltdtBuf + * @param size [OUT] pointer to the size + * + * @retval ACL_SUCCESS The function is successfully executed. 
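Taken together, the enqueue/dequeue and buffer functions above form a small producer/consumer loop. A hedged sketch (not part of the patch): qid is assumed to come from an earlier acltdtCreateQueue call, the timeout is assumed to be in milliseconds, and buffer ownership after a successful enqueue is assumed to pass to the consumer, since the header does not spell out either point.

#include "acl/acl_tdt_queue.h"

aclError RoundTrip(uint32_t qid) {
  acltdtBuf buf = nullptr;
  aclError ret = acltdtAllocBuf(256U, &buf);   // allocate a 256-byte transport buffer
  if (ret != ACL_SUCCESS) {
    return ret;
  }
  ret = acltdtEnqueue(qid, buf, 1000);         // producer side, wait up to the timeout
  if (ret != ACL_SUCCESS) {
    (void)acltdtFreeBuf(buf);                  // enqueue failed, buffer is still ours to free
    return ret;
  }
  acltdtBuf out = nullptr;
  ret = acltdtDequeue(qid, &out, 1000);        // consumer side
  if (ret == ACL_SUCCESS) {
    ret = acltdtFreeBuf(out);                  // consumer releases the dequeued buffer
  }
  return ret;
}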
+ * @retval OtherValues Failure + * + * @see acltdtAllocBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtGetBufData(const acltdtBuf buf, void **dataPtr, size_t *size); + +/** + * @ingroup AscendCL + * @brief Create the queue attr + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueAttr + */ +ACL_FUNC_VISIBILITY acltdtQueueAttr *acltdtCreateQueueAttr(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue attr + * + * @param attr [IN] pointer to the queue attr + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueAttr(const acltdtQueueAttr *attr); + +/** + * @ingroup AscendCL + * @brief Set parameter for queue attr + * + * @param attr [IN|OUT] pointer to the queue attr + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtSetQueueAttr(acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len, + const void *param); + +/** + * @ingroup AscendCL + * + * @brief Get parameter for queue attr. + * + * @param attr [IN] pointer to the queue attr + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueAttr(const acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len, + size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief Create the queue route + * + * @param srcId [IN] src id of queue route + * @param dstId [IN] dst id of queue route + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRoute + */ +ACL_FUNC_VISIBILITY acltdtQueueRoute *acltdtCreateQueueRoute(uint32_t srcId, uint32_t dstId); + +/** + * @ingroup AscendCL + * @brief Destroy the queue attr + * + * @param route [IN] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRoute + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRoute(const acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * + * @brief Get parameter for queue route. + * + * @param route [IN] pointer to the queue route + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueRoute + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueRouteParam(const acltdtQueueRoute *route, acltdtQueueRouteParamType type, + size_t len, size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief Create the queue route list + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRouteList + */ +ACL_FUNC_VISIBILITY acltdtQueueRouteList *acltdtCreateQueueRouteList(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue route list + * + * @param routeList [IN] pointer to the queue route list + * + * @retval ACL_SUCCESS The function is successfully executed. 
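acltdtCreateQueue consumes an attribute handle built through the attr helpers above. A sketch under stated assumptions: passing the name as a pointer-to-pointer with len = sizeof(const char *) is only inferred from the _PTR suffix of ACL_TDT_QUEUE_NAME_PTR, and "demo_queue" and the depth of 8 are made-up values.

#include "acl/acl_tdt_queue.h"

bool CreateNamedQueue(uint32_t *qid) {
  acltdtQueueAttr *attr = acltdtCreateQueueAttr();       // returns null on failure
  if (attr == nullptr) {
    return false;
  }
  const char *name = "demo_queue";                       // hypothetical queue name
  uint32_t depth = 8U;                                   // hypothetical queue depth
  const bool ok = (acltdtSetQueueAttr(attr, ACL_TDT_QUEUE_NAME_PTR, sizeof(name), &name) == ACL_SUCCESS) &&
                  (acltdtSetQueueAttr(attr, ACL_TDT_QUEUE_DEPTH_UINT32, sizeof(depth), &depth) == ACL_SUCCESS) &&
                  (acltdtCreateQueue(attr, qid) == ACL_SUCCESS);
  (void)acltdtDestroyQueueAttr(attr);                    // the attr is only needed for creation
  return ok;
}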
+ * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteList(const acltdtQueueRouteList *routeList); + +/** + * @ingroup AscendCL + * @brief add queue route to the route list + * + * @param routeList [IN|OUT] pointer to the queue route list + * @param route [IN] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute + * + */ +ACL_FUNC_VISIBILITY aclError acltdtAddQueueRoute(acltdtQueueRouteList *routeList, const acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * @brief get queue route from route list + * + * @param routeList [IN] pointer to the queue route list + * @param index [IN] index of queue route in route list + * @param route [IN|OUT] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute + * + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueRoute(const acltdtQueueRouteList *routeList, size_t index, + acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * @brief Create the queue route query info + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRouteQueryInfo + */ +ACL_FUNC_VISIBILITY acltdtQueueRouteQueryInfo *acltdtCreateQueueRouteQueryInfo(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue route query info + * + * @param info [IN] pointer to the queue route info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteQueryInfo + * + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteQueryInfo(const acltdtQueueRouteQueryInfo *info); + +/** + * @ingroup AscendCL + * @brief Set parameter for queue route info + * + * @param attr [IN|OUT] pointer to the queue route info + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueRouteQueryInfo + */ +ACL_FUNC_VISIBILITY aclError acltdtSetQueueRouteQueryInfo(acltdtQueueRouteQueryInfo *param, + acltdtQueueRouteQueryInfoParamType type, size_t len, + const void *value); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ \ No newline at end of file diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 1c196c48..556652be 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -56,6 +56,10 @@ static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event res static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error 
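The route functions above connect one queue to another through a route list. A minimal sketch (not part of the patch); releasing the route and list handles right after acltdtBindQueueRoutes is an assumption, since the header does not state ownership rules:

#include "acl/acl_tdt_queue.h"

bool BindOneRoute(uint32_t srcQid, uint32_t dstQid) {
  acltdtQueueRoute *route = acltdtCreateQueueRoute(srcQid, dstQid);
  acltdtQueueRouteList *list = acltdtCreateQueueRouteList();
  const bool ok = (route != nullptr) && (list != nullptr) &&
                  (acltdtAddQueueRoute(list, route) == ACL_SUCCESS) &&
                  (acltdtBindQueueRoutes(list) == ACL_SUCCESS);
  if (list != nullptr) {
    (void)acltdtDestroyQueueRouteList(list);
  }
  if (route != nullptr) {
    (void)acltdtDestroyQueueRoute(route);
  }
  return ok;
}

Unbinding follows the same shape with acltdtUnbindQueueRoutes, and acltdtQueryQueueRoutes fills a route list according to an acltdtQueueRouteQueryInfo built with the query-info helpers.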
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index 5418ebd3..a536a23b 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -125,7 +125,7 @@ enum acldvppPixelFormat { enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; // Supported Channel Mode -enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; +enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4, DVPP_CHNMODE_PNGD = 8 }; // Supported Border Type enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index d0d7981e..2512de0a 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -72,17 +72,23 @@ class GE_FUNC_VISIBILITY StatusFactory { class GE_FUNC_VISIBILITY ErrorNoRegisterar { public: - ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } - ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } + ErrorNoRegisterar(uint32_t err, const std::string &desc) { + StatusFactory::Instance()->RegisterErrorNo(err, desc); + } + ErrorNoRegisterar(uint32_t err, const char *desc) { + StatusFactory::Instance()->RegisterErrorNo(err, desc); + } ~ErrorNoRegisterar() {} }; // Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit -#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ - constexpr ge::Status name = \ - ((0xFF & (static_cast(runtime))) << 30) | ((0xFF & (static_cast(type))) << 28) | \ - ((0xFF & (static_cast(level))) << 25) | ((0xFF & (static_cast(sysid))) << 17) | \ - ((0xFF & (static_cast(modid))) << 12) | (0x0FFF & (static_cast(value))); \ +#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ + constexpr ge::Status name = (static_cast(0xFFU & (static_cast(runtime))) << 30) | \ + (static_cast(0xFFU & (static_cast(type))) << 28) | \ + (static_cast(0xFFU & (static_cast(level))) << 25) | \ + (static_cast(0xFFU & (static_cast(sysid))) << 17) | \ + (static_cast(0xFFU & (static_cast(modid))) << 12) | \ + (static_cast(0x0FFFU) & (static_cast(value))); \ const ErrorNoRegisterar g_##name##_errorno(name, desc); #define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index ac821281..9ee63797 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -67,6 +67,7 @@ const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOp const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; +const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; // Option key: memory init const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index ef7e2ec7..1a8dc3e9 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -58,6 +58,9 @@ static const int32_t 
ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index 3e646440..5ee2daee 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -33,7 +33,7 @@ extern "C" { #endif -#define GE_MODULE_NAME static_cast(GE) +#define GE_MODULE_NAME static_cast(GE) // trace status of log enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; @@ -51,43 +51,61 @@ class GE_FUNC_VISIBILITY GeLog { }; inline bool IsLogEnable(int module_name, int log_level) { - int32_t enable = CheckLogLevel(module_name, log_level); + const int32_t enable = CheckLogLevel(module_name, log_level); // 1:enable, 0:disable return (enable == 1); } -#define GELOGE(ERROR_CODE, fmt, ...) \ - dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ - ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ - ##__VA_ARGS__) -#define GELOGW(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ - dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGI(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \ - dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGD(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \ - dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) - -#define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) - -#define GELOGT(VALUE, fmt, ...) \ - do { \ - TraceStatus stat = VALUE; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ - int idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ - KeyValue kv = {k, v}; \ - DlogWithKV(static_cast(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \ - ##__VA_ARGS__); \ - } while (0) - -#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ - dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ - ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ - ##__VA_ARGS__) +#define GELOGE(ERROR_CODE, fmt, ...) \ + do { \ + dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ + ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ + ##__VA_ARGS__); \ + } while (false) + +#define GELOGW(fmt, ...) \ + do { \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \ + dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } \ + } while (false) + +#define GELOGI(fmt, ...) 
\ + do { \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \ + dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } \ + } while (false) + +#define GELOGD(fmt, ...) \ + do { \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \ + dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } \ + } while (false) + +#define GEEVENT(fmt, ...) \ + do { \ + dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } while (false) + +#define GELOGT(VALUE, fmt, ...) \ + do { \ + TraceStatus stat = VALUE; \ + const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ + const int32_t idx = static_cast(stat); \ + char *k = const_cast("status"); \ + char *v = const_cast(TraceStatStr[idx]); \ + KeyValue kv = {k, v}; \ + DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } while (false) + +#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ + do { \ + dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ + ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ + ##__VA_ARGS__); \ + } while (false) // print memory when it is greater than 1KB. #define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ @@ -95,7 +113,7 @@ inline bool IsLogEnable(int module_name, int log_level) { if ((SIZE) > 1024) { \ GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast(SIZE), (PURPOSE)); \ } \ - } while (0); + } while (false) #ifdef __cplusplus } #endif diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index f06faa1b..2fe425c9 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -52,82 +52,82 @@ GELOGW(__VA_ARGS__); \ } -#define GE_LOGE_IF(condition, ...) \ - if ((condition)) { \ - DOMI_LOGE(__VA_ARGS__); \ +#define GE_LOGE_IF(condition, ...) \ + if ((condition)) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ } // If expr is not SUCCESS, print the log and return the same value -#define GE_CHK_STATUS_RET(expr, ...) \ - do { \ - const ge::Status _status = (expr); \ - if (_status != ge::SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - return _status; \ - } \ - } while (0); +#define GE_CHK_STATUS_RET(expr, ...) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ + return _chk_status; \ + } \ + } while (false) // If expr is not SUCCESS, print the log and do not execute return -#define GE_CHK_STATUS(expr, ...) \ - do { \ - const ge::Status _status = (expr); \ - if (_status != ge::SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - } \ - } while (0); +#define GE_CHK_STATUS(expr, ...) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ + } \ + } while (false) // If expr is not SUCCESS, return the same value -#define GE_CHK_STATUS_RET_NOLOG(expr) \ - do { \ - const ge::Status _status = (expr); \ - if (_status != ge::SUCCESS) { \ - return _status; \ - } \ - } while (0); +#define GE_CHK_STATUS_RET_NOLOG(expr) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + return _chk_status; \ + } \ + } while (false) // If expr is not GRAPH_SUCCESS, print the log and return FAILED #define GE_CHK_GRAPH_STATUS_RET(expr, ...) 
\ do { \ if ((expr) != ge::GRAPH_SUCCESS) { \ REPORT_CALL_ERROR("E19999", "Operator graph failed"); \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return FAILED; \ } \ - } while (0); + } while (false) // If expr is not SUCCESS, print the log and execute a custom statement -#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ - do { \ - const ge::Status _status = (expr); \ - GE_CHK_BOOL_EXEC(_status == SUCCESS, exec_expr, __VA_ARGS__); \ - } while (0); +#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ + do { \ + const ge::Status _chk_status = (expr); \ + GE_CHK_BOOL_EXEC(_chk_status == SUCCESS, exec_expr, __VA_ARGS__); \ + } while (false) // If expr is not true, print the log and return the specified status #define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ do { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ GELOGE(_status, __VA_ARGS__); \ return _status; \ } \ - } while (0); + } while (false) // If expr is not true, print the log and return the specified status #define GE_CHK_BOOL_RET_STATUS_NOLOG(expr, _status, ...) \ do { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ return _status; \ } \ - } while (0); + } while (false) // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_EXEC(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ } \ } @@ -135,7 +135,7 @@ // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ GELOGW(__VA_ARGS__); \ exec_expr; \ @@ -144,7 +144,7 @@ // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ GELOGI(__VA_ARGS__); \ exec_expr; \ @@ -154,7 +154,7 @@ // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ GELOGI(__VA_ARGS__); \ exec_expr; \ @@ -164,16 +164,16 @@ // If expr is true, print logs and execute custom statements #define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ } \ } // If expr is true, print the Information log and execute a custom statement #define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ GELOGI(__VA_ARGS__); \ exec_expr; \ @@ -183,9 +183,9 @@ // If expr is not SUCCESS, print the log and execute the expression + return #define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ return; \ } \ @@ -194,10 +194,10 @@ // If expr is not SUCCESS, print the log and execute the expression + return _status #define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) 
\ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ return _status; \ } \ @@ -206,7 +206,7 @@ // If expr is not true, execute a custom statement #define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ exec_expr; \ } \ @@ -214,34 +214,34 @@ // -----------------runtime related macro definitions------------------------------- // If expr is not RT_ERROR_NONE, print the log -#define GE_CHK_RT(expr) \ - do { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ - } \ - } while (0); +#define GE_CHK_RT(expr) \ + do { \ + const rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ + } \ + } while (false) // If expr is not RT_ERROR_NONE, print the log and execute the exec_expr expression -#define GE_CHK_RT_EXEC(expr, exec_expr) \ - { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ - exec_expr; \ - } \ - } +#define GE_CHK_RT_EXEC(expr, exec_expr) \ + do { \ + const rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ + exec_expr; \ + } \ + } while (false) // If expr is not RT_ERROR_NONE, print the log and return #define GE_CHK_RT_RET(expr) \ do { \ - rtError_t _rt_ret = (expr); \ + const rtError_t _rt_ret = (expr); \ if (_rt_ret != RT_ERROR_NONE) { \ REPORT_CALL_ERROR("E19999", "Call %s fail, ret: 0x%X", #expr, _rt_ret); \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ return RT_ERROR_TO_GE_STATUS(_rt_ret); \ } \ - } while (0); + } while (false) // If expr is true, execute exec_expr without printing logs #define GE_IF_BOOL_EXEC(expr, exec_expr) \ @@ -256,7 +256,7 @@ try { \ exec_expr0; \ } catch (const std::bad_alloc &) { \ - DOMI_LOGE("Make shared failed"); \ + GELOGE(ge::FAILED, "Make shared failed"); \ exec_expr1; \ } @@ -274,13 +274,13 @@ #define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ do { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ GELOGE(_status, "%s", errormsg); \ ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ return _status; \ } \ - } while (0) + } while (false) template GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index 3697a526..0ab9721e 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -61,29 +61,29 @@ enum ErrorLevel { CRITICAL_LEVEL = 0b100, }; -// Each module defines error codes using the following macros +// Each module defines error codes using the following macros, name can not be modified to (name) #define GE_ERRORNO_COMMON(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc)) #define GE_ERRORNO_CLIENT(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc)) #define 
GE_ERRORNO_INIT(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc)) #define GE_ERRORNO_SESSION(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc)) #define GE_ERRORNO_GRAPH(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc)) #define GE_ERRORNO_ENGINE(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc)) #define GE_ERRORNO_OPS(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc)) #define GE_ERRORNO_PLUGIN(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc)) #define GE_ERRORNO_RUNTIME(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc)) #define GE_ERRORNO_EXECUTOR(name, value, desc) \ - GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, value, desc) + GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc)) #define GE_ERRORNO_GENERATOR(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc)) // Get error code description #define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) @@ -125,13 +125,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized. 
GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963 // Init module error code definition -GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 -GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 -GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 -GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 -GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 +GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 +GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 +GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 +GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 +GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5, - "Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 + "Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 // Session module error code definition GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144 @@ -216,8 +216,8 @@ GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed."); GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device"); // 1343246338 // Optimize errocode -GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 -GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 +GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 +GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 // Ops module error code definition GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore."); // 1343250432 @@ -313,7 +313,7 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); -#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) +#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) } // namespace ge #endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 83d01c1f..066327b0 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -279,10 +279,19 @@ struct TaskDescInfo { }; struct OpDescInfo { - std::string op_name; - std::string op_type; - uint32_t task_id; - uint32_t stream_id; + std::string op_name = ""; + std::string op_type = ""; + uint32_t task_id = 0; + uint32_t stream_id = 0; + uint32_t imply_type = 0; + uint32_t block_dim = 0; + std::string op_file_path = ""; + std::string dev_func = ""; + std::string tvm_magic = ""; + uint32_t tiling_key = 0; + std::string tiling_data = ""; + std::string node_info = ""; + std::vector workspace_bytes; std::vector input_format; std::vector> 
input_shape; std::vector input_data_type; diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index bc965d13..be677407 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -95,7 +95,7 @@ class GE_FUNC_VISIBILITY OpUtils { /// @param [out] aipp_params aipp parameters /// @return enum of tagCCAippInputFormat /// - static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); + static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); static Status TransferDim(const std::vector &dim, std::vector &dim_vector); template static void SliceData(const std::vector &input, int64_t chunk_size, std::vector &output, diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index 677b1971..21ee4670 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -78,8 +78,8 @@ class GE_FUNC_VISIBILITY StringUtils { /// @param [in] delim separator /// @return string array after segmentation /// - static std::vector Split(const std::string &str, char delim) { - std::vector elems; + static std::vector> Split(const std::string &str, char delim) { + std::vector> elems; if (str.empty()) { elems.emplace_back(""); diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 811d5eed..1a4f7cde 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -339,6 +339,8 @@ REGISTER_OPTYPE_DECLARE(PLACEHOLDER, "PlaceHolder"); REGISTER_OPTYPE_DECLARE(END, "End"); REGISTER_OPTYPE_DECLARE(BASICLSTMCELL, "BasicLSTMCell"); REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); +REGISTER_OPTYPE_DECLARE(ITERATOR, "Iterator"); +REGISTER_OPTYPE_DECLARE(ITERATORV2, "IteratorV2"); REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index a3989b9d..97528eb6 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -18,8 +18,8 @@ #define INC_FRAMEWORK_COMMON_UTIL_H_ #include -#include -#include +#include +#include #include #include #include @@ -30,17 +30,17 @@ #include "framework/common/ge_inner_error_codes.h" #include "mmpa/mmpa_api.h" -#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ - do { \ - if (size <= 0) { \ - DOMI_LOGE("param[%s] is not a positive number", #size); \ - return PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ + do { \ + if (size <= 0) { \ + GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ + return PARAM_INVALID; \ + } \ + } while (false) #define CHECK_FALSE_EXEC(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ exec_expr; \ } \ @@ -59,131 +59,133 @@ }); // For propagating errors when calling a function. -#define GE_RETURN_IF_ERROR(expr) \ - do { \ - const ::ge::Status _status = (expr); \ - if (_status) return _status; \ - } while (0) +#define GE_RETURN_IF_ERROR(expr) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + return _chk_status; \ + } \ + } while (false) #define GE_RETURN_WITH_LOG_IF_ERROR(expr, ...) 
\ do { \ - const ::ge::Status _status = (expr); \ - if (_status) { \ - DOMI_LOGE(__VA_ARGS__); \ - return _status; \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ + return _chk_status; \ } \ - } while (0) + } while (false) // check whether the parameter is true. If it is, return FAILED and record the error log #define GE_RETURN_WITH_LOG_IF_TRUE(condition, ...) \ do { \ if (condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::FAILED; \ } \ - } while (0) + } while (false) // Check if the parameter is false. If yes, return FAILED and record the error log #define GE_RETURN_WITH_LOG_IF_FALSE(condition, ...) \ do { \ - bool _condition = (condition); \ + const bool _condition = (condition); \ if (!_condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::FAILED; \ } \ - } while (0) + } while (false) // Checks whether the parameter is true. If so, returns PARAM_INVALID and records the error log #define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \ do { \ if (condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::PARAM_INVALID; \ } \ - } while (0) + } while (false) // Check if the parameter is false. If yes, return PARAM_INVALID and record the error log #define GE_RT_PARAM_INVALID_WITH_LOG_IF_FALSE(condition, ...) \ do { \ - bool _condition = (condition); \ + const bool _condition = (condition); \ if (!_condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::PARAM_INVALID; \ } \ - } while (0) + } while (false) // Check if the parameter is null. If yes, return PARAM_INVALID and record the error #define GE_CHECK_NOTNULL(val) \ do { \ if (val == nullptr) { \ REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ - DOMI_LOGE("[Check][Param:%s]null is invalid.", #val); \ + GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ return ge::PARAM_INVALID; \ } \ - } while (0) + } while (false) // Check if the parameter is null. If yes, just return and record the error -#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ - } while (0) +#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return; \ + } \ + } while (false) // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log -#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - exec_expr; \ - } \ - } while (0) +#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + exec_expr; \ + } \ + } while (false) // Check whether the parameter is null. If yes, return directly and record the error log -#define GE_RT_VOID_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ - } while (0) +#define GE_RT_VOID_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return; \ + } \ + } while (false) // Check if the parameter is null. 
If yes, return false and record the error log -#define GE_RT_FALSE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return false; \ - } \ - } while (0) +#define GE_RT_FALSE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return false; \ + } \ + } while (false) // Check if the parameter is out of bounds -#define GE_CHECK_SIZE(size) \ - do { \ - if (size == 0) { \ - DOMI_LOGE("param[%s] is out of range", #size); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_SIZE(size) \ + do { \ + if (size == 0) { \ + GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) // Check if the value on the left is greater than or equal to the value on the right -#define GE_CHECK_GE(lhs, rhs) \ - do { \ - if (lhs < rhs) { \ - DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_GE(lhs, rhs) \ + do { \ + if (lhs < rhs) { \ + GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) // Check if the value on the left is less than or equal to the value on the right -#define GE_CHECK_LE(lhs, rhs) \ - do { \ - if (lhs > rhs) { \ - DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_LE(lhs, rhs) \ + do { \ + if (lhs > rhs) { \ + GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) #define GE_DELETE_NEW_SINGLE(var) \ do { \ @@ -191,7 +193,7 @@ delete var; \ var = nullptr; \ } \ - } while (0) + } while (false) #define GE_DELETE_NEW_ARRAY(var) \ do { \ @@ -199,18 +201,18 @@ delete[] var; \ var = nullptr; \ } \ - } while (0) + } while (false) #define GE_FREE_RT_LOG(addr) \ do { \ if (addr != nullptr) { \ - rtError_t error = rtFree(addr); \ + const rtError_t error = rtFree(addr); \ if (error != RT_ERROR_NONE) { \ GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ } \ addr = nullptr; \ } \ - } while (0) + } while (false) /** * @ingroup domi_common @@ -228,12 +230,6 @@ using google::protobuf::Message; /// /// @ingroup domi_common -/// @brief Maximum file path length -/// -const int32_t DOMI_MAX_PATH_LEN = 256; - -/// -/// @ingroup domi_common /// @brief Reads the proto structure from an array. /// @param [in] data proto data to be read /// @param [in] size proto data size @@ -253,8 +249,6 @@ GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int size, Message * /// GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); -GE_FUNC_VISIBILITY bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message); - /// /// @ingroup: domi_common /// @brief: get length of file @@ -306,10 +300,10 @@ GE_FUNC_VISIBILITY std::string ToString(std::vector &v) { ss << x; ss << ", "; } - std::string strRet = - ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line. - strRet += "]"; - return strRet; + // Delete the two extra characters at the end of the line. 
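The ge_log.h / log.h / util.h hunks above mainly wrap every check macro in do { ... } while (false) and route error output through GELOGE instead of DOMI_LOGE, so each macro expands to a single statement and call sites keep their trailing semicolons. A short sketch of how the reworked macros compose in a Status-returning helper (DoInit is a hypothetical callee, not part of the patch):

#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"

namespace ge {
Status DoInit(void *buffer, int64_t size);    // hypothetical helper

Status InitBuffer(void *buffer, int64_t size) {
  GE_CHECK_NOTNULL(buffer);                   // nullptr -> report error, return PARAM_INVALID
  GE_CHECK_POSITIVE_SIZE_RANGE(size);         // size <= 0 -> return PARAM_INVALID
  GE_CHK_STATUS_RET(DoInit(buffer, size), "DoInit failed, size=%ld", size);
  GELOGI("buffer initialized, size=%ld", size);
  return SUCCESS;
}
}  // namespace ge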
+ std::string str = ss.str().substr(0u, ss.str().length() - 2u); + str += "]"; + return str; } /// @@ -326,10 +320,10 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField ss << x; ss << ", "; } - std::string strRet = - ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line. - strRet += "]"; - return strRet; + // Delete the two extra characters at the end of the line. + std::string str = ss.str().substr(0u, ss.str().length() - 2u); + str += "]"; + return str; } /// @@ -394,14 +388,6 @@ GE_FUNC_VISIBILITY bool ValidateStr(const std::string &filePath, const std::stri /// /// @ingroup domi_common -/// @brief Check whether the file is normal file. -/// @param [in] file_path file path -/// @param [out] result -/// -GE_FUNC_VISIBILITY bool IsValidFile(const char *file_path); - -/// -/// @ingroup domi_common /// @brief Check path invalid /// @param [in] path, path to be checked /// @param [in] length, length of path diff --git a/inc/framework/engine/dnnengine.h b/inc/framework/engine/dnnengine.h index 8a0f3b65..b5f02ebe 100644 --- a/inc/framework/engine/dnnengine.h +++ b/inc/framework/engine/dnnengine.h @@ -43,14 +43,31 @@ struct DNNEngineAttribute { // If engine input format must be specific, set this attribute, else set FORMAT_RESERVED Format engine_input_format; Format engine_output_format; + bool atomic_engine_flag; }; class GE_FUNC_VISIBILITY DNNEngine { public: + DNNEngine() = default; + explicit DNNEngine(const DNNEngineAttribute &attrs) { + engine_attribute_ = attrs; + } virtual ~DNNEngine() = default; - virtual Status Initialize(const std::map &options) = 0; - virtual Status Finalize() = 0; - virtual void GetAttributes(DNNEngineAttribute &attr) const = 0; + Status Initialize(const std::map &options) { + return SUCCESS; + } + Status Finalize() { + return SUCCESS; + } + void GetAttributes(DNNEngineAttribute &attr) const { + attr = engine_attribute_; + } + bool IsAtomic() const { + return engine_attribute_.atomic_engine_flag; + } + + protected: + DNNEngineAttribute engine_attribute_; }; } // namespace ge diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index 5da5a593..86496012 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -34,13 +34,16 @@ namespace ge { class GeRootModel; class GE_FUNC_VISIBILITY GeGenerator { public: + using InOutTensorRef = std::pair &, const vector &>; static GeGenerator &GetInstance() { static GeGenerator Instance; return Instance; } GeGenerator() = default; - ~GeGenerator() { (void)Finalize(); } + ~GeGenerator() { + (void)Finalize(); + } GeGenerator(const GeGenerator &) = delete; @@ -94,8 +97,8 @@ class GE_FUNC_VISIBILITY GeGenerator { /// @param [in] graph_name: graph name. /// @param [out] graph: graph of single op. 
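dnnengine.h above turns DNNEngine from a pure-virtual interface into a concrete base class that stores its DNNEngineAttribute, now including the atomic_engine_flag added by this patch. A brief sketch of the resulting usage, assuming the remaining attribute fields can stay default-initialized:

#include "framework/engine/dnnengine.h"

namespace ge {
void DescribeEngine() {
  DNNEngineAttribute attrs{};                    // unspecified fields keep their defaults
  attrs.engine_input_format = FORMAT_RESERVED;   // no specific input format required
  attrs.engine_output_format = FORMAT_RESERVED;
  attrs.atomic_engine_flag = true;               // new flag introduced by this patch

  DNNEngine engine(attrs);                       // explicit ctor stores engine_attribute_
  DNNEngineAttribute queried;
  engine.GetAttributes(queried);                 // copies engine_attribute_ back out
  const bool atomic = engine.IsAtomic();         // reads atomic_engine_flag
  (void)atomic;
}
}  // namespace ge

Engines that previously overrode Initialize/Finalize/GetAttributes now either construct the base with their attributes or fill the protected engine_attribute_ member.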
/// @return SUCCESS or FAILED - Status BuildSingleOpGraph(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - std::string graph_name, Graph &graph); + Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, + Graph &graph, std::vector> &inputs_name_type); private: Status GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, @@ -110,6 +113,10 @@ class GE_FUNC_VISIBILITY GeGenerator { using GeRootModelPtr = std::shared_ptr; Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); + Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector &inputs, + const std::vector &outputs, + const std::vector> &inputs_name_type, + std::vector &generalized_build_attrs); class Impl; diff --git a/inc/framework/omg/version.h b/inc/framework/omg/version.h index 4facba0d..a1be09ed 100644 --- a/inc/framework/omg/version.h +++ b/inc/framework/omg/version.h @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager { ~PlatformVersionManager() = delete; static Status GetPlatformVersion(std::string &ver) { ver = "1.11.z"; - std::vector version_splits = StringUtils::Split(ver, '.'); + const std::vector version_splits = StringUtils::Split(ver, '.'); GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); GELOGI("Read current platform version: %s.", ver.c_str()); diff --git a/metadef b/metadef index 60df4b39..b21fe2bc 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 60df4b39a6f639c21dd7deb220b93345451938f5 +Subproject commit b21fe2bccb97e64fa2c7dff8ffd559adc853e73d diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h index c5423d36..a2d805fb 100644 --- a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h +++ b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h @@ -1,109 +1,110 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ -#define __INC_EXTERNEL_RT_ERROR_CODES_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static const int32_t ACL_RT_SUCCESS = 0; // success - -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type -static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle -static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type -static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout - -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error -static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow -static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device -static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail -static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission -static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource -static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource -static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource -static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource -static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource - -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; 
// context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error -static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout -static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception -static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception -static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal - -static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error -static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error -static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect - -#ifdef __cplusplus -} -#endif - -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ +/** +* @file rt_error_codes.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type +static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full + +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const 
int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception +static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal +static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode +static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die +static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id +static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect + +#ifdef __cplusplus +} +#endif + +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git 
a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 4b45f4cf..e780ce1b 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1258,21 +1258,21 @@ REG_OP(ExpandD) * Three inputs, including: * @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n * @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n -* @li ivf_offset: A 1-D tensor of type int32 with the value of ivf offset. \n +* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n * @par Attributes: * total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n * @par Outputs: * @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n -* @li buckets_offset: A 1-D tensor of type int32 with the value of ivf_offset corresponding to bucket_list. \n +* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n */ REG_OP(CalcBucketsLimitAndOffset) .INPUT(bucket_list, TensorType({DT_INT32})) .INPUT(ivf_counts, TensorType({DT_INT32})) - .INPUT(ivf_offset, TensorType({DT_INT32})) + .INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) .OUTPUT(buckets_limit, TensorType({DT_INT32})) - .OUTPUT(buckets_offset, TensorType({DT_INT32})) + .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(total_limit, Int) .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h new file mode 100644 index 00000000..19b4ea05 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/cluster.h @@ -0,0 +1,58 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file cluster.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { +/** +* @brief Perform k-means clustering on a data matrix. \n + +* @par Inputs: +* Three required inputs and one optional inputs, including: \n +* @li x: A 2D tensor of data type float32. \n +* @li y: A 2D tensor of data type float32. \n +* @li sum_square_x: An optional 2D tensor of data type float32. \n +* @li sum_square_y: A 2D tensor of data type float32. \n + +* @par Attributes: +* use_actual_distance: Indicates whether to calculate the complete distance. \n + +* @par Outputs: +* @li segment_sum: A tensor of data type float32. \n +* @li segment_count: A tensor of data type float32. \n +* @li k_mean_total_sum: A tensor of data type float32. 
\n +*/ +REG_OP(KMeansCentroids) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT})) + .INPUT(sum_square_y, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(sum_square_x, TensorType({DT_FLOAT})) + .OUTPUT(segment_sum, TensorType({DT_FLOAT})) + .OUTPUT(segment_count, TensorType({DT_FLOAT})) + .OUTPUT(kmean_total_sum, TensorType({DT_FLOAT})) + .ATTR(use_actual_distance, Bool, false) + .OP_END_FACTORY_REG(KMeansCentroids) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index bcf50058..cd41d6fa 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -3391,57 +3391,57 @@ REG_OP(TensorRedirect) .OP_END_FACTORY_REG(TensorRedirect) /** -* @brief Performs the element-wise division of tensor x2 by tensor x3, -* multiply the result by the scalar value and add it to tensor x1 +* @brief Performs the element-wise division of tensor x1 by tensor x2, +* multiply the result by the scalar value and add it to tensor input_data. * @par Inputs: * Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: -* float16, float32. -* @li x1: A mutable input Tensor of the same type as x1. -* @li x2: A mutable input Tensor of the same type as x1. +* float16, float32, double, int64. +* @li x1: A mutable input Tensor of the same type as input_data. +* @li x2: A mutable input Tensor of the same type as input_data. * @li value: A mutable input Tensor. Must be one of the following types: -* float16, float32, int32. \n +* float16, float32, double, int64, int32. \n + * @par Outputs: -* y: A mutable Tensor. Has the same type as "x1". \n +* y: A mutable Tensor. Has the same type as input_data. \n * @par Third-party framework compatibility -* Compatible with the Pytorch operator Addcdiv. +* Compatible with the Pytorch operator Addcdiv(version-1.5.0). */ REG_OP(Addcdiv) - .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32, DT_DOUBLE, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) .OP_END_FACTORY_REG(Addcdiv) /** -* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, +* @brief Performs the element-wise multiplication of tensor x1 by tensor x2, * multiply the result by the scalar value and add it to tensor input_data - * @par Inputs: * Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: -* float16, float32, int8, int32, uint8. -* @li x1: A mutable input Tensor of the same type as x1. -* @li x2: A mutable input Tensor of the same type as x1. -* @li value: A tensor which includes only one element of the same type as x1. \n +* float16, float32, double, int64, int8, int32, uint8. +* @li x1: A mutable input Tensor of the same type as input_data. +* @li x2: A mutable input Tensor of the same type as input_data. 
+* @li value: A tensor which includes only one element of the same type as input_data. \n * @par Outputs: -* y: A mutable output Tensor. Has the same type as "x1". \n +* y: A mutable output Tensor. Has the same type as input_data. \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcmul. */ REG_OP(Addcmul) - .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) .OP_END_FACTORY_REG(Addcmul) /** @@ -3508,8 +3508,8 @@ REG_OP(StrideAdd) * Compatible with the Pytorch equal operator. \n */ REG_OP(TensorEqual) - .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) - .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8})) .OUTPUT(output_z, TensorType({DT_BOOL})) .OP_END_FACTORY_REG(TensorEqual) diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index f6cc8694..5e31bebd 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -410,10 +410,10 @@ form square matrices. \n */ REG_OP(Svd) - .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT })) + .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(compute_uv, Bool, true) .ATTR(full_matrices, Bool, false) .OP_END_FACTORY_REG(Svd) diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 6eb418d8..2c5afbe7 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -939,6 +939,57 @@ REG_OP(LpNorm) .OP_END_FACTORY_REG(LpNorm) /** +* @brief Computes LpNormReduce. + +* @par Inputs: +* x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li axes: ListInt, {} means all axes will be computed. +* @li keepdim: Bool, default is false. +* @li epsilon: Float, default is 1e-12. \n + +* @par Outputs: +* y: An ND tensor of type float16, float32. The shape of y is depending +* on axes and keepdim. 
\n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNormReduce. +*/ +REG_OP(LpNormReduce) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(axes, ListInt, {}) + .ATTR(keepdim, Bool, false) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNormReduce) + +/** +* @brief Computes LpNormUpdate. + +* @par Inputs: +* x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li epsilon: Float, default is 1e-12. \n + +* @par Outputs: +* y: An ND tensor of type float16, float32. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNormUpdate. +*/ +REG_OP(LpNormUpdate) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNormUpdate) + +/** * @brief get complex. * @par Inputs: diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index e82251bb..55199962 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -49,10 +49,10 @@ namespace ge { * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMul) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .ATTR(transpose_x1, Bool, false) .ATTR(transpose_x2, Bool, false) .OP_END_FACTORY_REG(MatMul) @@ -88,10 +88,10 @@ REG_OP(MatMul) * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMulV2) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8 DT_INT4})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) .ATTR(transpose_x1, Bool, false) .ATTR(transpose_x2, Bool, false) @@ -1067,6 +1067,40 @@ REG_OP(MatrixSetDiagV2) .OP_END_FACTORY_REG(MatrixSetDiagV2) /** +*@brief Returns a batched matrix tensor with new batched diagonal values . \n + +*@par Inputs: +* Three inputs, including: +*@li input: "Rank `r+1`, where `r >= 1`. \n + +*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n + +*@li k: +*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n +*diagonal, and negative value means subdiagonals. `k` can be a single integer \n +*(for a single diagonal) or a pair of integers specifying the low and high ends \n +*of a matrix band. `k[0]` must not be larger than `k[1]`. 
\n + +*@par Attributes: +*@li align: An optional string. Defaults to RIGHT_LEFT. It is a string specifying \n +*how superdiagonals and subdiagonals should be aligned, respectively. \n +*other optional: LEFT_RIGHT, LEFT_LEFT, and RIGHT_RIGHT.\n + +*@par Outputs: +*output: Rank `r+1`, with `output.shape = input.shape` . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterUpdate. +*/ +REG_OP(MatrixSetDiagV3) + .INPUT(input, TensorType::BasicType()) + .INPUT(diagonal, TensorType::BasicType()) + .INPUT(k, TensorType({DT_INT32})) + .OUTPUT(output, TensorType::BasicType()) + .ATTR(align, String, "RIGHT_LEFT") + .OP_END_FACTORY_REG(MatrixSetDiagV3) + +/** *@brief Returns a batched diagonal tensor with given batched diagonal values . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index ed7cb9b5..b0cb15fb 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -369,16 +369,14 @@ REG_OP(BiasAddGrad) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | out_bckprop | filter | y\n - ------------|-------------|---------|--------\n - | Data Type | float16 | float16 | float16\n - | |-------------|---------|--------\n - | | float32 | float32 | float32\n - | |-------------|---------|--------\n - | | float64 | float64 | float64\n - ------------|-------------|---------|--------\n - | Format | NCHW | NCHW | NCHW\n - | | NHWC | HWCN | NHWC\n + *\n + | Tensor | out_bckprop | filter | y |\n + |-----------|-------------|---------|--------|\n + | Data Type | float16 | float16 | float16|\n + | | float32 | float32 | float32|\n + | | float64 | float64 | float64|\n + | Format | NCHW | NCHW | NCHW |\n + | | NHWC | HWCN | NHWC |\n *\n * For float32 and float64 type, the actual calculation on the chip is based on * float16. 
@@ -400,30 +398,25 @@ REG_OP(BiasAddGrad) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | input_size | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | out_backprop | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | y(fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | input_size | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | out_backprop | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | y(fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when @@ -495,9 +488,9 @@ REG_OP(Conv2DBackpropInput) * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropInput instead. */ REG_OP(Conv2DBackpropInputD) - .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_BF16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32, DT_BF16})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -523,13 +516,12 @@ REG_OP(Conv2DBackpropInputD) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | filter | bias | y\n - ------------|---------|---------|---------|--------\n - | Data Type | float16 | float16 | float16 | float16\n - | |---------|---------|---------|--------\n - | | int8 | int8 | int32 | int32\n - ------------|---------|---------|---------|--------\n - | Format | NCHW | NCHW | ND | NCHW\n + *\n + | Tensor | x | filter | bias | y |\n + |-----------|---------|---------|---------|--------|\n + | Data Type | float16 | float16 | float16 | float16|\n + | | int8 | int8 | int32 | int32 |\n + | Format | NCHW | NCHW | ND | NCHW |\n *\n * For int8, a dequant or requant operator must be followed. 
*\n @@ -553,29 +545,24 @@ REG_OP(Conv2DBackpropInputD) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | x (out_backprop) | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | Filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | y (fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | Offset_x | | [-128, 127]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | x (out_backprop) | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | Filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | y (fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n + | Offset_x | | [-128, 127] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 @@ -631,16 +618,14 @@ REG_OP(Deconvolution) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | out_backprop | y\n - ------------|---------|--------------|---------\n - | Data Type | float16 | float16 | float16\n - | |---------|--------------|---------\n - | | float32 | float32 | float32\n - | |---------|--------------|---------\n - | | float64 | float64 | float64\n - |-----------|---------|--------------|---------\n - | Format | NCHW | NCHW | NCHW\n - | | NHWC | NHWC | HWCN\n + *\n + | Tensor | x | out_backprop | y |\n + |-----------|---------|--------------|---------|\n + | Data Type | float16 | float16 | float16 |\n + | | float32 | float32 | float32 |\n + | | float64 | float64 | float64 |\n + | Format | NCHW | NCHW | NCHW |\n + | | NHWC | NHWC | HWCN |\n *\n * For float32 and float64 type of x and outbackprop, the actual calculation on the chip * is based on float16. 
@@ -662,30 +647,25 @@ REG_OP(Deconvolution) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | x(fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Filter Size | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | out_backprop | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | y | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | x(fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Filter Size | H | [1, 255] |\n + | | W | [1, 255] |\n + | out_backprop | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | y | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n *\n *@par Outputs: * y: A Tensor. Has the same type as x, has the same format as filter_size. @@ -853,11 +833,11 @@ REG_OP(Conv2DBackpropFilterD) *@li Compatible with the Caffe operator 2D "Convolution". */ REG_OP(Conv2D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16})) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_BF16})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -1441,14 +1421,13 @@ REG_OP(Conv3DTransposeD) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | filter | bias | y\n - ------------|---------|---------|---------|--------\n - | Data Type | float16 | float16 | float16 | float16\n - | |---------|---------|---------|--------\n - | | int8 | int8 | int32 | int32\n - ------------|---------|---------|---------|--------\n - | Format | NCHW | NCHW | ND | NCHW\n - | | NHWC | HWCN | | NHWC\n + *\n + | Tensor | x | filter | bias | y |\n + |-----------|---------|---------|---------|--------|\n + | Data Type | float16 | float16 | float16 | float16|\n + | | int8 | int8 | int32 | int32 |\n + | Format | NCHW | NCHW | ND | NCHW |\n + | | NHWC | HWCN | | NHWC |\n *\n * For int8, a dequant or requant operator must be followed. 
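A rough usage sketch for the Conv2D registration above, which now also accepts bfloat16 (DT_BF16) for x, filter and y. It assumes the generated ge::op::Conv2D wrapper and the conventional set_input_*/set_attr_* setters of the GE graph-construction API; the header path and helper names are assumptions for illustration, not definitions taken from this patch:

    #include "all_ops.h"       // assumed location of the generated operator wrappers (ge::op::*)
    #include "graph/graph.h"

    // Build a one-operator graph around the Conv2D op registered above.
    ge::Graph BuildConv2dGraph(ge::Operator &data, ge::Operator &weight) {
      auto conv = ge::op::Conv2D("conv2d_0")
                      .set_input_x(data)          // feature map, e.g. DT_FLOAT16 or DT_BF16
                      .set_input_filter(weight)   // filter tensor of the matching type
                      .set_attr_strides({1, 1, 1, 1})
                      .set_attr_pads({0, 0, 0, 0})
                      .set_attr_dilations({1, 1, 1, 1});
      ge::Graph graph("conv2d_graph");
      graph.SetInputs({data}).SetOutputs({conv});
      return graph;
    }

The attribute names in the sketch mirror the REQUIRED_ATTR/ATTR entries of the registration (strides, pads, dilations).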
*\n @@ -1476,32 +1455,26 @@ REG_OP(Conv3DTransposeD) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | input_size | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | x (out_backprop) | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | y (fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | Offset_x | | [-128, 127]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | input_size | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | x (out_backprop) | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | y (fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n + | Offset_x | | [-128, 127] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 0011c72e..b14cc49d 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -205,7 +205,8 @@ the value "5" indicates the indexes of images where the ROIs are located, "x0", *@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. *@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is -equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n +equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . +*@li roi_end_mode: An optional attribute of type int, specifying the align mode .\n *@par Outputs: *xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". @@ -220,6 +221,7 @@ REG_OP(ROIAlignGrad) .REQUIRED_ATTR(pooled_height, Int) .REQUIRED_ATTR(spatial_scale, Float) .ATTR(sample_num, Int, 2) + .ATTR(roi_end_mode, Int, 1) .OP_END_FACTORY_REG(ROIAlignGrad) /** diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index 067357de..cf332e63 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -59,6 +59,25 @@ REG_OP(HardSwish) .OP_END_FACTORY_REG(HardSwish) /** +*@brief Computes the gradient for the hard_swish of "x" . \n + +* @par Inputs: +*Two inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32 +* @li x: A Tensor of the same type as "grad" . 
\n + +*@par Outputs: +*y: A Tensor. Has the same type as "grad". +* @par Third-party framework compatibility +* Compatible with the Torch operator HardSwishGrad. +*/ +REG_OP(HardSwishGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(HardSwishGrad) + +/** *@brief Computes the for the Swish of "x" . \n *@par Inputs: @@ -81,6 +100,29 @@ REG_OP(Swish) .OP_END_FACTORY_REG(Swish) /** +*@brief Computes the gradient for the Swish of "x" . \n + +*@par Inputs: +*Three inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32 +* @li x: A Tensor of the same type as "grad". +* @li y: A Tensor of the same type as "grad" . \n +* @par Attributes: +* scale: An optional scalar. The data type is float . \n +*@par Outputs: +*grad_x: A Tensor. Has the same type as "grad". +*@par Third-party framework compatibility +*Compatible with the Torch operator SwishGrad. +*/ +REG_OP(SwishGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(scale, Float, 1.0) + .OP_END_FACTORY_REG(SwishGrad) + +/** *@brief Computes the gradient for the gelu of "x" . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 9d0e7a62..6d4bcd5e 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -274,6 +274,38 @@ REG_OP(PadV3) .ATTR(mode, String, "constant") .ATTR(paddings_contiguous, Bool, true) .OP_END_FACTORY_REG(PadV3) + + /** +*@brief Calculates the gradient of Pad. + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64. + +*@par Attributes: +* @li mode: An optional string, Defaults to "reflect", indicates the padding mode, +* supports "reflect" and "edge". +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator PadGrad. +*/ + +REG_OP(PadV3Grad) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(mode, String, "reflect") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3Grad) /** *@brief Pads a tensor. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index 66f9b65f..ad7f9003 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -685,6 +685,24 @@ REG_OP(Uniform) .ATTR(from, Float, 0.0) .ATTR(to, Float, 1.0) .OP_END_FACTORY_REG(Uniform) -} // namespace ge +/** +*@brief Outputs integers consisting of 0 and 1, used for LSTM etc. \n +*@par Inputs: +* @li time_step: A tensor with data type int64. 0-D. +* @li batch_size: A tensor with data type int64. 0-D. + +*@par Outputs: +*y: A Tensor. Has the type float16 or float, 2-D, [time_step,batch_size].
\n + +*@par Third-party framework compatibility +* Compatible with the Caffe operator ContinuationIndicator. +*/ + +REG_OP(ContinuationIndicator) + .REQUIRED_ATTR(time_step, Int) + .REQUIRED_ATTR(batch_size, Int) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(ContinuationIndicator) +} // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 1578ba59..0e578d86 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -1275,7 +1275,7 @@ REG_OP(ReduceStd) * @par Attributes: -* Three Attributes, including: +* Five Attributes, including: * @li dim: An optional listint, Defaults to "None". \n * @li unbiased: An optional bool. Defaults to "True". * If "True", Use Bessel Correction. @@ -1283,9 +1283,14 @@ REG_OP(ReduceStd) * @li keepdim: An optional bool. Defaults to "False". * If "True", Keep the original tensor dimension. * If "False", Do not keep the original tensor dimension. \n +* @li invert: An optional bool. Defaults to "False". +* If "True", the output is the inverse of the variance. +* If "False", the output is the variance. +* @li epsilon: An optional float. Defaults to 0.001. +* Prevents division by 0. * @par Outputs: -* @li y: A Tensor. It's the std of X. Has the same type as "x". +* @li y: A Tensor. It's the variance of X or the reciprocal of the variance of X. Has the same type as "x". * @par Third-party framework compatibility * Compatible with the Pytorch operator ReduceStdWithMean. @@ -1297,6 +1302,8 @@ REG_OP(ReduceStdWithMean) .ATTR(dim, ListInt, {}) .ATTR(unbiased, Bool, true) .ATTR(keepdim, Bool, false) + .ATTR(invert, Bool, false) + .ATTR(epsilon, Float, 0.001) .OP_END_FACTORY_REG(ReduceStdWithMean) } //namespace ge diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index cc0bff00..b374fa5c 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -822,7 +822,7 @@ REG_OP(DynamicGRU) *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li bias_input:Must be one of the following types: float16, float32. The format must be ND. *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: @@ -852,7 +852,7 @@ REG_OP(DynamicGRUV2) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -880,7 +880,7 @@ REG_OP(DynamicGRUV2) *@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:Must be one of the following types: int32. The format must be ND.
+*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: @@ -913,7 +913,7 @@ REG_OP(DynamicGRUV2Hidden) .INPUT(x_weight_input, TensorType({DT_FLOAT32})) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1050,6 +1050,50 @@ REG_OP(GRUV2HiddenGradCell) .OP_END_FACTORY_REG(GRUV2HiddenGradCell) /** +*@brief: DynamicGRUCellGrad calculation. +*@par Inputs: +*ten inputs: \n +*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.+ +*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND. + +*@par Attributes: +*gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. + +*@par Outputs: +*three outputs: \n +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicGRUCellGrad) + .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(t_state, TensorType({DT_INT32, DT_INT32})) + .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(gate_order, String, "zrh") + .OP_END_FACTORY_REG(DynamicGRUCellGrad) + +/** * @brief Calculates the reversed outputs of the function "embedding". \n * @par Inputs: @@ -1137,8 +1181,8 @@ REG_OP(CommonLSTM) * * @par Inputs: * @li seq_length: A 1D Tensor. 
Must be one of the following types: int32. Record the current length of each batch. [batch_size]. - * @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Record the hidden_size. [4 * hidden_size]. * @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Record the num_step/batch_size/input_size. [num_step, batch_size, input_size]. + * @li hidden_size: An optional attribute of type int32. pass the hidden_size. \n * * @par Outputs: * seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32. with the shape of [num_step, batch_size, hidden_size]. And has the same type as "b" \n @@ -1148,8 +1192,8 @@ REG_OP(CommonLSTM) */ REG_OP(RnnGenMaskV2) .INPUT(seq_length, TensorType({DT_INT32})) - .INPUT(b, TensorType({{DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(hidden_size, Int) .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(RnnGenMaskV2) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 7f7c4fc8..08fb25a3 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1381,7 +1381,7 @@ REG_OP(InplaceUpdate) .INPUT(x, TensorType::BasicType()) .INPUT(indices, TensorType({DT_INT32})) .INPUT(v, TensorType::BasicType()) - .OUTPUT(y, TensorType::BasicType()) + .OUTPUT(x, TensorType::BasicType()) .OP_END_FACTORY_REG(InplaceUpdate) /** @@ -2408,6 +2408,40 @@ REG_OP(TopKPQDistanceMerge) .OUTPUT(topk_index, TensorType({DT_INT32})) .REQUIRED_ATTR(k, Int) .OP_END_FACTORY_REG(TopKPQDistanceMerge) + +/** +*@brief Extracts a strided slice of a tensor. Roughly speaking, this op + extracts a slice of size (end-begin)/stride from the given input tensor. + Starting at the location specified by begin the slice continues by + adding stride to the index until all dimensions are not less than end. + +*@par Inputs: +*Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64. +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select . \n + +* @li end: A Tensor of type int32 or int64, for the index of the last value to select . \n + +* @li strides: A Tensor of type int32 or int64, for the increment . \n + +* @li axes: A Tensor of type int32 or int64, for the increment . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StridedSliceV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(StridedSliceV3) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 3560db11..525f60e9 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -368,8 +368,9 @@ REG_OP(SpaceToDepth) * complex128, uint32, uint64 *@par Attributes: -*Two attributes, including: +*Three attributes, including: * @li block_size: An int >= 2, specifying the size of the spatial block. 
+* @li mode: An optional string, specifying the mode. Defaults to "DCR". * @li data_format: An optional string, specifying the data format. Defaults to "NHWC" . \n *@par Outputs: @@ -382,6 +383,7 @@ REG_OP(DepthToSpace) .INPUT(x, TensorType::BasicType()) .OUTPUT(y, TensorType::BasicType()) .REQUIRED_ATTR(block_size, Int) + .ATTR(mode, String, "DCR") .ATTR(data_format, String, "NHWC") .OP_END_FACTORY_REG(DepthToSpace) diff --git a/third_party/fwkacllib/inc/ops/vector_search.h b/third_party/fwkacllib/inc/ops/vector_search.h index e3099511..d07f8cd6 100644 --- a/third_party/fwkacllib/inc/ops/vector_search.h +++ b/third_party/fwkacllib/inc/ops/vector_search.h @@ -34,7 +34,7 @@ namespace ge { * @li bucket_list: A Tensor. Must be one of the following types: int32, int64. * * @par Outputs: -* @li adc_tables: A Tensor. Must be one of the following types: float16, float32. +* adc_tables: A Tensor. Must be one of the following types: float16, float32. */ REG_OP(GenADC) .INPUT(query, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -43,6 +43,87 @@ REG_OP(GenADC) .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) .OUTPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(GenADC) + +/** +* @brief Finds values and indices of the "k" largest or least elements for the last dimension. \n +* +* @par Inputs: +* Dynamic inputs, including: +* @li actual_count: A Tensor of type int32, the actual number of pq_distance. +* @li pq_distance: A Tensor, will be updated after calculation. Must be one of the following types: float32, float16. +* @li grouped_extreme_distance: A Tensor, the extremum in each group. Must be one of the following types: float32, float16. +* @li pq_index: A Tensor of type int32, index corresponding to pq_distance. +* @li pq_ivf: A Tensor of type int32, the bucket number corresponding to pq_distance. +* +* @par Attributes: +* @li order: A string, indicates the sorting method of topk_pq_distance. \n +* @li k: Int, k maximum or minimum values. \n +* @li group_size: Int, the group size of the extremum. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(TopKPQDistance) + .DYNAMIC_INPUT(actual_count, TensorType({DT_INT32})) + .DYNAMIC_INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(grouped_extreme_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(pq_ivf, TensorType({DT_INT32})) + .DYNAMIC_INPUT(pq_index, TensorType({DT_INT32})) + .OUTPUT(topk_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(topk_ivf, TensorType({DT_INT32})) + .OUTPUT(topk_index, TensorType({DT_INT32})) + .ATTR(order, String, "ASC") + .ATTR(k, Int, 0) + .ATTR(group_size, Int, 0) + .OP_END_FACTORY_REG(TopKPQDistance) + +/** +* @brief Calculates PQ distance. \n +* +* @par Inputs: +* Six inputs, including: +* @li ivf: A Tensor, dtype is uint8. +* @li bucket_list: A Tensor, dtype is int32. +* @li bucket_base_distance: A Tensor, dtype is float16. +* @li bucket_limits: A Tensor, dtype is int32. +* @li bucket_offsets: A Tensor, dtype is int64. +* @li adc_tables: A Tensor, dtype is float16. \n +* +* @par Outputs: +* Five outputs, including: +* @li actual_count: A Tensor, dtype is int32, the first element means the length of processed ivf. +* @li pq_distance: A Tensor, dtype is float16. +* @li grouped_extreme_distance: A Tensor, dtype is float16. +* @li pq_ivf: A Tensor, dtype is int32. +* @li pq_index: A Tensor, dtype is int32.
\n +* +* @par Attributes: +* Five attributes, including: +* @li group_size: A Scalar, indicates the group size when compute grouped_extreme_distance. +* @li total_limit: A Scalar, indicates the total length of the outputs. +* @li extreme_mode: A Scalar, indicates the type of extremum, 0 means minimum, and 1 means maximum. +* @li split_count: A Scalar. +* @li split_index: A Scalar. \n +* +*/ +REG_OP(ScanPQCodes) + .INPUT(ivf, TensorType({DT_UINT8})) + .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) + .INPUT(bucket_base_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bucket_limits, TensorType({DT_INT32})) + .INPUT(bucket_offsets, TensorType({DT_INT64})) + .INPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(actual_count, TensorType({DT_INT32})) + .OUTPUT(pq_distance, TensorType({DT_FLOAT16})) + .OUTPUT(grouped_extreme_distance, TensorType({DT_FLOAT16})) + .OUTPUT(pq_ivf, TensorType({DT_INT32})) + .OUTPUT(pq_index, TensorType({DT_INT32})) + .REQUIRED_ATTR(total_limit, Int) + .ATTR(group_size, Int, 64) + .ATTR(extreme_mode, Int, 0) + .ATTR(split_count, Int, 1) + .ATTR(split_index, Int, 0) + .OP_END_FACTORY_REG(ScanPQCodes) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index fc2cd038..4f9b15be 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -42,7 +42,7 @@ static const int32_t RT_ERROR_NONE = 0; // success */ typedef enum tagRtDeviceMode { RT_DEVICE_MODE_SINGLE_DIE = 0, - RT_DEVICE_MODE_MULTI_DIE = 1, + RT_DEVICE_MODE_MULTI_DIE, RT_DEVICE_MODE_RESERVED } rtDeviceMode; @@ -178,7 +178,7 @@ RTS_API rtError_t rtProfilerInit(const char *profDir, const char *address, const * @ingroup profiling_base * @brief config rts profiler. */ -RTS_API rtError_t rtProfilerConfig(uint16_t type); +RTS_API rtError_t rtProfilerConfig(uint16_t profConfig); /** * @ingroup profiling_base @@ -251,18 +251,6 @@ RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle /** * @ingroup dvrt_base - * @brief Returns the last error from a runtime call. - */ -RTS_API rtError_t rtGetLastError(); - -/** - * @ingroup dvrt_base - * @brief Returns the last error from a runtime call. - */ -RTS_API rtError_t rtPeekAtLastError(); - -/** - * @ingroup dvrt_base * @brief register callback for error code * @param [out] NA * @return RT_ERROR_NONE for ok diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 76836e7b..d8083def 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_CONFIG_H__ -#define __CCE_RUNTIME_CONFIG_H__ +#ifndef CCE_RUNTIME_CONFIG_H +#define CCE_RUNTIME_CONFIG_H #include "base.h" @@ -23,28 +23,28 @@ extern "C" { #endif -#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) -#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) -#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) -#define PLAT_GET_VER(type) (type & 0xff) +#define PLAT_COMBINE(arch, chip, ver) (((arch) << 16U) | ((chip) << 8U) | (ver)) +#define PLAT_GET_ARCH(type) (((type) >> 16U) & 0xffffU) +#define PLAT_GET_CHIP(type) (((type) >> 8U) & 0xffU) +#define PLAT_GET_VER(type) ((type) & 0xffU) typedef enum tagRtArchType { ARCH_BEGIN = 0, ARCH_V100 = ARCH_BEGIN, - ARCH_V200, - ARCH_END, + ARCH_V200 = 1, + ARCH_END = 2, } rtArchType_t; typedef enum tagRtChipType { CHIP_BEGIN = 0, CHIP_MINI = CHIP_BEGIN, - CHIP_CLOUD, - CHIP_MDC, - CHIP_LHISI, - CHIP_DC, - CHIP_CLOUD_V2, - CHIP_NO_DEVICE, - CHIP_END, + CHIP_CLOUD = 1, + CHIP_MDC = 2, + CHIP_LHISI = 3, + CHIP_DC = 4, + CHIP_CLOUD_V2 = 5, + CHIP_NO_DEVICE = 6, + CHIP_END = 7, } rtChipType_t; typedef enum tagRtAicpuScheType { @@ -59,29 +59,32 @@ typedef enum tagRtDeviceCapabilityType { RT_SCHEDULE_HARDWARE, // HWTS Schedule RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation + RT_MODE_NO_FFTS, // no ffts + RT_MODE_FFTS, // 1981 get ffts work mode, ffts + RT_MODE_FFTS_PLUS, // 1981 get ffts work mode, ffts plus } rtDeviceCapabilityType; typedef enum tagRtVersion { VER_BEGIN = 0, VER_NA = VER_BEGIN, - VER_ES, - VER_CS, - VER_SD3403, - VER_END, + VER_ES = 1, + VER_CS = 2, + VER_SD3403 = 3, + VER_END = 4, } rtVersion_t; /* match rtChipType_t */ typedef enum tagRtPlatformType { PLATFORM_BEGIN = 0, PLATFORM_MINI_V1 = PLATFORM_BEGIN, - PLATFORM_CLOUD_V1, - PLATFORM_MINI_V2, - PLATFORM_LHISI_ES, - PLATFORM_LHISI_CS, - PLATFORM_DC, - PLATFORM_CLOUD_V2, - PLATFORM_LHISI_SD3403, - PLATFORM_END, + PLATFORM_CLOUD_V1 = 1, + PLATFORM_MINI_V2 = 2, + PLATFORM_LHISI_ES = 3, + PLATFORM_LHISI_CS = 4, + PLATFORM_DC = 5, + PLATFORM_CLOUD_V2 = 6, + PLATFORM_LHISI_SD3403 = 7, + PLATFORM_END = 8, } rtPlatformType_t; typedef enum tagRtCubeFracMKNFp16 { @@ -240,4 +243,4 @@ RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // CCE_RUNTIME_CONFIG_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index bb6bf111..947ed093 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CCE_RUNTIME_CONTEXT_H__ -#define __CCE_RUNTIME_CONTEXT_H__ +#ifndef CCE_RUNTIME_CONTEXT_H +#define CCE_RUNTIME_CONTEXT_H #include "base.h" @@ -173,4 +173,4 @@ RTS_API rtError_t rtSetCtxINFMode(bool mode); #endif -#endif // __CCE_RUNTIME_CONTEXT_H__ +#endif // CCE_RUNTIME_CONTEXT_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 3d3da22e..98975f70 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -14,8 +14,8 @@ * limitations under the License. 
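A minimal sketch of how the hardened PLAT_* helpers compose and decode a platform value, using only the macros and enums defined in this header; the local variable names are illustrative:

    uint32_t plat = PLAT_COMBINE(ARCH_V200, CHIP_CLOUD_V2, VER_CS);
    uint32_t arch = PLAT_GET_ARCH(plat);  /* ARCH_V200 */
    uint32_t chip = PLAT_GET_CHIP(plat);  /* CHIP_CLOUD_V2 */
    uint32_t ver  = PLAT_GET_VER(plat);   /* VER_CS */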
*/ -#ifndef __CCE_RUNTIME_DEVICE_H__ -#define __CCE_RUNTIME_DEVICE_H__ +#ifndef CCE_RUNTIME_DEVICE_H +#define CCE_RUNTIME_DEVICE_H #include "base.h" @@ -23,8 +23,8 @@ extern "C" { #endif -#define RT_CAPABILITY_SUPPORT (0x1) -#define RT_CAPABILITY_NOT_SUPPORT (0x0) +#define RT_CAPABILITY_SUPPORT (0x1U) +#define RT_CAPABILITY_NOT_SUPPORT (0x0U) typedef struct tagRTDeviceInfo { uint8_t env_type; // 0: FPGA 1: EMU 2: ESL @@ -45,27 +45,28 @@ typedef struct tagRTDeviceInfo { typedef enum tagRtRunMode { RT_RUN_MODE_OFFLINE = 0, - RT_RUN_MODE_ONLINE = 1, - RT_RUN_MODE_AICPU_SCHED = 2, + RT_RUN_MODE_ONLINE, + RT_RUN_MODE_AICPU_SCHED, RT_RUN_MODE_RESERVED } rtRunMode; typedef enum tagRtAicpuDeployType { AICPU_DEPLOY_CROSS_OS = 0x0, - AICPU_DEPLOY_CROSS_PROCESS = 0x1, - AICPU_DEPLOY_CROSS_THREAD = 0x2, + AICPU_DEPLOY_CROSS_PROCESS, + AICPU_DEPLOY_CROSS_THREAD, AICPU_DEPLOY_RESERVED } rtAicpuDeployType_t; typedef enum tagRtFeatureType { FEATURE_TYPE_MEMCPY = 0, - FEATURE_TYPE_MEMORY = 1, + FEATURE_TYPE_MEMORY, FEATURE_TYPE_RSV } rtFeatureType_t; typedef enum tagRtDeviceFeatureType { FEATURE_TYPE_SCHE, FEATURE_TYPE_BLOCKING_OPERATOR, + FEATURE_TYPE_FFTS_MODE, FEATURE_TYPE_END, } rtDeviceFeatureType_t; @@ -90,6 +91,15 @@ typedef enum tagRtDeviceModuleType { RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/ } rtDeviceModuleType_t; +// used for rtGetDevMsg callback function +typedef void (*rtGetMsgCallback)(const char *msg, uint32_t len); + +typedef enum tagGetDevMsgType { + RT_GET_DEV_ERROR_MSG = 0, + RT_GET_DEV_RUNNING_STREAM_SNAPSHOT_MSG, + RT_GET_DEV_MSG_RESERVE +} rtGetDevMsgType_t; + /** * @ingroup dvrt_dev * @brief get total device number. @@ -408,8 +418,17 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); */ RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); +/** + * @ingroup dvrt_dev + * @brief get device message + * @param [in] rtGetDevMsgType_t getMsgType:msg type + * @param [in] GetMsgCallback callback:acl callback function + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback callback); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_DEVICE_H__ +#endif // CCE_RUNTIME_DEVICE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 33e2f4c1..b0caaf2d 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CCE_RUNTIME_DVFSPROFILE_H__ -#define __CCE_RUNTIME_DVFSPROFILE_H__ +#ifndef CCE_RUNTIME_DVFSPROFILE_H +#define CCE_RUNTIME_DVFSPROFILE_H #include "base.h" @@ -60,4 +60,4 @@ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); } #endif -#endif // __CCE_RUNTIME_PROFILE_H__ +#endif // CCE_RUNTIME_DVFSPROFILE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 81b635c3..3c1f2670 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -14,8 +14,8 @@ * limitations under the License. 
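A short sketch of how the new rtGetDevMsg interface declared above might be used; the callback body and the choice of stderr are illustrative assumptions, not part of the patch:

    #include <stdio.h>

    static void DevMsgDump(const char *msg, uint32_t len) {
        (void)fwrite(msg, 1U, (size_t)len, stderr);  /* forward the device message */
    }

    /* somewhere after device setup */
    (void)rtGetDevMsg(RT_GET_DEV_ERROR_MSG, DevMsgDump);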
*/ -#ifndef __CCE_RUNTIME_EVENT_H__ -#define __CCE_RUNTIME_EVENT_H__ +#ifndef CCE_RUNTIME_EVENT_H +#define CCE_RUNTIME_EVENT_H #include "base.h" @@ -33,8 +33,8 @@ typedef enum rtEventWaitStatus { * @ingroup event_flags * @brief event op bit flags */ -#define RT_EVENT_DEFAULT (0x0E) -#define RT_EVENT_WITH_FLAG (0x0B) +#define RT_EVENT_DEFAULT (0x0EU) +#define RT_EVENT_WITH_FLAG (0x0BU) #define RT_EVENT_DDSYNC_NS 0x01U #define RT_EVENT_STREAM_MARK 0x02U @@ -200,14 +200,14 @@ RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); /** * @ingroup dvrt_event * @brief Wait for a notify with time out - * @param [in] notify_ notify to be wait - * @param [in] stream_ input stream + * @param [in] notify notify to be wait + * @param [in] stream input stream * @param [in] timeOut input timeOut * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_, uint32_t timeOut); +RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify, rtStream_t stream, uint32_t timeOut); /** * @ingroup dvrt_event @@ -270,10 +270,10 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); +RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int32_t num); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_EVENT_H__ +#endif // CCE_RUNTIME_EVENT_H diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index f33b51d3..a0ccff73 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_KERNEL_H__ -#define __CCE_RUNTIME_KERNEL_H__ +#ifndef CCE_RUNTIME_KERNEL_H +#define CCE_RUNTIME_KERNEL_H #include "base.h" #include "stream.h" @@ -131,7 +131,10 @@ typedef struct tagRtArgsWithTiling { uint32_t argsSizeWithoutTiling; // input + output + tiling addr size uint16_t tilingAddrOffset; // tiling addr offset uint16_t tilingDataOffset; // tiling data offset - uint16_t reserved[2]; + uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list + uint16_t hostInputDataOffset; // host_mem input data offset + bool hasHostMemInput; // has host_memory input data in args or not: ture or false + uint8_t reserved[7]; } rtArgsWithTiling_t; /** @@ -141,7 +144,7 @@ typedef struct tagRtArgsWithTiling { typedef enum tagRtDumpKind { RT_DATA_DUMP_KIND_INVALID = -1, RT_DATA_DUMP_KIND_DUMP = 0, - RT_DATA_DUMP_KIND_RESERVED + RT_DATA_DUMP_KIND_RESERVED = 1, } rtDumpKind_t; /** @@ -160,72 +163,72 @@ typedef void (*rtCallback_t)(void *fnData); * @ingroup rt_kernel * @brief magic number of plain binary for aicore */ -#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50 +#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50U /** * @ingroup rt_kernel * @brief magic number of plain binary for aicpu */ -#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51 +#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51U /** * @ingroup rt_kernel * @brief magic number of plain binary for aivector */ -#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52 +#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicore */ -#define RT_DEV_BINARY_MAGIC_ELF 0x43554245 +#define RT_DEV_BINARY_MAGIC_ELF 0x43554245U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicpu */ -#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243 +#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243U /** * @ingroup rt_kernel * @brief magic number of elf binary for aivector */ -#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246 +#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicube */ -#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343 +#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343U /** * @ingroup rt_kernel_flags * @brief kernel op bit flags */ -#define RT_KERNEL_DEFAULT (0x00) -#define RT_KERNEL_CONVERT (0x01) -#define RT_KERNEL_DUMPFLAG (0x02) -#define RT_FUSION_KERNEL_DUMPFLAG (0x04) -#define RT_KERNEL_CUSTOM_AICPU (0x08) +#define RT_KERNEL_DEFAULT (0x00U) +#define RT_KERNEL_CONVERT (0x01U) +#define RT_KERNEL_DUMPFLAG (0x02U) +#define RT_FUSION_KERNEL_DUMPFLAG (0x04U) +#define RT_KERNEL_CUSTOM_AICPU (0x08U) // STARS topic scheduler sqe : topic_type -#define RT_KERNEL_DEVICE_FIRST (0x10) -#define RT_KERNEL_HOST_ONLY (0x20) -#define RT_KERNEL_HOST_FIRST (0x40) +#define RT_KERNEL_DEVICE_FIRST (0x10U) +#define RT_KERNEL_HOST_ONLY (0x20U) +#define RT_KERNEL_HOST_FIRST (0x40U) /** * @ingroup rt_kernel * @brief kernel mode **/ -#define RT_DEFAULT_KERNEL_MODE (0x00) -#define RT_NORMAL_KERNEL_MODE (0x01) -#define RT_ALL_KERNEL_MODE (0x02) +#define RT_DEFAULT_KERNEL_MODE (0x00U) +#define RT_NORMAL_KERNEL_MODE (0x01U) +#define RT_ALL_KERNEL_MODE (0x02U) /** * @ingroup rt_kernel * @brief kernel L1 Fusion Dump bit flags */ -#define RT_DDR_ADDR (0x0) +#define RT_DDR_ADDR (0x0U) /** * @ingroup rt_kernel @@ -672,7 +675,7 @@ RTS_API rtError_t rtStopMDCProfiler(void *addr); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockDim, - 
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_); + rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream); /** * @ingroup rt_kernel @@ -688,11 +691,11 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *handle, const void *devFunc, uint32_t blockDim, - rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_, const void* kernelInfo); + rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream, const void* kernelInfo); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_KERNEL_H__ +#endif // CCE_RUNTIME_KERNEL_H diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index b049e762..473a203a 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -14,12 +14,10 @@ * limitations under the License. */ -#ifndef __CCE_RUNTIME_MEM_H__ -#define __CCE_RUNTIME_MEM_H__ +#ifndef CCE_RUNTIME_MEM_H +#define CCE_RUNTIME_MEM_H -/*lint -e7*/ #include -/*lint +e7*/ #include "base.h" #include "config.h" #include "stream.h" @@ -32,43 +30,43 @@ extern "C" { * @ingroup dvrt_mem * @brief memory type */ -#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device -#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device -#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device -#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device -#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device -#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device -#define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device -#define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache -#define RT_MEMORY_TS_4G ((uint32_t)0x40) -#define RT_MEMORY_TS ((uint32_t)0x80) -#define RT_MEMORY_RESERVED ((uint32_t)0x100) +#define RT_MEMORY_DEFAULT (0x0U) // default memory on device +#define RT_MEMORY_HBM (0x2U) // HBM memory on device +#define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device +#define RT_MEMORY_DDR (0x4U) // DDR memory on device +#define RT_MEMORY_SPM (0x8U) // shared physical memory on device +#define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device +#define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device +#define RT_MEMORY_DDR_NC (0x20U) // DDR memory of non-cache +#define RT_MEMORY_TS_4G (0x40U) +#define RT_MEMORY_TS (0x80U) +#define RT_MEMORY_RESERVED (0x100U) -#define RT_MEMORY_L1 ((uint32_t)0x1<<16) -#define RT_MEMORY_L2 ((uint32_t)0x1<<17) +#define RT_MEMORY_L1 (0x1U << 16U) +#define RT_MEMORY_L2 (0x1U << 17U) /** * @ingroup dvrt_mem * @brief memory info type */ -#define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1) -#define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2) -#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3) -#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4) +#define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U) +#define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U) +#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U) +#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U) /** * @ingroup dvrt_mem * @brief memory Policy */ -#define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // Malloc mem prior hage page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prior hage page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only use hage page -#define 
RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only use default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prior hage page, then default page, use for p2p -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only use hage page, use for p2p -#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only use default page, use for p2p +#define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x1U << 10U) // Malloc mem prior huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x1U << 11U) // Malloc mem only use huge page +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1U << 12U) // Malloc mem only use default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x1U << 13U) // Malloc mem prior huge page, then default page, for p2p +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x1U << 14U) // Malloc mem only use huge page, use for p2p +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x1U << 15U) // Malloc mem only use default page, use for p2p -#define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mem type bit in <0, 9> +#define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9> /** * @ingroup dvrt_mem @@ -80,10 +78,10 @@ typedef uint32_t rtMemType_t; * @ingroup dvrt_mem * @brief memory advise type */ -#define RT_MEMORY_ADVISE_EXE (0x02) -#define RT_MEMORY_ADVISE_THP (0x04) -#define RT_MEMORY_ADVISE_PLE (0x08) -#define RT_MEMORY_ADVISE_PIN (0x16) +#define RT_MEMORY_ADVISE_EXE (0x02U) +#define RT_MEMORY_ADVISE_THP (0x04U) +#define RT_MEMORY_ADVISE_PLE (0x08U) +#define RT_MEMORY_ADVISE_PIN (0x16U) /** * @ingroup dvrt_mem @@ -119,7 +117,7 @@ typedef enum tagRtRecudeKind { RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11, RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12, RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13, - RT_RECUDE_KIND_END + RT_RECUDE_KIND_END = 14, } rtRecudeKind_t; typedef enum tagRtDataType { @@ -134,7 +132,7 @@ typedef enum tagRtDataType { RT_DATA_TYPE_UINT8 = 8, // uint8 RT_DATA_TYPE_UINT16= 9, // uint16 RT_DATA_TYPE_UINT32= 10,// uint32 - RT_DATA_TYPE_END + RT_DATA_TYPE_END = 11, } rtDataType_t; /** @@ -197,7 +195,7 @@ typedef struct rtMallocHostSharedMemoryIn { } rtMallocHostSharedMemoryIn; typedef struct rtMallocHostSharedMemoryOut { - int fd; + int32_t fd; void *ptr; void *devPtr; } rtMallocHostSharedMemoryOut; @@ -205,7 +203,7 @@ typedef struct rtMallocHostSharedMemoryOut { typedef struct rtFreeHostSharedMemoryIn { const char *name; const uint64_t size; - int fd; + int32_t fd; void *ptr; void *devPtr; } rtFreeHostSharedMemoryIn; @@ -384,6 +382,39 @@ RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, ui /** * @ingroup dvrt_mem + * @brief synchronized memcpy2D + * @param [in] dst destination address pointer + * @param [in] dstPitch pitch of destination memory + * @param [in] src source address pointer + * @param [in] srcPitch pitch of source memory + * @param [in] width width of matrix transfer + * @param [in] height height of matrix transfer + * @param [in] kind memcpy type + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, + uint64_t height, rtMemcpyKind_t kind); + +/** + * @ingroup dvrt_mem + * @brief asynchronized memcpy2D + * @param [in] dst destination address pointer + * @param [in] dstPitch length of destination address memory + * 
@param [in] src source address pointer + * @param [in] srcPitch length of destination address memory + * @param [in] width width of matrix transfer + * @param [in] height height of matrix transfer + * @param [in] kind memcpy type + * @param [in] stream asynchronized task stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, + uint64_t height, rtMemcpyKind_t kind, rtStream_t stream); + +/** + * @ingroup dvrt_mem * @brief query memory size * @param [in] aiCoreMemorySize * @return RT_ERROR_NONE for ok, errno for failed @@ -429,22 +460,22 @@ RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uin /** * @ingroup dvrt_mem * @brief get current device memory total and free - * @param [out] free - * @param [out] total + * @param [out] freeSize + * @param [out] totalSize * @return RT_ERROR_NONE for ok, errno for failed * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total); +RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize); /** * @ingroup dvrt_mem * @brief get current device memory total and free * @param [in] memInfoType - * @param [out] free - * @param [out] total + * @param [out] freeSize + * @param [out] totalSize * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total); +RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize); /** * @ingroup dvrt_mem @@ -551,4 +582,4 @@ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t str } #endif -#endif // __CCE_RUNTIME_MEM_H__ +#endif // CCE_RUNTIME_MEM_H diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index 6c2f5318..8c236dcd 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -14,8 +14,8 @@ * limitations under the License. 
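A minimal sketch of a pitched 2D copy through the rtMemcpy2d interface declared above; the pointers, pitches, and the RT_MEMCPY_HOST_TO_DEVICE kind (taken from the existing rtMemcpyKind_t in this header) are assumed for illustration:

    /* copy a widthBytes x height sub-block; dstPitch/srcPitch are full row
       strides in bytes and must be >= widthBytes on both sides */
    rtError_t ret = rtMemcpy2d(devPtr, devPitch, hostPtr, hostPitch,
                               widthBytes, height, RT_MEMCPY_HOST_TO_DEVICE);
    if (ret != RT_ERROR_NONE) {
        /* handle the error, e.g. log and release resources */
    }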
*/ -#ifndef __CCE_RUNTIME_RT_H__ -#define __CCE_RUNTIME_RT_H__ +#ifndef CCE_RUNTIME_RT_H +#define CCE_RUNTIME_RT_H #include "base.h" #include "config.h" @@ -32,4 +32,4 @@ #include "rt_ffts_plus.h" #include "rt_dfx.h" -#endif // __CCE_RUNTIME_RT_H__ +#endif // CCE_RUNTIME_RT_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h index f2809218..11164757 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -3,8 +3,8 @@ * Description: ffts interface */ -#ifndef __CCE_RUNTIME_FFTS_H -#define __CCE_RUNTIME_FFTS_H +#ifndef CCE_RUNTIME_RT_FFTS_H +#define CCE_RUNTIME_RT_FFTS_H #include "base.h" @@ -33,7 +33,7 @@ typedef enum tagFftsSubTaskType { RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, RT_FFTS_SUB_TASK_TYPE_SDMA = 8, - RT_FFTS_SUB_TASK_TYPE_RESERVED, + RT_FFTS_SUB_TASK_TYPE_RESERVED = 9, } rtFftsSubTaskType_t; typedef struct tagManualThreadDmuInfo { @@ -178,7 +178,9 @@ typedef struct tagFftsTaskInfo { RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); +RTS_API rtError_t rtFftsTaskLaunchWithFlag(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream, uint32_t flag); + #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_FFTS_H +#endif // CCE_RUNTIME_RT_FFTS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h index 61eee9f3..343701a2 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h @@ -3,8 +3,8 @@ * Description: ffts plus interface */ -#ifndef __CCE_RUNTIME_FFTS_PLUS_H -#define __CCE_RUNTIME_FFTS_PLUS_H +#ifndef CCE_RUNTIME_RT_FFTS_PLUS_H +#define CCE_RUNTIME_RT_FFTS_PLUS_H #include "base.h" #include "rt_ffts_plus_define.h" @@ -26,9 +26,13 @@ typedef struct tagFftsPlusTaskInfo { #pragma pack(pop) RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt); + RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream); +RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream, + uint32_t flag); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_FFTS_H +#endif // CCE_RUNTIME_RT_FFTS_PLUS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h index 9887b943..36276b4c 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h @@ -3,8 +3,8 @@ * Description: the definition of ffts plus */ -#ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H -#define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H +#ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H +#define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H #include "base.h" @@ -30,7 +30,7 @@ typedef enum tagFftsPlusHwType { RT_HW_CTX_TYPE_WRITEBACK_DATA = 11, RT_HW_CTX_TYPE_AICPU = 12, RT_HW_CTX_TYPE_LOAD = 13, - RT_HW_CTX_TYPE_MAX, + RT_HW_CTX_TYPE_MAX = 14, } rtFftsPlusHwType_t; // hardware context type @@ -40,7 +40,7 @@ typedef enum tagFftsPlusSoftType { RT_SOFT_CTX_TYPE_AT_START = 3, RT_SOFT_CTX_TYPE_AT_END = 4, RT_SOFT_CTX_TYPE_LABEL = 5, - RT_SOFT_CTX_TYPE_MAX, + RT_SOFT_CTX_TYPE_MAX = 6, } rtFftsPlusSoftType_t; typedef enum tagFftsPlusContextType { @@ -71,7 +71,7 @@ typedef enum 
tagFftsPlusCondType { RT_COND_TYPE_GREATER_OR_EQUAL = 3, RT_COND_TYPE_LESS = 4, RT_COND_TYPE_LESS_OR_EQUAL = 5, - RT_COND_TYPE_MAX, + RT_COND_TYPE_MAX = 6, } rtFftsPlusCondType_t; // the definition of ffts plus context @@ -505,7 +505,7 @@ typedef struct tagFftsPlusAtStartCtx { uint16_t threadIdInit; uint16_t threadWindowSize; // 80-127 - uint16_t res9[12]; + uint32_t res9[12]; } rtFftsPlusAtStartCtx_t; // at end context @@ -712,4 +712,4 @@ typedef struct tagFftsPlusCondSwitchCtx { #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H +#endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h new file mode 100644 index 00000000..70bfb9f3 --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h @@ -0,0 +1,416 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: mbuf and queue interface + */ + +#ifndef CCE_RUNTIME_RT_MEM_QUEUE_H +#define CCE_RUNTIME_RT_MEM_QUEUE_H + +#include "base.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define RT_MQ_MAX_NAME_LEN 128 // same as driver's +#define RT_MQ_DEPTH_MIN 2U +#define RT_MQ_MODE_PUSH 1 +#define RT_MQ_MODE_PULL 2 +#define RT_MQ_MODE_DEFAULT RT_MQ_MODE_PUSH + +typedef struct tagMemQueueAttr { + char name[RT_MQ_MAX_NAME_LEN]; + uint32_t depth; + uint32_t workMode; + uint32_t flowCtrlDropTime; + bool flowCtrlFlag; + bool overWriteFlag; +} rtMemQueueAttr_t; + +typedef struct tagMemQueueShareAttr { + uint32_t manage : 1; + uint32_t read : 1; + uint32_t write : 1; + uint32_t rsv : 29; +} rtMemQueueShareAttr_t; + +typedef struct tagMemQueueBuffInfo { + void *addr; + size_t len; +} rtMemQueueBuffInfo; + +typedef struct tagMemQueueBuff { + void *contextAddr; + size_t contextLen; + rtMemQueueBuffInfo *buffInfo; + uint32_t buffCount; +} rtMemQueueBuff_t; + + +typedef enum tagMemQueueQueryCmd { + RT_MQ_QUERY_QUE_ATTR_OF_CUR_PROC = 0, // input is qid(4bytes), output is rtMemQueueShareAttr_t + RT_MQ_QUERY_QUES_OF_CUR_PROC = 1, + RT_MQ_QUERY_CMD_MAX = 2 +} rtMemQueueQueryCmd_t; + +#define RT_MQ_EVENT_QS_MSG 27 // same as driver's + +#define RT_MQ_SCHED_PRIORITY_LEVEL0 0 // same as driver's +#define RT_MQ_SCHED_PRIORITY_LEVEL1 1 +#define RT_MQ_SCHED_PRIORITY_LEVEL2 2 +#define RT_MQ_SCHED_PRIORITY_LEVEL3 3 +#define RT_MQ_SCHED_PRIORITY_LEVEL4 4 +#define RT_MQ_SCHED_PRIORITY_LEVEL5 5 +#define RT_MQ_SCHED_PRIORITY_LEVEL6 6 +#define RT_MQ_SCHED_PRIORITY_LEVEL7 7 + +/* Events can be released between different systems. This parameter specifies the destination type of events + to be released. The destination type is defined based on the CPU type of the destination system. */ +#define RT_MQ_DST_ENGINE_ACPU_DEVICE 0 // device AICPU, same as driver's +#define RT_MQ_DST_ENGINE_ACPU_HOST 1 // Host AICPU +#define RT_MQ_DST_ENGINE_CCPU_DEVICE 2 // device CtrlCPU +#define RT_MQ_DST_ENGINE_CCPU_HOST 3 // Host CtrlCPU +#define RT_MQ_DST_ENGINE_DCPU_DEVICE 4 // device DataCPU +#define RT_MQ_DST_ENGINE_TS_CPU 5 // device TS CPU +#define RT_MQ_DST_ENGINE_DVPP_CPU 6 // device DVPP CPU + +#define RT_MQ_SCHED_EVENT_QS_MSG 25 // same as driver's EVENT_QS_MSG + +/* When the destination engine is AICPU, select a policy. + ONLY: The command is executed only on the local AICPU. + FIRST: The local AICPU is preferentially executed. If the local AICPU is busy, the remote AICPU can be used. 
*/ +#define RT_SCHEDULE_POLICY_ONLY 0 // same as driver's schedule_policy +#define RT_SCHEDULE_POLICY_FIRST 1 // same as driver's schedule_policy + + +typedef struct tagEschedEventSummary { + int32_t pid; // dst PID + uint32_t grpId; + int32_t eventId; // only RT_MQ_SCHED_EVENT_QS_MSG is supported + uint32_t subeventId; + uint32_t msgLen; + char *msg; + uint32_t dstEngine; // dst system cpu type + int32_t policy; // RT_SCHEDULE_POLICY_ONLY or RT_SCHEDULE_POLICY_FIRST +} rtEschedEventSummary_t; + +typedef struct tagEschedEventReply { + char *buf; + uint32_t bufLen; + uint32_t replyLen; // output, ack msg len, same with msgLen in halEschedAckEvent +} rtEschedEventReply_t; + +#define RT_DEV_PROCESS_CP1 0 +#define RT_DEV_PROCESS_CP2 1 +#define RT_DEV_PROCESS_DEV_ONLY 2 +#define RT_DEV_PROCESS_QS 3 +#define RT_DEV_PROCESS_SIGN_LENGTH 49 + +typedef struct tagBindHostpidInfo { + int32_t hostPid; + uint32_t vfid; + uint32_t chipId; + int32_t mode; // online:0, offline:1 + int32_t cpType; // type of custom-process, see RT_DEV_PROCESS_XXX + uint32_t len; // lenth of sign + char sign[RT_DEV_PROCESS_SIGN_LENGTH]; // sign of hostpid +} rtBindHostpidInfo_t; + +#define RT_MEM_BUFF_MAX_CFG_NUM 64 + +typedef struct { + uint32_t cfgId; // cfg id, start from 0 + uint32_t totalSize; // one zone total size + uint32_t blkSize; // blk size, 2^n (0, 2M] + uint32_t maxBufSize; // max size can alloc from zone + uint32_t pageType; // page type, small page / huge page + int32_t elasticEnable; // elastic enable + int32_t elasticRate; + int32_t elasticRateMax; + int32_t elasticHighLevel; + int32_t elasticLowLevel; +} rtMemZoneCfg_t; + +typedef struct { + rtMemZoneCfg_t cfg[RT_MEM_BUFF_MAX_CFG_NUM]; +}rtMemBuffCfg_t; + +typedef void *rtMbufPtr_t; + +/** + * @ingroup rt_mem_queue + * @brief init queue schedule + * @param [in] device the logical device id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueInitQS(int32_t device); + +/** + * @ingroup rt_mem_queue + * @brief create mbuf queue + * @param [in] device the logical device id + * @param [in] rtMemQueueAttr attribute of queue + * @param [out] qid queue id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueCreate(int32_t device, const rtMemQueueAttr_t *queueAttr, uint32_t *qid); + +/** + * @ingroup rt_mem_queue + * @brief destroy mbuf queue + * @param [in] device the logical device id + * @param [in] qid queue id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDestroy(int32_t device, uint32_t qid); + +/** + * @ingroup rt_mem_queue + * @brief destroy mbuf queue init + * @param [in] device the logical device id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueInit(int32_t device); + +/** + * @ingroup rt_mem_queue + * @brief enqueu mbuf + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [in] mbuf enqueue mbuf + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueEnQueue(int32_t device, uint32_t qid, void *mbuf); + + +/** + * @ingroup rt_mem_queue + * @brief enqueu mbuf + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [out] mbuf dequeue mbuf + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDeQueue(int32_t device, uint32_t qid, void **mbuf); + +/** + * @ingroup rt_mem_queue + * @brief enqueu peek + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [out] bufLen length of mbuf in queue + * @param [in] timeout peek timeout (ms), -1: wait all the time until peeking success + 
* @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueuePeek(int32_t device, uint32_t qid, size_t *bufLen, int32_t timeout); + +/** + * @ingroup rt_mem_queue + * @brief enqueu buff + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [in] inBuf enqueue buff + * @param [in] timeout enqueue timeout (ms), -1: wait all the time until enqueue success + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueEnQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *inBuf, int32_t timeout); + +/** + * @ingroup rt_mem_queue + * @brief enqueu buff + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [out] outBuf dequeue buff + * @param [in] timeout dequeue timeout (ms), -1: wait all the time until dequeue success + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDeQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *outBuf, int32_t timeout); + + +/** +* @ingroup rt_mem_queue +* @brief query queue status +* @param [in] device: the logical device id +* @param [in] cmd: query cmd +* @param [in] inBuff: input buff +* @param [in] inLen: the length of input +* @param [in|out] outBuff: output buff +* @param [in|out] outLen: the length of output +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueQuery(int32_t device, rtMemQueueQueryCmd_t cmd, const void *inBuff, uint32_t inLen, + void *outBuff, uint32_t *outLen); + +/** +* @ingroup rt_mem_queue +* @brief grant queue +* @param [in] device: logic devid +* @param [in] qid: queue id +* @param [in] pid: pid +* @param [in] attr: queue share attr +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueGrant(int32_t device, uint32_t qid, int32_t pid, rtMemQueueShareAttr_t *attr); + +/** +* @ingroup rt_mem_queue +* @brief attach queue +* @param [in] device: logic devid +* @param [in] qid: queue id +* @param [in] timeOut: timeOut +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueAttach(int32_t device, uint32_t qid, int32_t timeOut); + +/** +* @ingroup rt_mem_queue +* @brief Commit the event to a specific process +* @param [in] device: logic devid +* @param [in] event: event summary info +* @param [out] ack: event reply info +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtEschedSubmitEventSync(int32_t device, rtEschedEventSummary_t *event, + rtEschedEventReply_t *ack); + +/** +* @ingroup rt_mem_queue +* @brief query device proccess id +* @param [in] info: see struct rtBindHostpidInfo_t +* @param [out] devPid: device proccess id +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtQueryDevPid(rtBindHostpidInfo_t *info, int32_t *devPid); + +/** +* @ingroup rt_mem_queue +* @brief device buff init +* @param [in] cfg, init cfg +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); + +/** +* @ingroup rt_mem_queue +* @brief alloc buff +* @param [out] buff: buff addr alloced +* @param [in] size: The amount of memory space requested +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size); + +/** +* @ingroup rt_mem_queue +* @brief free buff +* @param [in] buff: buff addr to be freed +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf); + +/** +* @ingroup rt_mem_queue +* @brief get Data addr of Mbuf +* @param [in] mbuf: Mbuf addr +* @param [out] buf: Mbuf data addr +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf); + +/** +* @ingroup rt_mem_queue +* @brief get 
total Buffer size of Mbuf +* @param [in] mbuf: Mbuf addr +* @param [out] totalSize: total buffer size of Mbuf +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize); + +/** +* @ingroup rt_mem_queue +* @brief Get the address and length of its user_data from the specified Mbuf +* @param [in] mbuf: Mbuf addr +* @param [out] priv: address of its user_data +* @param [out] size: length of its user_data +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetPrivInfo (rtMbufPtr_t mbuf, void **priv, uint64_t *size); + +// mem group +typedef struct { + uint64_t maxMemSize; // max buf size in grp, in KB. = 0 means no limit +} rtMemGrpConfig_t; + +typedef struct { + uint32_t admin : 1; // admin permission, can add other proc to grp + uint32_t read : 1; // read only permission + uint32_t write : 1; // read and write permission + uint32_t alloc : 1; // alloc permission (have read and write permission) + uint32_t rsv : 28; +} rtMemGrpShareAttr_t; + +#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp + +typedef struct { + int32_t pid; +} rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + +typedef union { + rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS +} rtMemGrpQueryInput_t; + +#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN + +typedef struct { + char groupName[RT_MEM_GRP_NAME_LEN]; // group name + rtMemGrpShareAttr_t attr; // process in group attribute +} rtMemGrpOfProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + +typedef struct { + rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + size_t maxNum; // max number of result + size_t resultNum; // if the number of results exceeds 'maxNum', only 'maxNum' results are filled in buffer +} rtMemGrpQueryOutput_t; + +/** +* @ingroup rt_mem_queue +* @brief create mem group +* @attention null +* @param [in] name, group name +* @param [in] cfg, group cfg +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpCreate(const char *name, const rtMemGrpConfig_t *cfg); + +/** +* @ingroup rt_mem_queue +* @brief add process to group +* @param [in] name, group name +* @param [in] pid, process id +* @param [in] attr, process permission in group +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpAddProc(const char *name, int32_t pid, const rtMemGrpShareAttr_t *attr); + +/** +* @ingroup rt_mem_queue +* @brief attach proccess to check permission in group +* @param [in] name, group name +* @param [in] timeout, time out ms +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpAttach(const char *name, int32_t timeout); + +/** +* @ingroup rt_mem_queue +* @brief buff group query +* @param [in] cmd, cmd type +* @param [in] input, query input +* @param [in|out] output, query output +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpQuery(int32_t cmd, const rtMemGrpQueryInput_t *input, rtMemGrpQueryOutput_t *output); + +#if defined(__cplusplus) +} +#endif +#endif // CCE_RUNTIME_RT_MEM_QUEUE_H diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index d0ffe9c8..d330fe3e 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -14,8 +14,8 @@ * limitations under the License. 
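A hedged end-to-end sketch of the mbuf/queue interface declared in rt_mem_queue.h above: create a queue, move one mbuf through it, and tear it down. The device id, queue name, buffer size, and the assumption that rtMbufInit has already configured the buffer pools are illustrative:

    #include <string.h>

    int32_t devId = 0;                        /* logical device id (assumed) */
    uint32_t qid = 0U;
    rtMemQueueAttr_t attr = {0};
    (void)strncpy(attr.name, "demo_q", RT_MQ_MAX_NAME_LEN - 1);
    attr.depth = RT_MQ_DEPTH_MIN;
    attr.workMode = RT_MQ_MODE_DEFAULT;

    (void)rtMemQueueInit(devId);              /* per-device queue init */
    (void)rtMemQueueCreate(devId, &attr, &qid);

    rtMbufPtr_t mbuf = NULL;
    void *data = NULL;
    (void)rtMbufAlloc(&mbuf, 1024U);          /* assumes a prior rtMbufInit(cfg) */
    (void)rtMbufGetBuffAddr(mbuf, &data);     /* producer fills *data here */
    (void)rtMemQueueEnQueue(devId, qid, mbuf);

    void *out = NULL;
    (void)rtMemQueueDeQueue(devId, qid, &out);
    (void)rtMbufFree((rtMbufPtr_t)out);
    (void)rtMemQueueDestroy(devId, qid);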
*/ -#ifndef __CCE_RUNTIME_MODEL_H__ -#define __CCE_RUNTIME_MODEL_H__ +#ifndef CCE_RUNTIME_RT_MODEL_H +#define CCE_RUNTIME_RT_MODEL_H #include "base.h" @@ -42,7 +42,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_NOTIFY_WAIT, RT_MODEL_TASK_REDUCE_ASYNC, RT_MODEL_TASK_RDMA_SEND, - RT_MODEL_TASK_EVENT_RESET = 18, + RT_MODEL_TASK_EVENT_RESET, RT_MODEL_TASK_MODEL_END_GRAPH, RT_MODEL_TASK_STREAM_SWITCH_N, RT_MODEL_TASK_RDMA_DB_SEND, @@ -66,16 +66,16 @@ typedef enum tagModelQueueFlag { RT_MODEL_OUTPUT_QUEUE = 1 } rtModelQueueFlag_t; -#define EXECUTOR_NONE ((uint32_t)0x0) -#define EXECUTOR_TS ((uint32_t)0x01) -#define EXECUTOR_AICPU ((uint32_t)0x02) +#define EXECUTOR_NONE (0x0U) +#define EXECUTOR_TS (0x01U) +#define EXECUTOR_AICPU (0x02U) /* * @ingroup rt_model * @brief debug flag for kernel exception dump */ -#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1 << 0) -#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1 << 1) +#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1U << 0U) +#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1U << 1U) /** * @ingroup @@ -392,12 +392,12 @@ RTS_API rtError_t rtModelExecute(rtModel_t model, rtStream_t stream, uint32_t fl * @ingroup rt_model * @brief get model the last persist task id * @param [in] model model to execute - * @param [out] taskid last task id of the model - * @param [out] streamid last steam id of the model + * @param [out] taskId last task id of the model + * @param [out] streamId last steam id of the model * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskid, uint32_t *streamid); +RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskId, uint32_t *streamId); /** * @ingroup rt_model @@ -495,4 +495,4 @@ RTS_API rtError_t rtDebugUnRegister(rtModel_t model); } #endif -#endif // __CCE_RUNTIME_MODEL_H__ +#endif // CCE_RUNTIME_RT_MODEL_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h index 016c352a..12b836e2 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -1,10 +1,10 @@ /* * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
- * Description: + * Description: the definition of stars */ -#ifndef __CCE_RUNTIME_STARS_H -#define __CCE_RUNTIME_STARS_H +#ifndef CCE_RUNTIME_RT_STARS_H +#define CCE_RUNTIME_RT_STARS_H #include "base.h" @@ -84,4 +84,4 @@ RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, c } #endif -#endif // __CCE_RUNTIME_STARS_H +#endif // CCE_RUNTIME_RT_STARS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_stars_define.h b/third_party/fwkacllib/inc/runtime/rt_stars_define.h index d77a8a8e..208f7aa6 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars_define.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars_define.h @@ -3,8 +3,8 @@ * Description: the definition of stars */ -#ifndef __CCE_RUNTIME_STARS_DEFINE__H -#define __CCE_RUNTIME_STARS_DEFINE__H +#ifndef CCE_RUNTIME_RT_STARS_DEFINE_H +#define CCE_RUNTIME_RT_STARS_DEFINE_H #include "base.h" @@ -88,4 +88,4 @@ typedef struct tagFftsPlusSqe { #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_STARS_DEFINE__H \ No newline at end of file +#endif // CCE_RUNTIME_RT_STARS_DEFINE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 3a078e99..c783b892 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CCE_RUNTIME_STREAM_H__ -#define __CCE_RUNTIME_STREAM_H__ +#ifndef CCE_RUNTIME_STREAM_H +#define CCE_RUNTIME_STREAM_H #include "base.h" #include "event.h" @@ -28,27 +28,27 @@ extern "C" { * @ingroup stream_flags * @brief stream op bit flags */ -#define RT_STREAM_DEFAULT (0x00) -#define RT_STREAM_PERSISTENT (0x01) -#define RT_STREAM_FORCE_COPY (0x02) -#define RT_STREAM_HUGE (0x04) -#define RT_STREAM_AICPU (0x08) -#define RT_STREAM_FORBIDDEN_DEFAULT (0x10) -#define RT_STREAM_HEAD (0x20) -#define RT_STREAM_PRIMARY_DEFAULT (0x40) -#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) +#define RT_STREAM_DEFAULT (0x00U) +#define RT_STREAM_PERSISTENT (0x01U) +#define RT_STREAM_FORCE_COPY (0x02U) +#define RT_STREAM_HUGE (0x04U) +#define RT_STREAM_AICPU (0x08U) +#define RT_STREAM_FORBIDDEN_DEFAULT (0x10U) +#define RT_STREAM_HEAD (0x20U) +#define RT_STREAM_PRIMARY_DEFAULT (0x40U) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U) /** * @ingroup stream_type * @brief stream type */ -#define RT_NORMAL_STREAM (0x00) -#define RT_HUGE_STREAM (0x01) +#define RT_NORMAL_STREAM (0x00U) +#define RT_HUGE_STREAM (0x01U) /** * priority level default value when create a stream */ -#define RT_STREAM_PRIORITY_DEFAULT (0) +#define RT_STREAM_PRIORITY_DEFAULT (0U) /** * @ingroup dvrt_stream @@ -215,4 +215,4 @@ RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // CCE_RUNTIME_STREAM_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index d65aac83..09a35c5d 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -22,18 +22,7 @@ #define PROF_TASK_TIME 0x00000002 #define PROF_AICORE_METRICS 0x00000004 #define PROF_AICPU_TRACE 0x00000008 -#define PROF_MODEL_EXECUTE 0x00000010 -#define PROF_RUNTIME_API 0x00000020 -#define PROF_RUNTIME_TRACE 0x00000040 -#define PROF_SCHEDULE_TIMELINE 0x00000080 -#define PROF_SCHEDULE_TRACE 0x00000100 -#define PROF_AIVECTORCORE_METRICS 0x00000200 
-#define PROF_SUBTASK_TIME 0x00000400 - -#define PROF_TRAINING_TRACE 0x00000800 -#define PROF_HCCL_TRACE 0x00001000 - -#define PROF_TASK_TRACE 0x00001852 +#define PROF_L2CACHE 0x00000010 // system profilinig switch #define PROF_CPU 0x00010000 @@ -44,6 +33,19 @@ #define PROF_SYS_AICORE_SAMPLE 0x00200000 #define PROF_AIVECTORCORE_SAMPLE 0x00400000 +#define PROF_MODEL_EXECUTE 0x0000001000000 +#define PROF_RUNTIME_API 0x0000002000000 +#define PROF_RUNTIME_TRACE 0x0000004000000 +#define PROF_SCHEDULE_TIMELINE 0x0000008000000 +#define PROF_SCHEDULE_TRACE 0x0000010000000 +#define PROF_AIVECTORCORE_METRICS 0x0000020000000 +#define PROF_SUBTASK_TIME 0x0000040000000 + +#define PROF_TRAINING_TRACE 0x0000080000000 +#define PROF_HCCL_TRACE 0x0000100000000 + +#define PROF_TASK_TRACE 0x0000185000002 + #define PROF_MODEL_LOAD 0x8000000000000000 // DataTypeConfig MASK @@ -51,16 +53,7 @@ #define PROF_TASK_TIME_MASK 0x00000002 #define PROF_AICORE_METRICS_MASK 0x00000004 #define PROF_AICPU_TRACE_MASK 0x00000008 -#define PROF_MODEL_EXECUTE_MASK 0x00000010 -#define PROF_RUNTIME_API_MASK 0x00000020 -#define PROF_RUNTIME_TRACE_MASK 0x00000040 -#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 -#define PROF_SCHEDULE_TRACE_MASK 0x00000100 -#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 -#define PROF_SUBTASK_TIME_MASK 0x00000400 - -#define PROF_TRAINING_TRACE_MASK 0x00000800 -#define PROF_HCCL_TRACE_MASK 0x00001000 +#define PROF_L2CACHE_MASK 0x00000010 // system profilinig mask #define PROF_CPU_MASK 0x00010000 @@ -71,6 +64,17 @@ #define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 #define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 +#define PROF_MODEL_EXECUTE_MASK 0x0000001000000 +#define PROF_RUNTIME_API_MASK 0x0000002000000 +#define PROF_RUNTIME_TRACE_MASK 0x0000004000000 +#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000 +#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000 +#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000 +#define PROF_SUBTASK_TIME_MASK 0x0000040000000 + +#define PROF_TRAINING_TRACE_MASK 0x0000080000000 +#define PROF_HCCL_TRACE_MASK 0x0000100000000 + #define PROF_MODEL_LOAD_MASK 0x8000000000000000 #ifndef OS_TYPE
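For reference, the widened feature bits above are still OR-ed into a single 64-bit DataTypeConfig; the PROF_TASK_TRACE preset (0x0000185000002) corresponds to PROF_TASK_TIME | PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | PROF_HCCL_TRACE under the new layout. A minimal sketch (the variable name is illustrative):

    uint64_t dataTypeConfig = PROF_TASK_TIME | PROF_AICORE_METRICS | PROF_RUNTIME_TRACE;
    /* presets such as PROF_TASK_TRACE can also be used directly */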