diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60509838..5e58eeba 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -125,7 +125,6 @@ else ()
         message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
     endif()
 endif()
-
 set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)
 set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/parser)
 set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..)
@@ -158,6 +157,7 @@ else ()
     elseif(ENABLE_MS_TESTCASES)
         include(cmake/external_libs/protobuf_static.cmake)
         include(cmake/external_libs/protoc.cmake)
+        include(cmake/external_libs/json.cmake)
         include(cmake/external_libs/securec.cmake)
         include(cmake/FindModule.cmake)
         include(cmake/intf_pub_linux.cmake)
@@ -175,5 +175,4 @@ else ()
     endif()
 
     add_subdirectory(ge)
-
 endif ()
diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake
index 3c1cd012..04659ebc 100755
--- a/cmake/external_libs/json.cmake
+++ b/cmake/external_libs/json.cmake
@@ -9,10 +9,6 @@ if (GE_PB_PKG)
     set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
     set(MD5 "0dc903888211db3a0f170304cd9f3a89")
     set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#elseif (ENABLE_GITEE)
-#    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
-#    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#    set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
 else()
     set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
     set(MD5 "0dc903888211db3a0f170304cd9f3a89")
diff --git a/ge/common/model/ge_model.cc b/ge/common/model/ge_model.cc
index 7fc58b6d..b90c3466 100755
--- a/ge/common/model/ge_model.cc
+++ b/ge/common/model/ge_model.cc
@@ -34,7 +34,6 @@ void GeModel::Init() {
 }
 
 GeModel::GeModel() {
-  attrs_.InitDefault();
   Init();
 }
 
@@ -78,12 +77,12 @@ void GeModel::SetPlatformVersion(const std::string &platform_version) { this->pl
 
 void GeModel::SetPlatformType(uint8_t platform_type) { this->platform_type_ = platform_type; }
 
-void GeModel::SetAttr(const ProtoAttrMapHelper &attrs) { attrs_ = attrs; }
+void GeModel::SetAttr(const ProtoAttrMap &attrs) { attrs_ = attrs; }
 
-ProtoAttrMapHelper GeModel::MutableAttrMap() { return attrs_; }
+ProtoAttrMap &GeModel::MutableAttrMap() { return attrs_; }
 
-ConstProtoAttrMapHelper GeModel::GetAttrMap() const {
-  return ConstProtoAttrMapHelper(attrs_.GetProtoOwner(), attrs_.GetProtoMsg());
+ConstProtoAttrMap &GeModel::GetAttrMap() const {
+  return attrs_;
 }
 
 Status GeModel::GetSessionId(uint32_t model_id, uint64_t &session_id) const {
diff --git a/ge/common/model/ge_model.h b/ge/common/model/ge_model.h
index 0e791746..08d7c4da 100755
--- a/ge/common/model/ge_model.h
+++ b/ge/common/model/ge_model.h
@@ -17,24 +17,26 @@
 #ifndef GE_MODEL_GE_MODEL_H_
 #define GE_MODEL_GE_MODEL_H_
 
-#include <securec.h>
 #include <map>
 #include <memory>
 #include <string>
+
+#include "securec.h"
+#include "runtime/rt.h"
 #include "common/tbe_kernel_store.h"
 #include "common/cust_aicpu_kernel_store.h"
 #include "framework/common/debug/log.h"
 #include "framework/common/fmk_error_codes.h"
+#include "framework/common/ge_types.h"
 #include "graph/buffer.h"
 #include "external/graph/graph.h"
 #include "proto/task.pb.h"
 
 namespace ge {
-const uint32_t INVALID_MODEL_ID = 0xFFFFFFFFUL;
 class GeModel : public AttrHolder {
  public:
   GeModel();
-  ~GeModel() = default;
+  ~GeModel() override = default;
 
   GeModel(const GeModel &other) = delete;
   GeModel &operator=(const GeModel &other) = delete;
@@ -55,34 +57,34 @@ class GeModel : public AttrHolder {
   void SetCustAICPUKernelStore(const CustAICPUKernelStore &cust_aicpu_kernal_store);
   void SetWeight(const Buffer &weights_buffer);
 
+  bool LoadTBEKernelStore(const uint8_t *const data, const size_t len);
+  bool loadAICPUKernelStore(const uint8_t *const data, const size_t len);
+
   void SetName(const std::string &name);
-  void SetVersion(uint32_t version);
+  void SetVersion(const uint32_t version);
   void SetPlatformVersion(const std::string &platform_version);
-  void SetPlatformType(uint8_t platform_type);
+  void SetPlatformType(const uint8_t platform_type);
 
-  void SetAttr(const ProtoAttrMapHelper &attrs);
+  void SetAttr(const ProtoAttrMap &attrs);
 
-  ProtoAttrMapHelper MutableAttrMap() override;
+  ProtoAttrMap &MutableAttrMap() override;
 
   using AttrHolder::SetAttr;
 
   using AttrHolder::GetAllAttrs;
 
   using AttrHolder::GetAllAttrNames;
 
-  void SetModelId(uint32_t model_id) { model_id_ = model_id; }
+  void SetModelId(const uint32_t model_id) { model_id_ = model_id; }
   uint32_t GetModelId() const { return model_id_; }
 
-  Status GetSessionId(uint32_t model_id, uint64_t &session_id) const;
-  void InsertSessionMap(uint32_t model_id, uint64_t session_id) {
-    model_id_to_session_id_map_.insert({model_id, session_id});
-  }
+  Status GetSessionId(const uint32_t model_id, uint64_t &session_id) const;
 
 protected:
-  ConstProtoAttrMapHelper GetAttrMap() const override;
+  ConstProtoAttrMap &GetAttrMap() const override;
 
 private:
   void Init();
 
-  ProtoAttrMapHelper attrs_; /*lint !e148*/
+  ProtoAttrMap attrs_; /*lint !e148*/
 
   Graph graph_;
   std::shared_ptr<domi::ModelTaskDef> task_; /*lint !e148*/
@@ -91,9 +93,9 @@ class GeModel : public AttrHolder {
   Buffer weights_buffer_; /*lint !e148*/
 
   std::string name_;
-  uint32_t version_ = {0};
+  uint32_t version_ = {0U};
   std::string platform_version_;
-  uint8_t platform_type_ = {0};
+  uint8_t platform_type_ = {0U};
 
   uint32_t model_id_ = INVALID_MODEL_ID;
   std::map<uint32_t, uint64_t> model_id_to_session_id_map_;
 };
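With MutableAttrMap() now returning ProtoAttrMap & instead of a ProtoAttrMapHelper by value, attribute writes land directly in the model's own attrs_ member rather than in a copied helper object. A hedged caller-side sketch, using only the accessors visible in these hunks (the attr-map member API itself is outside this diff):

    ge::GeModel model;
    model.SetModelId(1U);
    // Reference semantics: no helper copy is made, so any attribute written
    // through this reference is immediately visible on the model itself.
    ge::ProtoAttrMap &attrs = model.MutableAttrMap();
    (void)attrs;  // e.g. passed to graph attr utilities (not part of this diff)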
diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt
index 3243766f..ffea784b 100644
--- a/ge/ge_runtime/CMakeLists.txt
+++ b/ge/ge_runtime/CMakeLists.txt
@@ -16,6 +16,7 @@ set(GE_SRC_LIST
     "task/label_goto_task.cc"
     "task/label_set_task.cc"
     "task/label_switch_task.cc"
+    "task/label_manager.cc"
 )
 
 add_library(ge_runtime SHARED ${GE_SRC_LIST})
diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc
index b1c7158c..bfe0d0f3 100644
--- a/ge/ge_runtime/task/hccl_task.cc
+++ b/ge/ge_runtime/task/hccl_task.cc
@@ -53,15 +53,7 @@ HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<Hcc
   if (task_info_->workspace_size() > 0) {
-    rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
-      return false;
-    }
+    workspace_mem_ = task_info_->workspace_addr();
   }
 
   GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl.");
diff --git a/ge/ge_runtime/task/label_goto_task.cc b/ge/ge_runtime/task/label_goto_task.cc
index 7cb6d556..a3b70971 100644
--- a/ge/ge_runtime/task/label_goto_task.cc
+++ b/ge/ge_runtime/task/label_goto_task.cc
@@ -16,33 +16,46 @@
 #include "ge_runtime/task/label_goto_task.h"
 #include "ge_runtime/task/task_factory.h"
-#include "framework/common/util.h"
 
 namespace ge {
 namespace model_runner {
 LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info)
-    : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) {
+    : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info),
+      task_info_(task_info),
+      stream_(nullptr),
+      index_value_(nullptr) {
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
     return;
   }
   auto stream_list = model_context.stream_list();
   auto label_list = model_context.label_list();
+  rt_model_handle_ = model_context.rt_model_handle();
   uint32_t stream_id = task_info->stream_id();
-  uint32_t label_id = task_info->label_id();
+  label_id_ = task_info->label_id();
   GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
-  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id);
-  if (stream_id >= stream_list.size() || label_id >= label_list.size()) {
+  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id_);
+  if (stream_id >= stream_list.size() || label_id_ >= label_list.size()) {
     GELOGW("Stream/Label id invalid.");
     return;
   }
   stream_ = stream_list[stream_id];
-  label_ = label_list[label_id];
+  label_manager_ = LabelManager::GetInstance();
+  if (label_manager_ == nullptr) {
+    GELOGW("Get label manager instance failed.");
+    return;
+  }
+  label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, {label_id_}, label_list);
 }
 
 LabelGotoTask::~LabelGotoTask() {
-  GE_FREE_RT_LOG(label_info_);
-  GE_FREE_RT_LOG(index_value_);
+  if (index_value_ != nullptr) {
+    rtError_t rt_ret = rtFree(index_value_);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "rtFree index_value_ failed! ret: 0x%X.", rt_ret);
+    }
+    index_value_ = nullptr;
+  }
 }
 
 bool LabelGotoTask::Distribute() {
@@ -94,21 +107,34 @@ bool LabelGotoTask::CheckParamValid() {
     return false;
   }
 
-  if (label_ == nullptr) {
-    GELOGE(PARAM_INVALID, "label is null!");
+  if (label_info_ == nullptr) {
+    GELOGE(PARAM_INVALID, "label info is null!");
     return false;
   }
 
-  if (label_info_ != nullptr) {
-    GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
-    return false;
+  if (index_value_ == nullptr) {
+    rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return false;
+    }
+
+    uint64_t index = 0;
+    rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return false;
+    }
   }
 
-  if (index_value_ != nullptr) {
-    GELOGE(PARAM_INVALID, "index_value_ has dirty data.");
+  void *label_info = label_info_->GetLabelInfo();
+  rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, 1, label_info, stream_);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return false;
   }
 
+  GELOGI("DistributeTask end.");
   return true;
 }
diff --git a/ge/ge_runtime/task/label_goto_task.h b/ge/ge_runtime/task/label_goto_task.h
index addbb700..e579c683 100644
--- a/ge/ge_runtime/task/label_goto_task.h
+++ b/ge/ge_runtime/task/label_goto_task.h
@@ -18,7 +18,11 @@
 #define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_
 
 #include <memory>
+#include <map>
+#include <mutex>
+#include <vector>
 #include "ge_runtime/task/task.h"
+#include "ge_runtime/task/label_manager.h"
 
 namespace ge {
 namespace model_runner {
@@ -31,13 +35,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> {
   bool Distribute() override;
 
 private:
-  bool CheckParamValid();
-
   std::shared_ptr<LabelGotoTaskInfo> task_info_;
-  void *stream_{nullptr};
-  void *label_{nullptr};
-  void *label_info_{nullptr};
-  void *index_value_{nullptr};
+  void *stream_;
+  std::shared_ptr<LabelGuard> label_info_;
+  void *index_value_;
+  uint32_t label_id_;
+  rtModel_t rt_model_handle_;
+  std::shared_ptr<LabelManager> label_manager_;
 };
 }  // namespace model_runner
 }  // namespace ge
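The rewritten Distribute path implements "goto" as a degenerate label switch: a one-slot index buffer is allocated once, set to 0 on the device, and rtLabelSwitchByIndex then always takes branch 0 of a one-entry label table. Condensed, the call sequence the hunk above performs is:

    uint64_t index = 0;  // always take branch 0 of the switch
    rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM);
    rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE);
    // A 1-entry label switch is an unconditional goto to the cached label info.
    rtLabelSwitchByIndex(index_value_, 1, label_info_->GetLabelInfo(), stream_);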
diff --git a/ge/ge_runtime/task/label_manager.cc b/ge/ge_runtime/task/label_manager.cc
new file mode 100644
index 00000000..a2b0c3aa
--- /dev/null
+++ b/ge/ge_runtime/task/label_manager.cc
@@ -0,0 +1,119 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ge_runtime/task/label_manager.h"
+#include <algorithm>
+#include <string>
+#include "runtime/mem.h"
+#include "runtime/rt_model.h"
+#include "common/ge_inner_error_codes.h"
+#include "framework/common/debug/ge_log.h"
+
+namespace ge {
+namespace model_runner {
+std::weak_ptr<LabelManager> LabelManager::instance_;
+std::mutex LabelManager::instance_mutex_;
+
+template <typename T>
+static std::string GetVectorString(const std::vector<T> &vec) {
+  std::string ret;
+  for (size_t i = 0; i < vec.size(); ++i) {
+    if (i != 0) {
+      ret.push_back(',');
+    }
+    ret += std::to_string(vec[i]);
+  }
+  return ret;
+}
+
+LabelGuard::~LabelGuard() {
+  void *label_info = GetLabelInfo();
+  if (label_info != nullptr) {
+    rtError_t rt_ret = rtFree(label_info);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "rtFree label_info failed! ret: 0x%X.", rt_ret);
+    }
+  }
+}
+
+std::shared_ptr<LabelManager> LabelManager::GetInstance() {
+  std::lock_guard<std::mutex> lock(instance_mutex_);
+  auto instance = instance_.lock();
+  if (instance != nullptr) {
+    return instance;
+  }
+
+  instance = std::make_shared<LabelManager>();
+  instance_ = instance;
+  return instance;
+}
+
+std::shared_ptr<LabelGuard> LabelManager::GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids,
+                                                       const std::vector<void *> &all_label) {
+  std::lock_guard<std::mutex> lock(model_info_mapping_mutex_);
+  rtError_t rt_ret;
+  auto model_iter = model_info_mapping_.find(model);
+  if (model_iter == model_info_mapping_.end()) {
+    model_info_mapping_.emplace(model, std::map<std::string, std::weak_ptr<LabelGuard>>());
+    model_iter = model_info_mapping_.find(model);
+  }
+
+  std::string label_id_str = GetVectorString(label_ids);
+  auto &label_map = model_iter->second;
+  auto label_iter = label_map.find(label_id_str);
+  if (label_iter != label_map.end()) {
+    auto label_guard = label_iter->second.lock();
+    if (label_guard != nullptr) {
+      GELOGI("model %p find same label id %s.", model, label_id_str.c_str());
+      return label_guard;
+    }
+  }
+
+  GELOGI("Alloc label id %s for model %p.", label_id_str.c_str(), model);
+  void *label_info;
+  std::vector<void *> label_list;
+  bool status = true;
+  std::transform(label_ids.begin(), label_ids.end(), std::back_inserter(label_list),
+                 [&all_label, &status](uint32_t idx) -> void * {
+                   if (idx >= all_label.size()) {
+                     GELOGE(PARAM_INVALID, "Invalid label id %u, all label list size %zu.", idx, all_label.size());
+                     status = false;
+                     return nullptr;
+                   }
+                   return all_label[idx];
+                 });
+  if (!status) {
+    GELOGE(PARAM_INVALID, "Get label info failed.");
+    return nullptr;
+  }
+  uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size();
+  rt_ret = rtMalloc(&label_info, label_info_size, RT_MEMORY_HBM);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+    return nullptr;
+  }
+
+  rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info, label_info_size);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+    return nullptr;
+  }
+
+  auto label_guard = std::make_shared<LabelGuard>(label_info);
+  label_map.emplace(label_id_str, label_guard);
+  return label_guard;
+}
+}  // namespace model_runner
+}  // namespace ge
diff --git a/ge/ge_runtime/task/label_manager.h b/ge/ge_runtime/task/label_manager.h
new file mode 100644
index 00000000..f2c42c29
--- /dev/null
+++ b/ge/ge_runtime/task/label_manager.h
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
+#define GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
+
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace ge {
+namespace model_runner {
+class LabelGuard {
+ public:
+  explicit LabelGuard(void *label_info) : label_info_(reinterpret_cast<uintptr_t>(label_info)) {}
+  ~LabelGuard();
+  void *GetLabelInfo() { return reinterpret_cast<void *>(label_info_); }
+
+ private:
+  uintptr_t label_info_;
+};
+
+class LabelManager {
+ public:
+  static std::shared_ptr<LabelManager> GetInstance();
+  std::shared_ptr<LabelGuard> GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids,
+                                           const std::vector<void *> &all_label);
+
+ private:
+  std::mutex model_info_mapping_mutex_;
+  std::map<rtModel_t, std::map<std::string, std::weak_ptr<LabelGuard>>> model_info_mapping_;
+
+  static std::weak_ptr<LabelManager> instance_;
+  static std::mutex instance_mutex_;
+};
+
+
+}  // namespace model_runner
+}  // namespace ge
+#endif  // GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
\ No newline at end of file
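LabelManager is a process-wide cache keyed by (rtModel_t, label-id string): tasks that reference the same labels of the same model share one device-side rtLabelDevInfo buffer, and the buffer is released by ~LabelGuard via rtFree when the last shared_ptr drops. Typical task-side usage, as the goto task above and the switch task below do:

    auto manager = LabelManager::GetInstance();  // weak_ptr-backed shared singleton
    std::shared_ptr<LabelGuard> guard =
        manager->GetLabelInfo(rt_model_handle, {label_id}, label_list);
    if (guard != nullptr) {
      void *dev_label_info = guard->GetLabelInfo();  // device memory, owned by the guard
    }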
diff --git a/ge/ge_runtime/task/label_switch_task.cc b/ge/ge_runtime/task/label_switch_task.cc
index 8c795da9..cde278d9 100644
--- a/ge/ge_runtime/task/label_switch_task.cc
+++ b/ge/ge_runtime/task/label_switch_task.cc
@@ -24,14 +24,14 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
     : TaskRepeater<LabelSwitchTaskInfo>(model_context, task_info),
       task_info_(task_info),
       stream_(nullptr),
-      all_label_resource_(),
       label_info_(nullptr) {
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
     return;
   }
 
-  all_label_resource_ = model_context.label_list();
+  rt_model_handle_ = model_context.rt_model_handle();
+  auto all_label_resource = model_context.label_list();
   auto stream_list = model_context.stream_list();
   uint32_t stream_id = task_info->stream_id();
   GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
@@ -40,18 +40,16 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
     return;
   }
   stream_ = stream_list[stream_id];
-}
-
-LabelSwitchTask::~LabelSwitchTask() {
-  if (label_info_ != nullptr) {
-    rtError_t rt_ret = rtFree(label_info_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! ret: 0x%X.", rt_ret);
-    }
-    label_info_ = nullptr;
+  label_manager_ = LabelManager::GetInstance();
+  if (label_manager_ == nullptr) {
+    GELOGW("Get label manager instance failed.");
+    return;
   }
+  label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, task_info_->label_list(), all_label_resource);
 }
 
+LabelSwitchTask::~LabelSwitchTask() {}
+
 bool LabelSwitchTask::Distribute() {
   GELOGI("LabelSwitchTask Distribute start.");
   if (!CheckParamValid()) {
@@ -117,8 +115,8 @@ bool LabelSwitchTask::CheckParamValid() {
     return false;
   }
 
-  if (label_info_ != nullptr) {
-    GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
+  if (label_info_ == nullptr) {
+    GELOGE(PARAM_INVALID, "CopyLabelList failed, label info is null.");
     return false;
   }
 
@@ -126,6 +124,5 @@ bool LabelSwitchTask::CheckParamValid() {
 }
 
 REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo);
-
 }  // namespace model_runner
 }  // namespace ge
diff --git a/ge/ge_runtime/task/label_switch_task.h b/ge/ge_runtime/task/label_switch_task.h
index 463faa31..cfa6877c 100644
--- a/ge/ge_runtime/task/label_switch_task.h
+++ b/ge/ge_runtime/task/label_switch_task.h
@@ -19,6 +19,7 @@
 
 #include <memory>
 #include "ge_runtime/task/task.h"
+#include "ge_runtime/task/label_manager.h"
 
 namespace ge {
 namespace model_runner {
@@ -35,8 +36,9 @@ class LabelSwitchTask : public TaskRepeater<LabelSwitchTaskInfo> {
 
   std::shared_ptr<LabelSwitchTaskInfo> task_info_;
   void *stream_;
-  std::vector<void *> all_label_resource_;
-  void *label_info_;
+  rtModel_t rt_model_handle_;
+  std::shared_ptr<LabelGuard> label_info_;
+  std::shared_ptr<LabelManager> label_manager_;
 };
 }  // namespace model_runner
 }  // namespace ge
diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h
new file mode 100644
index 00000000..5fbaccc7
--- /dev/null
+++ b/inc/external/acl/acl.h
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_H_
+#define INC_EXTERNAL_ACL_ACL_H_
+
+#include "acl_rt.h"
+#include "acl_op.h"
+#include "acl_mdl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Current version is 1.2.0
+#define ACL_MAJOR_VERSION 1
+#define ACL_MINOR_VERSION 2
+#define ACL_PATCH_VERSION 0
+
+/**
+ * @ingroup AscendCL
+ * @brief acl initialize
+ *
+ * @par Restriction
+ * The aclInit interface can be called only once in a process
+ * @param configPath [IN] the config path, it can be NULL
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);
+
+/**
+ * @ingroup AscendCL
+ * @brief acl finalize
+ *
+ * @par Restriction
+ * Need to call aclFinalize before the process exits.
+ * After calling aclFinalize, the services cannot continue to be used normally.
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclFinalize();
+
+/**
+ * @ingroup AscendCL
+ * @brief query ACL interface version
+ *
+ * @param majorVersion[OUT] ACL interface major version
+ * @param minorVersion[OUT] ACL interface minor version
+ * @param patchVersion[OUT] ACL interface patch version
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);
+
+/**
+ * @ingroup AscendCL
+ * @brief get recent error message
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_H_
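Taken together, this header defines the whole process lifecycle: aclInit must be the first ACL call in a process and aclFinalize the last. A minimal sketch using only the declarations above (plus ACL_SUCCESS from acl_base.h):

    #include "acl/acl.h"

    int main() {
      if (aclInit(nullptr) != ACL_SUCCESS) {  // config path may be NULL
        return -1;
      }
      int32_t major = 0, minor = 0, patch = 0;
      (void)aclrtGetVersion(&major, &minor, &patch);  // expect 1.2.0 per the macros above
      // ... open a device, load and run models ...
      (void)aclFinalize();  // required once before exit; ACL services are unusable afterwards
      return 0;
    }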
diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h
new file mode 100644
index 00000000..b23629b6
--- /dev/null
+++ b/inc/external/acl/acl_base.h
@@ -0,0 +1,656 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_
+#define INC_EXTERNAL_ACL_ACL_BASE_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "error_codes/rt_error_codes.h"
+#include "error_codes/ge_error_codes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define ACL_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define ACL_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define ACL_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define ACL_FUNC_VISIBILITY
+#endif
+#endif
+
+#ifdef __GNUC__
+#define ACL_DEPRECATED __attribute__((deprecated))
+#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message)))
+#elif defined(_MSC_VER)
+#define ACL_DEPRECATED __declspec(deprecated)
+#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message))
+#else
+#define ACL_DEPRECATED
+#define ACL_DEPRECATED_MESSAGE(message)
+#endif
+
+typedef void *aclrtStream;
+typedef void *aclrtEvent;
+typedef void *aclrtContext;
+typedef int aclError;
+typedef uint16_t aclFloat16;
+typedef struct aclDataBuffer aclDataBuffer;
+typedef struct aclTensorDesc aclTensorDesc;
+
+static const int ACL_ERROR_NONE = 0;
+static const int ACL_SUCCESS = 0;
+
+static const int ACL_ERROR_INVALID_PARAM = 100000;
+static const int ACL_ERROR_UNINITIALIZE = 100001;
+static const int ACL_ERROR_REPEAT_INITIALIZE = 100002;
+static const int ACL_ERROR_INVALID_FILE = 100003;
+static const int ACL_ERROR_WRITE_FILE = 100004;
+static const int ACL_ERROR_INVALID_FILE_SIZE = 100005;
+static const int ACL_ERROR_PARSE_FILE = 100006;
+static const int ACL_ERROR_FILE_MISSING_ATTR = 100007;
+static const int ACL_ERROR_FILE_ATTR_INVALID = 100008;
+static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009;
+static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010;
+static const int ACL_ERROR_INVALID_MODEL_ID = 100011;
+static const int ACL_ERROR_DESERIALIZE_MODEL = 100012;
+static const int ACL_ERROR_PARSE_MODEL = 100013;
+static const int ACL_ERROR_READ_MODEL_FAILURE = 100014;
+static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015;
+static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016;
+static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017;
+static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018;
+static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019;
+static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020;
+static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021;
+static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022;
+static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023;
+static const int ACL_ERROR_OP_NOT_FOUND = 100024;
+static const int ACL_ERROR_OP_LOAD_FAILED = 100025;
+static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026;
+static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027;
+static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028;
+static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029;
+static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030;
+static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031;
+static const int ACL_ERROR_INVALID_QUEUE_ID = 100032;
+static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033;
+static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034;
+static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035;
+static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036;
+static const int ACL_ERROR_REPEAT_FINALIZE = 100037;
+static const int ACL_ERROR_NOT_STATIC_AIPP = 100038;
+static const int ACL_ERROR_COMPILING_STUB_MODE = 100039;
+static const int ACL_ERROR_GROUP_NOT_SET = 100040;
+static const int ACL_ERROR_GROUP_NOT_CREATE = 100041;
+static const int ACL_ERROR_PROF_ALREADY_RUN = 100042;
+static const int ACL_ERROR_PROF_NOT_RUN = 100043;
+static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044;
+static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
+static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
+static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
+static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
+static const int ACL_ERROR_INVALID_OPP_PATH = 148049;
+static const int ACL_ERROR_OP_UNSUPPORTED_DYNAMIC = 148050;
+static const int ACL_ERROR_RELATIVE_RESOURCE_NOT_CLEARED = 148051;
+static const int ACL_ERROR_UNSUPPORTED_JPEG = 148052;
+
+static const int ACL_ERROR_BAD_ALLOC = 200000;
+static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
+static const int ACL_ERROR_INVALID_DEVICE = 200002;
+static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003;
+static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004;
+static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005;
+static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006;
+static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007;
+
+static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000;
+
+static const int ACL_ERROR_INTERNAL_ERROR = 500000;
+static const int ACL_ERROR_FAILURE = 500001;
+static const int ACL_ERROR_GE_FAILURE = 500002;
+static const int ACL_ERROR_RT_FAILURE = 500003;
+static const int ACL_ERROR_DRV_FAILURE = 500004;
+static const int ACL_ERROR_PROFILING_FAILURE = 500005;
+
+#define ACL_TENSOR_SHAPE_RANGE_NUM 2
+#define ACL_TENSOR_VALUE_RANGE_NUM 2
+#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE
+
+typedef enum {
+    ACL_DT_UNDEFINED = -1,
+    ACL_FLOAT = 0,
+    ACL_FLOAT16 = 1,
+    ACL_INT8 = 2,
+    ACL_INT32 = 3,
+    ACL_UINT8 = 4,
+    ACL_INT16 = 6,
+    ACL_UINT16 = 7,
+    ACL_UINT32 = 8,
+    ACL_INT64 = 9,
+    ACL_UINT64 = 10,
+    ACL_DOUBLE = 11,
+    ACL_BOOL = 12,
+    ACL_STRING = 13,
+    ACL_COMPLEX64 = 16,
+    ACL_COMPLEX128 = 17,
+    ACL_BF16 = 27
+} aclDataType;
+
+typedef enum {
+    ACL_FORMAT_UNDEFINED = -1,
+    ACL_FORMAT_NCHW = 0,
+    ACL_FORMAT_NHWC = 1,
+    ACL_FORMAT_ND = 2,
+    ACL_FORMAT_NC1HWC0 = 3,
+    ACL_FORMAT_FRACTAL_Z = 4,
+    ACL_FORMAT_NC1HWC0_C04 = 12,
+    ACL_FORMAT_HWCN = 16,
+    ACL_FORMAT_NDHWC = 27,
+    ACL_FORMAT_FRACTAL_NZ = 29,
+    ACL_FORMAT_NCDHW = 30,
+    ACL_FORMAT_NDC1HWC0 = 32,
+    ACL_FRACTAL_Z_3D = 33
+} aclFormat;
+
+typedef enum {
+    ACL_DEBUG = 0,
+    ACL_INFO = 1,
+    ACL_WARNING = 2,
+    ACL_ERROR = 3,
+} aclLogLevel;
+
+typedef enum { ACL_MEMTYPE_DEVICE = 0, ACL_MEMTYPE_HOST = 1, ACL_MEMTYPE_HOST_COMPILE_INDEPENDENT = 2 } aclMemType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Converts data of type aclFloat16 to data of type float
+ *
+ * @param value [IN] Data to be converted
+ *
+ * @retval Transformed data
+ */
+ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value);
+
+/**
+ * @ingroup AscendCL
+ * @brief Converts data of type float to data of type aclFloat16
+ *
+ * @param value [IN] Data to be converted
+ *
+ * @retval Transformed data
+ */
+ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value);
+
+/**
+ * @ingroup AscendCL
+ * @brief create data of aclDataBuffer
+ *
+ * @param data [IN] pointer to data
+ * @li Need to be managed by the user,
+ * call aclrtMalloc interface to apply for memory,
+ * call aclrtFree interface to release memory
+ *
+ * @param size [IN] size of data in bytes
+ *
+ * @retval pointer to created instance. nullptr if run out of memory
+ *
+ * @see aclrtMalloc | aclrtFree
+ */
+ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of aclDataBuffer
+ *
+ * @par Function
+ * Only the aclDataBuffer type data is destroyed here.
+ * The memory of the data passed in when the aclCreateDataBuffer interface
+ * is called to create aclDataBuffer type data must be released by the user
+ *
+ * @param dataBuffer [IN] pointer to the aclDataBuffer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclCreateDataBuffer
+ */
+ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);
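As the comments above spell out, an aclDataBuffer is only a (pointer, size) wrapper: creating and destroying it never touches the device memory itself. A minimal sketch, assuming the usual aclrtMalloc/aclrtFree pair from acl_rt.h (which is not part of this patch):

    void *dev = nullptr;
    aclError ret = aclrtMalloc(&dev, 1024, ACL_MEM_MALLOC_HUGE_FIRST);  // acl_rt.h, assumed
    aclDataBuffer *buf = (ret == ACL_SUCCESS) ? aclCreateDataBuffer(dev, 1024) : nullptr;
    // ... hand buf to a dataset or op ...
    (void)aclDestroyDataBuffer(buf);  // frees only the wrapper
    (void)aclrtFree(dev);             // the caller still owns the device memory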
+
+/**
+ * @ingroup AscendCL
+ * @brief update new data of aclDataBuffer
+ *
+ * @param dataBuffer [OUT] pointer to aclDataBuffer
+ * @li The old data need to be released by the user, otherwise it may cause a memory leak,
+ * call aclGetDataBufferAddr interface to get old data address
+ * call aclrtFree interface to release memory
+ *
+ * @param data [IN] pointer to new data
+ * @li Need to be managed by the user,
+ * call aclrtMalloc interface to apply for memory,
+ * call aclrtFree interface to release memory
+ *
+ * @param size [IN] size of data in bytes
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr
+ */
+ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data address from aclDataBuffer
+ *
+ * @param dataBuffer [IN] pointer to the data of aclDataBuffer
+ *
+ * @retval data address
+ */
+ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data size of aclDataBuffer
+ *
+ * @param dataBuffer [IN] pointer to the data of aclDataBuffer
+ *
+ * @retval data size
+ */
+ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead")
+ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data size of aclDataBuffer to replace aclGetDataBufferSize
+ *
+ * @param dataBuffer [IN] pointer to the data of aclDataBuffer
+ *
+ * @retval data size
+ */
+ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get size of aclDataType
+ *
+ * @param dataType [IN] aclDataType data the size to get
+ *
+ * @retval size of the aclDataType
+ */
+ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
+
+// interfaces of tensor desc
+/**
+ * @ingroup AscendCL
+ * @brief create data aclTensorDesc
+ *
+ * @param dataType [IN] Data types described by tensor
+ * @param numDims [IN] the number of dimensions of the shape
+ * @param dims [IN] the size of the specified dimension
+ * @param format [IN] tensor format
+ *
+ * @retval aclTensorDesc pointer.
+ * @retval nullptr if param is invalid or run out of memory
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
+                                                       aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data aclTensorDesc
+ *
+ * @param desc [IN] pointer to the data of aclTensorDesc to destroy
+ */
+ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief set tensor shape range for aclTensorDesc
+ *
+ * @param desc [OUT] pointer to the data of aclTensorDesc
+ * @param dimsCount [IN] the number of dimensions of the shape
+ * @param dimsRange [IN] the range of dimensions of the shape
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
+                                                    int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);
+
+/**
+ * @ingroup AscendCL
+ * @brief set value range for aclTensorDesc
+ *
+ * @param desc [OUT] pointer to the data of aclTensorDesc
+ * @param valueCount [IN] the number of value
+ * @param valueRange [IN] the range of value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorValueRange(aclTensorDesc *desc, size_t valueCount,
+                                                    int64_t valueRange[][ACL_TENSOR_VALUE_RANGE_NUM]);
+/**
+ * @ingroup AscendCL
+ * @brief get data type specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval data type specified by the tensor description.
+ * @retval ACL_DT_UNDEFINED if description is null
+ */
+ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data format specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval data format specified by the tensor description.
+ * @retval ACL_FORMAT_UNDEFINED if description is null
+ */
+ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get tensor size specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval data size specified by the tensor description.
+ * @retval 0 if description is null
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get element count specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval element count specified by the tensor description.
+ * @retval 0 if description is null
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get number of dims specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval number of dims specified by the tensor description.
+ * @retval 0 if description is null
+ * @retval ACL_UNKNOWN_RANK if the tensor dim is -2
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified dim in the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ * @param index [IN] index of dims, start from 0.
+ *
+ * @retval dim specified by the tensor description and index.
+ * @retval -1 if description or index is invalid
+ */
+ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead")
+ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified dim in the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ * @param index [IN] index of dims, start from 0.
+ * @param dimSize [OUT] size of the specified dim.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the range of the specified dim in the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ * @param index [IN] index of dims, start from 0.
+ * @param dimRangeNum [IN] number of dimRange.
+ * @param dimRange [OUT] range of the specified dim.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
+                                                      int64_t *dimRange);
+
+/**
+ * @ingroup AscendCL
+ * @brief set tensor description name
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param name [IN] tensor description name
+ */
+ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name);
+
+/**
+ * @ingroup AscendCL
+ * @brief get tensor description name
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval tensor description name.
+ * @retval empty string if description is null
+ */
+ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Convert the format in the source aclTensorDesc according to
+ * the specified dstFormat to generate a new target aclTensorDesc.
+ * The format in the source aclTensorDesc remains unchanged.
+ *
+ * @param srcDesc [IN] pointer to the source tensor desc
+ * @param dstFormat [IN] destination format
+ * @param dstDesc [OUT] pointer to the pointer to the destination tensor desc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
+                                                      aclTensorDesc **dstDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the storage format specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param format [IN] the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead")
+ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the storage shape specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param numDims [IN] the number of dimensions of the shape
+ * @param dims [IN] the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead")
+ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the format specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param format [IN] the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the shape specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param numDims [IN] the number of dimensions of the shape
+ * @param dims [IN] the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the original format specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param format [IN] the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the original shape specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param numDims [IN] the number of dimensions of the shape
+ * @param dims [IN] the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op description info
+ *
+ * @param desc [IN] pointer to tensor description
+ * @param index [IN] index of tensor
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get address of tensor
+ *
+ * @param desc [IN] pointer to tensor description
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the dynamic input name specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param dynamicInputName [IN] pointer to the dynamic input name
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set const data specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param dataBuffer [IN] pointer to the const databuffer
+ * @param length [IN] the length of const databuffer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set tensor memory type specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param memType [IN] ACL_MEMTYPE_DEVICE means device, ACL_MEMTYPE_HOST or
+ * ACL_MEMTYPE_HOST_COMPILE_INDEPENDENT means host
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemType memType);
+
+/**
+ * @ingroup AscendCL
+ * @brief an interface for users to output APP logs
+ *
+ * @param logLevel [IN] the level of current log
+ * @param func [IN] the function where the log is located
+ * @param file [IN] the file where the log is located
+ * @param line [IN] Number of source lines where the log is located
+ * @param fmt [IN] the format of current log
+ * @param ... [IN] the value of current log
+ */
+ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
+                                   const char *fmt, ...);
+
+/**
+ * @ingroup AscendCL
+ * @brief get soc name
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY const char *aclrtGetSocName();
+
+#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_BASE_H_
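Putting the tensor-desc accessors together: create a desc, query it with the V2 getters (the V1 variants above are marked deprecated), and destroy it. A sketch using only the declarations from this header:

    const int64_t dims[] = {1, 3, 224, 224};
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
    size_t bytes = aclGetTensorDescSize(desc);      // 1 * 3 * 224 * 224 * sizeof(float)
    int64_t height = 0;
    (void)aclGetTensorDescDimV2(desc, 2, &height);  // dim 2 == 224
    aclDestroyTensorDesc(desc);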
diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h
new file mode 100644
index 00000000..d0e2d145
--- /dev/null
+++ b/inc/external/acl/acl_mdl.h
@@ -0,0 +1,1237 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_MODEL_H_
+#define INC_EXTERNAL_ACL_ACL_MODEL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "acl_base.h"
+#include "acl_rt.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ACL_MAX_DIM_CNT 128
+#define ACL_MAX_TENSOR_NAME_LEN 128
+#define ACL_MAX_BATCH_NUM 128
+#define ACL_MAX_HW_NUM 128
+#define ACL_MAX_SHAPE_COUNT 128
+#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
+
+#define ACL_MDL_LOAD_FROM_FILE 1
+#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
+#define ACL_MDL_LOAD_FROM_MEM 3
+#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
+#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
+#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6
+
+#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
+#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
+#define ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES "_datadump_original_op_names"
+
+typedef struct aclmdlDataset aclmdlDataset;
+typedef struct aclmdlDesc aclmdlDesc;
+typedef struct aclmdlAIPP aclmdlAIPP;
+typedef struct aclAippExtendInfo aclAippExtendInfo;
+typedef struct aclmdlConfigHandle aclmdlConfigHandle;
+
+typedef enum {
+    ACL_YUV420SP_U8 = 1,
+    ACL_XRGB8888_U8,
+    ACL_RGB888_U8,
+    ACL_YUV400_U8,
+    ACL_NC1HWC0DI_FP16,
+    ACL_NC1HWC0DI_S8,
+    ACL_ARGB8888_U8,
+    ACL_YUYV_U8,
+    ACL_YUV422SP_U8,
+    ACL_AYUV444_U8,
+    ACL_RAW10,
+    ACL_RAW12,
+    ACL_RAW16,
+    ACL_RAW24,
+    ACL_AIPP_RESERVED = 0xffff,
+} aclAippInputFormat;
+
+typedef enum {
+    ACL_MDL_PRIORITY_INT32 = 0,
+    ACL_MDL_LOAD_TYPE_SIZET,
+    ACL_MDL_PATH_PTR,            /**< pointer to model load path with deep copy */
+    ACL_MDL_MEM_ADDR_PTR,        /**< pointer to model memory with shallow copy */
+    ACL_MDL_MEM_SIZET,
+    ACL_MDL_WEIGHT_ADDR_PTR,     /**< pointer to weight memory of model with shallow copy */
+    ACL_MDL_WEIGHT_SIZET,
+    ACL_MDL_WORKSPACE_ADDR_PTR,  /**< pointer to workspace memory of model with shallow copy */
+    ACL_MDL_WORKSPACE_SIZET,
+    ACL_MDL_INPUTQ_NUM_SIZET,
+    ACL_MDL_INPUTQ_ADDR_PTR,     /**< pointer to inputQ with shallow copy */
+    ACL_MDL_OUTPUTQ_NUM_SIZET,
+    ACL_MDL_OUTPUTQ_ADDR_PTR     /**< pointer to outputQ with shallow copy */
+} aclmdlConfigAttr;
+
+typedef enum {
+    ACL_DATA_WITHOUT_AIPP = 0,
+    ACL_DATA_WITH_STATIC_AIPP,
+    ACL_DATA_WITH_DYNAMIC_AIPP,
+    ACL_DYNAMIC_AIPP_NODE
+} aclmdlInputAippType;
+
+typedef struct aclmdlIODims {
+    char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
+    size_t dimCount;                    /**< dim array count */
+    int64_t dims[ACL_MAX_DIM_CNT];      /**< dim data array */
+} aclmdlIODims;
+
+typedef struct aclAippDims {
+    aclmdlIODims srcDims; /**< input dims before model transform */
+    size_t srcSize;       /**< input size before model transform */
+    aclmdlIODims aippOutdims; /**< aipp output dims */
+    size_t aippOutSize;       /**< aipp output size */
+} aclAippDims;
+
+typedef struct aclmdlBatch {
+    size_t batchCount;                 /**< batch array count */
+    uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
+} aclmdlBatch;
+
+typedef struct aclmdlHW {
+    size_t hwCount;                  /**< height&width array count */
+    uint64_t hw[ACL_MAX_HW_NUM][2];  /**< height&width data array */
+} aclmdlHW;
+
+typedef struct aclAippInfo {
+    aclAippInputFormat inputFormat;
+    int32_t srcImageSizeW;
+    int32_t srcImageSizeH;
+    int8_t cropSwitch;
+    int32_t loadStartPosW;
+    int32_t loadStartPosH;
+    int32_t cropSizeW;
+    int32_t cropSizeH;
+    int8_t resizeSwitch;
+    int32_t resizeOutputW;
+    int32_t resizeOutputH;
+    int8_t paddingSwitch;
+    int32_t leftPaddingSize;
+    int32_t rightPaddingSize;
+    int32_t topPaddingSize;
+    int32_t bottomPaddingSize;
+    int8_t cscSwitch;
+    int8_t rbuvSwapSwitch;
+    int8_t axSwapSwitch;
+    int8_t singleLineMode;
+    int32_t matrixR0C0;
+    int32_t matrixR0C1;
+    int32_t matrixR0C2;
+    int32_t matrixR1C0;
+    int32_t matrixR1C1;
+    int32_t matrixR1C2;
+    int32_t matrixR2C0;
+    int32_t matrixR2C1;
+    int32_t matrixR2C2;
+    int32_t outputBias0;
+    int32_t outputBias1;
+    int32_t outputBias2;
+    int32_t inputBias0;
+    int32_t inputBias1;
+    int32_t inputBias2;
+    int32_t meanChn0;
+    int32_t meanChn1;
+    int32_t meanChn2;
+    int32_t meanChn3;
+    float minChn0;
+    float minChn1;
+    float minChn2;
+    float minChn3;
+    float varReciChn0;
+    float varReciChn1;
+    float varReciChn2;
+    float varReciChn3;
+    aclFormat srcFormat;
+    aclDataType srcDatatype;
+    size_t srcDimNum;
+    size_t shapeCount;
+    aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
+    aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
+} aclAippInfo;
+
+/**
+ * @ingroup AscendCL
+ * @brief Create data of type aclmdlDesc
+ *
+ * @retval the aclmdlDesc pointer
+ */
+ACL_FUNC_VISIBILITY aclmdlDesc *aclmdlCreateDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclmdlDesc
+ *
+ * @param modelDesc [IN] Pointer to aclmdlDesc to be destroyed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlDestroyDesc(aclmdlDesc *modelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get aclmdlDesc data of the model according to the model ID
+ *
+ * @param modelDesc [OUT] aclmdlDesc pointer
+ * @param modelId [IN] model id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetDesc(aclmdlDesc *modelDesc, uint32_t modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the number of the inputs of
+ * the model according to data of aclmdlDesc
+ *
+ * @param modelDesc [IN] aclmdlDesc pointer
+ *
+ * @retval input size with aclmdlDesc
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetNumInputs(aclmdlDesc *modelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the number of the output of
+ * the model according to data of aclmdlDesc
+ *
+ * @param modelDesc [IN] aclmdlDesc pointer
+ *
+ * @retval output size with aclmdlDesc
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetNumOutputs(aclmdlDesc *modelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified input according to
+ * the data of type aclmdlDesc
+ *
+ * @param modelDesc [IN] aclmdlDesc pointer
+ * @param index [IN] index of the input to be obtained,
+ * the index value starts from 0
+ *
+ * @retval Specify the size of the input
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetInputSizeByIndex(aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified output according to
+ * the data of type aclmdlDesc
+ *
+ * @param modelDesc [IN] aclmdlDesc pointer
+ * @param index [IN] index of the output to be obtained,
+ * the index value starts from 0
+ *
+ * @retval Specify the size of the output
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetOutputSizeByIndex(aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create data of type aclmdlDataset
+ *
+ * @retval the aclmdlDataset pointer
+ */
+ACL_FUNC_VISIBILITY aclmdlDataset *aclmdlCreateDataset();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclmdlDataset
+ *
+ * @param dataset [IN] Pointer to aclmdlDataset to be destroyed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlDestroyDataset(const aclmdlDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Add aclDataBuffer to aclmdlDataset
+ *
+ * @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added
+ * @param dataBuffer [IN] aclDataBuffer address to be added
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set aclTensorDesc to aclmdlDataset
+ *
+ * @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added
+ * @param tensorDesc [IN] aclTensorDesc address to be added
+ * @param index [IN] index of tensorDesc which to be added
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, aclTensorDesc *tensorDesc,
+                                                        size_t index);
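The desc and dataset APIs are designed to be composed: the model desc reports how many inputs exist and how large each one is, and the dataset is then filled with matching buffers. A hedged sketch (aclrtMalloc/ACL_MEM_MALLOC_HUGE_FIRST are assumed from acl_rt.h, and modelId comes from one of the load calls further below):

    aclmdlDesc *desc = aclmdlCreateDesc();
    (void)aclmdlGetDesc(desc, modelId);
    aclmdlDataset *inputs = aclmdlCreateDataset();
    for (size_t i = 0; i < aclmdlGetNumInputs(desc); ++i) {
      size_t size = aclmdlGetInputSizeByIndex(desc, i);
      void *dev = nullptr;
      (void)aclrtMalloc(&dev, size, ACL_MEM_MALLOC_HUGE_FIRST);  // assumed helper
      (void)aclmdlAddDatasetBuffer(inputs, aclCreateDataBuffer(dev, size));
    }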
+
+/**
+ * @ingroup AscendCL
+ * @brief Get aclTensorDesc from aclmdlDataset
+ *
+ * @param dataset [IN] aclmdlDataset pointer;
+ * @param index [IN] index of tensorDesc
+ *
+ * @retval Get address of aclTensorDesc when executed successfully.
+ * @retval Failure return NULL
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclmdlGetDatasetTensorDesc(const aclmdlDataset *dataset, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the number of aclDataBuffer in aclmdlDataset
+ *
+ * @param dataset [IN] aclmdlDataset pointer
+ *
+ * @retval the number of aclDataBuffer
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the aclDataBuffer in aclmdlDataset by index
+ *
+ * @param dataset [IN] aclmdlDataset pointer
+ * @param index [IN] the index of aclDataBuffer
+ *
+ * @retval Get successfully, return the address of aclDataBuffer
+ * @retval Failure return NULL
+ */
+ACL_FUNC_VISIBILITY aclDataBuffer *aclmdlGetDatasetBuffer(const aclmdlDataset *dataset, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from files
+ * and manage memory internally by the system
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations
+ *
+ * @param modelPath [IN] Storage path for offline model files
+ * @param modelId [OUT] Model ID generated after
+ * the system finishes loading the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t *modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from memory and manage the memory of
+ * model running internally by the system
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations
+ *
+ * @param model [IN] Model data stored in memory
+ * @param modelSize [IN] model data size
+ * @param modelId [OUT] Model ID generated after
+ * the system finishes loading the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from a file,
+ * and the user manages the memory of the model run by itself
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations.
+ * @param modelPath [IN] Storage path for offline model files
+ * @param modelId [OUT] Model ID generated after the model finishes loading
+ * @param workPtr [IN] A pointer to the working memory
+ * required by the model on the Device, can be null
+ * @param workSize [IN] The amount of working memory required by the model
+ * @param weightPtr [IN] Pointer to model weight memory on Device
+ * @param weightSize [IN] The amount of weight memory required by the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr,
+                                                       size_t workSize, void *weightPtr, size_t weightSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from memory,
+ * and the user can manage the memory of model running
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations
+ * @param model [IN] Model data stored in memory
+ * @param modelSize [IN] model data size
+ * @param modelId [OUT] Model ID generated after the model finishes loading
+ * @param workPtr [IN] A pointer to the working memory
+ * required by the model on the Device, can be null
+ * @param workSize [IN] work memory size
+ * @param weightPtr [IN] Pointer to model weight memory on Device, can be null
+ * @param weightSize [IN] The amount of weight memory required by the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId,
+                                                      void *workPtr, size_t workSize, void *weightPtr,
+                                                      size_t weightSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief load model from file with async queue
+ *
+ * @param modelPath [IN] model path
+ * @param modelId [OUT] return model id if load success
+ * @param inputQ [IN] input queue pointer
+ * @param inputQNum [IN] input queue num
+ * @param outputQ [IN] output queue pointer
+ * @param outputQNum [IN] output queue num
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint32_t *modelId, const uint32_t *inputQ,
+                                                     size_t inputQNum, const uint32_t *outputQ, size_t outputQNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief load model from memory with async queue
+ *
+ * @param model [IN] model memory which user manages
+ * @param modelSize [IN] model size
+ * @param modelId [OUT] return model id if load success
+ * @param inputQ [IN] input queue pointer
+ * @param inputQNum [IN] input queue num
+ * @param outputQ [IN] output queue pointer
+ * @param outputQNum [IN] output queue num
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId,
+                                                    const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ,
+                                                    size_t outputQNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief Execute model synchronous inference until the inference result is returned
+ *
+ * @param modelId [IN] ID of the model to perform inference
+ * @param input [IN] Input data for model inference
+ * @param output [OUT] Output data for model inference
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output);
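End to end, the synchronous path is load → execute → unload; with the plain ..FromFile variant, working and weight memory are managed internally. A sketch assuming the input/output datasets were built as in the earlier snippet ("resnet50.om" is a placeholder path):

    uint32_t modelId = 0;
    if (aclmdlLoadFromFile("resnet50.om", &modelId) == ACL_SUCCESS) {  // placeholder path
      (void)aclmdlExecute(modelId, inputs, outputs);  // blocks until inference finishes
      (void)aclmdlUnload(modelId);
    }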
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output,
+                                                aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief unload model with model id
+ *
+ * @param modelId [IN] model id to be unloaded
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlUnload(uint32_t modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the weight memory size and working memory size
+ * required for model execution according to the model file
+ *
+ * @param fileName [IN] Model path to get memory information
+ * @param workSize [OUT] The amount of working memory for model executed
+ * @param weightSize [OUT] The amount of weight memory for model executed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlQuerySize(const char *fileName, size_t *workSize, size_t *weightSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Obtain the weights required for
+ * model execution according to the model data in memory
+ *
+ * @par Restriction
+ * The execution and weight memory is Device memory,
+ * and must be allocated and released by the user.
+ * @param model [IN] model memory which user manages
+ * @param modelSize [IN] model data size
+ * @param workSize [OUT] The amount of working memory for model executed
+ * @param weightSize [OUT] The amount of weight memory for model executed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlQuerySizeFromMem(const void *model, size_t modelSize, size_t *workSize,
+                                                    size_t *weightSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief In dynamic batch scenarios,
+ * it is used to set the number of images processed
+ * at one time during model inference
+ *
+ * @param modelId [IN] model id
+ * @param dataset [IN|OUT] data for model inference
+ * @param index [IN] index of dynamic tensor
+ * @param batchSize [IN] Number of images processed at a time during model inference
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicBatchSize(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                       uint64_t batchSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Sets the H and W of the specified input of the model
+ *
+ * @param modelId [IN] model id
+ * @param dataset [IN|OUT] data for model inference
+ * @param index [IN] index of dynamic tensor
+ * @param height [IN] model height
+ * @param width [IN] model width
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
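+ *
+ * @par Example
+ * A minimal sketch (illustrative only; assumes the model was built with dynamic H/W gears and that
+ * ACL_DYNAMIC_TENSOR_NAME identifies the dynamic shape input, with modelDesc prepared by the caller):
+ * @code
+ * size_t index = 0U;
+ * aclError ret = aclmdlGetInputIndexByName(modelDesc, ACL_DYNAMIC_TENSOR_NAME, &index);
+ * // choose one of the H/W gears the model was built with, e.g. 224x224
+ * ret = aclmdlSetDynamicHWSize(modelId, input, index, 224U, 224U);
+ * @endcode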
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicHWSize(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                    uint64_t height, uint64_t width);
+
+/**
+ * @ingroup AscendCL
+ * @brief Sets the dynamic dims of the specified input of the model
+ *
+ * @param modelId [IN] model id
+ * @param dataset [IN|OUT] data for model inference
+ * @param index [IN] index of dynamic dims
+ * @param dims [IN] value of dynamic dims
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetInputDynamicDims(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                       const aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input dims info
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ * @param dims [OUT] dims info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlGetInputDimsV2
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input dims info (version 2), especially for models with static aipp;
+ * it is the same as aclmdlGetInputDims when the model has no static aipp
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ * @param dims [OUT] dims info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlGetInputDims
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputDimsV2(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output dims info
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] output tensor index
+ * @param dims [OUT] dims info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get current output dims info
+ *
+ * @par Function
+ * The following use cases are supported:
+ * @li Get current output shape when model is dynamic and
+ * dynamic shape info is set
+ * @li Get max output shape when model is dynamic and
+ * dynamic shape info is not set
+ * @li Get actual output shape when model is static
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] output tensor index
+ * @param dims [OUT] dims info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
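+ *
+ * @par Example
+ * A minimal sketch (illustrative only; error handling omitted):
+ * @code
+ * aclmdlDesc *desc = aclmdlCreateDesc();
+ * aclError ret = aclmdlGetDesc(desc, modelId);  // fill the description of a loaded model
+ * aclmdlIODims dims;
+ * ret = aclmdlGetCurOutputDims(desc, 0U, &dims);
+ * // dims.dimCount / dims.dims[] now describe output 0
+ * (void)aclmdlDestroyDesc(desc);
+ * @endcode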
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get attr value by op name
+ *
+ * @param modelDesc [IN] model description
+ * @param opName [IN] op name
+ * @param attr [IN] attr name
+ *
+ * @retval the attr value
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input name by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ *
+ * @retval input tensor name, with the same life cycle as modelDesc
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetInputNameByIndex(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output name by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] output tensor index
+ *
+ * @retval output tensor name, with the same life cycle as modelDesc
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetOutputNameByIndex(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input format by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ *
+ * @retval input tensor format
+ */
+ACL_FUNC_VISIBILITY aclFormat aclmdlGetInputFormat(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output format by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] output tensor index
+ *
+ * @retval output tensor format
+ */
+ACL_FUNC_VISIBILITY aclFormat aclmdlGetOutputFormat(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input data type by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ *
+ * @retval input tensor data type
+ */
+ACL_FUNC_VISIBILITY aclDataType aclmdlGetInputDataType(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output data type by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] output tensor index
+ *
+ * @retval output tensor data type
+ */
+ACL_FUNC_VISIBILITY aclDataType aclmdlGetOutputDataType(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input tensor index by name
+ *
+ * @param modelDesc [IN] model description
+ * @param name [IN] input tensor name
+ * @param index [OUT] input tensor index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output tensor index by name
+ *
+ * @param modelDesc [IN] model description
+ * @param name [IN] output tensor name
+ * @param index [OUT] output tensor index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetOutputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic batch info
+ *
+ * @param modelDesc [IN] model description
+ * @param batch [OUT] dynamic batch info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
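+ *
+ * @par Example
+ * A minimal sketch (illustrative only; assumes aclmdlBatch exposes batchCount/batch[] as defined
+ * earlier in this header, and that desc was filled with aclmdlGetDesc):
+ * @code
+ * aclmdlBatch batch;
+ * aclError ret = aclmdlGetDynamicBatch(desc, &batch);
+ * for (size_t i = 0U; i < batch.batchCount; ++i) {
+ *     // batch.batch[i] is one of the batch sizes the model supports
+ * }
+ * @endcode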
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicBatch(const aclmdlDesc *modelDesc, aclmdlBatch *batch);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic height&width info
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ * @param hw [OUT] dynamic height&width info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicHW(const aclmdlDesc *modelDesc, size_t index, aclmdlHW *hw);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic gear count
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] unused, must be -1
+ * @param gearCount [OUT] dynamic gear count
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicGearCount(const aclmdlDesc *modelDesc, size_t index,
+                                                            size_t *gearCount);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic dims info
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] unused, must be -1
+ * @param dims [OUT] value of dynamic dims
+ * @param gearCount [IN] dynamic gear count
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims,
+                                                       size_t gearCount);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create data of type aclmdlAIPP
+ *
+ * @param batchSize [IN] batch size of the model
+ *
+ * @retval the aclmdlAIPP pointer
+ */
+ACL_FUNC_VISIBILITY aclmdlAIPP *aclmdlCreateAIPP(uint64_t batchSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclmdlAIPP
+ *
+ * @param aippParmsSet [IN] Pointer for aclmdlAIPP to be destroyed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlDestroyAIPP(const aclmdlAIPP *aippParmsSet);
+
+/**
+ * @ingroup AscendCL
+ * @brief set InputFormat of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param inputFormat [IN] The inputFormat of aipp
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
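+ *
+ * @par Example
+ * A minimal sketch (illustrative only; assumes a dynamic-AIPP model with batch size 1 and that
+ * ACL_YUV420SP_U8 is a supported input format):
+ * @code
+ * aclmdlAIPP *aipp = aclmdlCreateAIPP(1U);
+ * aclError ret = aclmdlSetAIPPInputFormat(aipp, ACL_YUV420SP_U8);
+ * // ... set further AIPP parameters, then attach it with aclmdlSetInputAIPP ...
+ * ret = aclmdlDestroyAIPP(aipp);
+ * @endcode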
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, aclAippInputFormat inputFormat);
+
+/**
+ * @ingroup AscendCL
+ * @brief set cscParms of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param cscSwitch [IN] Csc switch
+ * @param cscMatrixR0C0 [IN] Csc_matrix_r0_c0
+ * @param cscMatrixR0C1 [IN] Csc_matrix_r0_c1
+ * @param cscMatrixR0C2 [IN] Csc_matrix_r0_c2
+ * @param cscMatrixR1C0 [IN] Csc_matrix_r1_c0
+ * @param cscMatrixR1C1 [IN] Csc_matrix_r1_c1
+ * @param cscMatrixR1C2 [IN] Csc_matrix_r1_c2
+ * @param cscMatrixR2C0 [IN] Csc_matrix_r2_c0
+ * @param cscMatrixR2C1 [IN] Csc_matrix_r2_c1
+ * @param cscMatrixR2C2 [IN] Csc_matrix_r2_c2
+ * @param cscOutputBiasR0 [IN] Output Bias for RGB to YUV, element of row 0, unsigned number
+ * @param cscOutputBiasR1 [IN] Output Bias for RGB to YUV, element of row 1, unsigned number
+ * @param cscOutputBiasR2 [IN] Output Bias for RGB to YUV, element of row 2, unsigned number
+ * @param cscInputBiasR0 [IN] Input Bias for YUV to RGB, element of row 0, unsigned number
+ * @param cscInputBiasR1 [IN] Input Bias for YUV to RGB, element of row 1, unsigned number
+ * @param cscInputBiasR2 [IN] Input Bias for YUV to RGB, element of row 2, unsigned number
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t cscSwitch, int16_t cscMatrixR0C0,
+                                                    int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0,
+                                                    int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0,
+                                                    int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
+                                                    uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1,
+                                                    uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0,
+                                                    uint8_t cscInputBiasR1, uint8_t cscInputBiasR2);
+
+/**
+ * @ingroup AscendCL
+ * @brief set rb/ub swap switch of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param rbuvSwapSwitch [IN] rb/ub swap switch
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch);
+
+/**
+ * @ingroup AscendCL
+ * @brief set RGBA->ARGB, YUVA->AYUV swap switch of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param axSwapSwitch [IN] RGBA->ARGB, YUVA->AYUV swap switch
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch);
+
+/**
+ * @ingroup AscendCL
+ * @brief set source image of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param srcImageSizeW [IN] Source image width
+ * @param srcImageSizeH [IN] Source image height
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
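+ *
+ * @par Example
+ * A minimal sketch (illustrative only; 416x416 is an arbitrary example size for a previously
+ * created aipp parameter set):
+ * @code
+ * aclError ret = aclmdlSetAIPPSrcImageSize(aipp, 416, 416);
+ * @endcode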
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW,
+                                                       int32_t srcImageSizeH);
+
+/**
+ * @ingroup AscendCL
+ * @brief set resize switch of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param scfSwitch [IN] Resize switch
+ * @param scfInputSizeW [IN] Input width of scf
+ * @param scfInputSizeH [IN] Input height of scf
+ * @param scfOutputSizeW [IN] Output width of scf
+ * @param scfOutputSizeH [IN] Output height of scf
+ * @param batchIndex [IN] Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW,
+                                                    int32_t scfInputSizeH, int32_t scfOutputSizeW,
+                                                    int32_t scfOutputSizeH, uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set cropParams of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param cropSwitch [IN] Crop switch
+ * @param cropStartPosW [IN] The start horizontal position of cropping
+ * @param cropStartPosH [IN] The start vertical position of cropping
+ * @param cropSizeW [IN] Crop width
+ * @param cropSizeH [IN] Crop height
+ * @param batchIndex [IN] Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW,
+                                                     int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH,
+                                                     uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set paddingParams of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param paddingSwitch [IN] Padding switch
+ * @param paddingSizeTop [IN] Top padding size
+ * @param paddingSizeBottom [IN] Bottom padding size
+ * @param paddingSizeLeft [IN] Left padding size
+ * @param paddingSizeRight [IN] Right padding size
+ * @param batchIndex [IN] Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch,
+                                                        int32_t paddingSizeTop, int32_t paddingSizeBottom,
+                                                        int32_t paddingSizeLeft, int32_t paddingSizeRight,
+                                                        uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set DtcPixelMean of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param dtcPixelMeanChn0 [IN] Mean value of channel 0
+ * @param dtcPixelMeanChn1 [IN] Mean value of channel 1
+ * @param dtcPixelMeanChn2 [IN] Mean value of channel 2
+ * @param dtcPixelMeanChn3 [IN] Mean value of channel 3
+ * @param batchIndex [IN] Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
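+ *
+ * @par Example
+ * A minimal per-batch sketch (illustrative only; the crop region and mean values are arbitrary
+ * examples for batch index 0):
+ * @code
+ * // configure batch 0: crop a 224x224 region, then subtract per-channel means
+ * aclError ret = aclmdlSetAIPPCropParams(aipp, 1, 0, 0, 224, 224, 0U);
+ * ret = aclmdlSetAIPPDtcPixelMean(aipp, 104, 117, 123, 0, 0U);
+ * @endcode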
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0,
+                                                       int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2,
+                                                       int16_t dtcPixelMeanChn3, uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set DtcPixelMin of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param dtcPixelMinChn0 [IN] Min value of channel 0
+ * @param dtcPixelMinChn1 [IN] Min value of channel 1
+ * @param dtcPixelMinChn2 [IN] Min value of channel 2
+ * @param dtcPixelMinChn3 [IN] Min value of channel 3
+ * @param batchIndex [IN] Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0,
+                                                      float dtcPixelMinChn1, float dtcPixelMinChn2,
+                                                      float dtcPixelMinChn3, uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set PixelVarReci of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT] Pointer for aclmdlAIPP
+ * @param dtcPixelVarReciChn0 [IN] sfr_dtc_pixel_variance_reci_ch0
+ * @param dtcPixelVarReciChn1 [IN] sfr_dtc_pixel_variance_reci_ch1
+ * @param dtcPixelVarReciChn2 [IN] sfr_dtc_pixel_variance_reci_ch2
+ * @param dtcPixelVarReciChn3 [IN] sfr_dtc_pixel_variance_reci_ch3
+ * @param batchIndex [IN] Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0,
+                                                       float dtcPixelVarReciChn1, float dtcPixelVarReciChn2,
+                                                       float dtcPixelVarReciChn3, uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set aipp parameters to model
+ *
+ * @param modelId [IN] model id
+ * @param dataset [IN] Pointer of dataset
+ * @param index [IN] index of input for aipp data (ACL_DYNAMIC_AIPP_NODE)
+ * @param aippParmsSet [IN] Pointer for aclmdlAIPP
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                const aclmdlAIPP *aippParmsSet);
+
+/**
+ * @ingroup AscendCL
+ * @brief set aipp parameters to model
+ *
+ * @param modelId [IN] model id
+ * @param dataset [IN] Pointer of dataset
+ * @param index [IN] index of input for data linked with dynamic aipp (ACL_DATA_WITH_DYNAMIC_AIPP)
+ * @param aippParmsSet [IN] Pointer for aclmdlAIPP
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
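+ *
+ * @par Example
+ * A minimal end-to-end sketch (illustrative only; assumes the ACL_DYNAMIC_AIPP_NAME macro defined
+ * earlier in this header identifies the dynamic AIPP input, and error handling is omitted):
+ * @code
+ * size_t aippIndex = 0U;
+ * aclError ret = aclmdlGetInputIndexByName(modelDesc, ACL_DYNAMIC_AIPP_NAME, &aippIndex);
+ * aclmdlAIPP *aipp = aclmdlCreateAIPP(1U);
+ * ret = aclmdlSetAIPPInputFormat(aipp, ACL_YUV420SP_U8);
+ * ret = aclmdlSetAIPPSrcImageSize(aipp, 416, 416);
+ * ret = aclmdlSetInputAIPP(modelId, input, aippIndex, aipp);
+ * ret = aclmdlDestroyAIPP(aipp);
+ * @endcode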
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                       const aclmdlAIPP *aippParmsSet);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input aipp type
+ *
+ * @param modelId [IN] model id
+ * @param index [IN] index of input
+ * @param type [OUT] aipp type for input. refer to aclmdlInputAippType (enum)
+ * @param dynamicAttachedDataIndex [OUT] index for dynamic attached data (ACL_DYNAMIC_AIPP_NODE)
+ * valid when type is ACL_DATA_WITH_DYNAMIC_AIPP, invalid value is ACL_INVALID_NODE_INDEX
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type,
+                                               size_t *dynamicAttachedDataIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief get static aipp parameters from model
+ *
+ * @param modelId [IN] model id
+ * @param index [IN] index of tensor
+ * @param aippInfo [OUT] Pointer for static aipp info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op description info
+ *
+ * @param deviceId [IN] device id
+ * @param streamId [IN] stream id
+ * @param taskId [IN] task id
+ * @param opName [OUT] pointer to op name
+ * @param opNameLen [IN] the length of op name
+ * @param inputDesc [OUT] pointer to input description
+ * @param numInputs [OUT] the number of input tensor
+ * @param outputDesc [OUT] pointer to output description
+ * @param numOutputs [OUT] the number of output tensor
+ *
+ * @retval ACL_SUCCESS The function is successfully executed
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId,
+                                                      char *opName, size_t opNameLen, aclTensorDesc **inputDesc,
+                                                      size_t *numInputs, aclTensorDesc **outputDesc,
+                                                      size_t *numOutputs);
+
+/**
+ * @ingroup AscendCL
+ * @brief init dump
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlInitDump();
+
+/**
+ * @ingroup AscendCL
+ * @brief set param of dump
+ *
+ * @param dumpCfgPath [IN] the path of dump config
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);
+
+/**
+ * @ingroup AscendCL
+ * @brief finalize dump.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
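+ *
+ * @par Example
+ * A minimal sketch of the dump life cycle (illustrative only; the config path is hypothetical and
+ * error handling is omitted):
+ * @code
+ * aclError ret = aclmdlInitDump();
+ * ret = aclmdlSetDump("/path/to/acl.json");  // dump settings are read from this config file
+ * // ... load and execute the model; dump data is produced during execution ...
+ * ret = aclmdlFinalizeDump();
+ * @endcode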
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();
+
+/**
+ * @ingroup AscendCL
+ * @brief load model with config
+ *
+ * @param handle [IN] pointer to model config handle
+ * @param modelId [OUT] pointer to model id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief create model config handle of type aclmdlConfigHandle
+ *
+ * @retval the aclmdlConfigHandle pointer
+ *
+ * @see aclmdlDestroyConfigHandle
+ */
+ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclmdlConfigHandle
+ *
+ * @param handle [IN] pointer to model config handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateConfigHandle
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief set config for model load
+ *
+ * @param handle [OUT] pointer to model config handle
+ * @param attr [IN] config attr in model config handle to be set
+ * @param attrValue [IN] pointer to model config value
+ * @param valueSize [IN] memory size of attrValue
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
+                                                const void *attrValue, size_t valueSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief get real tensor name from modelDesc
+ *
+ * @param modelDesc [IN] pointer to modelDesc
+ * @param name [IN] tensor name
+ *
+ * @retval the pointer of real tensor name
+ * @retval Failure return NULL
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelDesc, const char *name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_MODEL_H_
diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h
new file mode 100644
index 00000000..c49d56db
--- /dev/null
+++ b/inc/external/acl/acl_op.h
@@ -0,0 +1,532 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_OP_H_
+#define INC_EXTERNAL_ACL_ACL_OP_H_
+
+#include "acl_base.h"
+#include "acl_rt.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct aclopHandle aclopHandle;
+typedef struct aclopAttr aclopAttr;
+typedef struct aclopKernelDesc aclopKernelDesc;
+
+typedef void (*aclDataDeallocator)(void *data, size_t length);
+
+static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;
+
+typedef enum aclEngineType {
+  ACL_ENGINE_SYS,
+  ACL_ENGINE_AICORE,
+  ACL_ENGINE_VECTOR,
+} aclopEngineType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Set base directory that contains single op models
+ *
+ * @par Restriction
+ * The aclopSetModelDir interface can be called only once in a process.
+ * @param modelDir [IN] path of the directory
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir);
+
+/**
+ * @ingroup AscendCL
+ * @brief load single op models from memory
+ *
+ * @par Restriction
+ * The aclopLoad interface can be called more than once in a process.
+ * @param model [IN] address of single op models
+ * @param modelSize [IN] size of single op models
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief create data of type aclopAttr
+ *
+ * @retval pointer to created instance.
+ * @retval nullptr if run out of memory
+ */
+ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclopAttr
+ *
+ * @param attr [IN] pointer to the instance of aclopAttr
+ */
+ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is bool
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ * false if attrValue is 0, true otherwise.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is int64_t
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is float
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is string
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
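+ *
+ * @par Example
+ * A minimal sketch (illustrative only; the attribute names are arbitrary examples):
+ * @code
+ * aclopAttr *attr = aclopCreateAttr();
+ * aclError ret = aclopSetAttrBool(attr, "transpose_a", 0U);
+ * ret = aclopSetAttrInt(attr, "axis", 1);
+ * ret = aclopSetAttrString(attr, "padding", "SAME");
+ * // ... pass attr to aclopExecuteV2 / aclopCompile, then release it ...
+ * aclopDestroyAttr(attr);
+ * @endcode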
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is aclDataType
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrDataType(aclopAttr *attr, const char *attrName, aclDataType attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of aclDataType
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListDataType(aclopAttr *attr, const char *attrName, int numValues,
+                                                      const aclDataType values[]);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of bools
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values; each value is false if 0, true otherwise
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
+                                                  const uint8_t *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of ints
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
+                                                 const int64_t *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of floats
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
+                                                   const float *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of strings
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
+                                                    const char **values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute.
+ * the type of the attribute is list of list of ints
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numLists [IN] number of lists
+ * @param numValues [IN] pointer to number of values of each list
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
+                                                     const int *numValues, const int64_t *const values[]);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load and execute the specified operator asynchronously
+ *
+ * @par Restriction
+ * @li The input and output organization of each operator is different,
+ * and the application needs to organize the operator strictly
+ * according to the operator input and output parameters when calling.
+ * @li When the user calls aclopExecute,
+ * the ACL finds the corresponding task according to the optype,
+ * the description of the input tensor,
+ * the description of the output tensor, and attr, and issues the execution.
+ *
+ * @param opType [IN] type of op
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param inputs [IN] pointer to array of input buffers
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN] pointer to array of output tensor descriptions
+ * @param outputs [OUT] pointer to array of output buffers
+ * @param attr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ * @param stream [IN] stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
+ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
+                                          const aclDataBuffer *const inputs[], int numOutputs,
+                                          const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
+                                          const aclopAttr *attr, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load and execute the specified operator
+ * The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc
+ *
+ * @par Restriction
+ * @li The input and output organization of each operator is different,
+ * and the application needs to organize the operator strictly
+ * according to the operator input and output parameters when calling.
+ * @li When the user calls aclopExecuteV2,
+ * the ACL finds the corresponding task according to the optype,
+ * the description of the input tensor,
+ * the description of the output tensor, and attr, and issues the execution.
+ *
+ * @param opType [IN] type of op
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param inputs [IN] pointer to array of input buffers
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN|OUT] pointer to array of output tensor descriptions
+ * @param outputs [OUT] pointer to array of output buffers
+ * @param attr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ * @param stream [IN] stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
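+ *
+ * @par Example
+ * A minimal single-op sketch (illustrative only; assumes one input and one output, device buffers
+ * prepared by the caller, "Abs" as an example op type, and error handling omitted):
+ * @code
+ * int64_t shape[] = {16, 16};
+ * aclTensorDesc *inDesc = aclCreateTensorDesc(ACL_FLOAT, 2, shape, ACL_FORMAT_ND);
+ * aclTensorDesc *outDesc = aclCreateTensorDesc(ACL_FLOAT, 2, shape, ACL_FORMAT_ND);
+ * aclTensorDesc *inputDesc[] = {inDesc};
+ * aclTensorDesc *outputDesc[] = {outDesc};
+ * aclDataBuffer *inputs[] = {aclCreateDataBuffer(inDevPtr, inSize)};
+ * aclDataBuffer *outputs[] = {aclCreateDataBuffer(outDevPtr, outSize)};
+ * aclError ret = aclopExecuteV2("Abs", 1, inputDesc, inputs, 1, outputDesc, outputs, NULL, stream);
+ * @endcode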
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
+                                            aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
+                                            aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create an instance of aclopHandle.
+ *
+ * @param opType [IN] type of op
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN] pointer to array of output tensor descriptions
+ * @param opAttr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ * @param handle [OUT] pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs,
+                                               const aclTensorDesc *const inputDesc[], int numOutputs,
+                                               const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+                                               aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy aclopHandle instance
+ *
+ * @param handle [IN] pointer to the instance of aclopHandle
+ */
+ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief execute an op with the handle.
+ * can save op model matching cost compared with aclopExecute
+ *
+ * @param handle [IN] pointer to the instance of aclopHandle.
+ * The aclopCreateHandle interface has been called
+ * in advance to create aclopHandle type data.
+ * @param numInputs [IN] number of inputs
+ * @param inputs [IN] pointer to array of input buffers.
+ * The aclCreateDataBuffer interface has been called
+ * in advance to create aclDataBuffer type data.
+ * @param numOutputs [IN] number of outputs
+ * @param outputs [OUT] pointer to array of output buffers
+ * @param stream [IN] stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopCreateHandle | aclCreateDataBuffer
+ */
+ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs,
+                                                 const aclDataBuffer *const inputs[], int numOutputs,
+                                                 aclDataBuffer *const outputs[], aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief cast data type
+ *
+ * @param srcDesc [IN] source tensor desc
+ * @param srcBuffer [IN] source tensor buffer
+ * @param dstDesc [IN] destination tensor desc
+ * @param dstBuffer [OUT] destination tensor buffer
+ * @param truncate [IN] do not truncate if value is 0, truncate otherwise
+ * @param stream [IN] stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer,
+                                       const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate,
+                                       aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for casting datatype
+ *
+ * @param srcDesc [IN] source tensor desc
+ * @param dstDesc [IN] destination tensor desc
+ * @param truncate [IN] do not truncate if value is 0, truncate otherwise
+ * @param handle [OUT] pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
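+ *
+ * @par Example
+ * A minimal sketch (illustrative only; descriptors, buffers and the stream are assumed to have
+ * been prepared by the caller, and error handling is omitted):
+ * @code
+ * aclopHandle *handle = NULL;
+ * aclError ret = aclopCreateHandleForCast(srcDesc, dstDesc, 0U, &handle);
+ * const aclDataBuffer *inputs[] = {srcBuf};
+ * aclDataBuffer *outputs[] = {dstBuf};
+ * ret = aclopExecWithHandle(handle, 1, inputs, 1, outputs, stream);
+ * aclopDestroyHandle(handle);
+ * @endcode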
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate,
+                                                      aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief create kernel
+ *
+ * @param opType [IN] op type
+ * @param kernelId [IN] kernel id
+ * @param kernelName [IN] kernel name
+ * @param binData [IN] kernel bin data
+ * @param binSize [IN] kernel bin size
+ * @param enginetype [IN] engine type
+ * @param deallocator [IN] callback function for deallocating bin data,
+ * null if bin data to be deallocated by caller
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopCompile
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName,
+                                               void *binData, int binSize, aclopEngineType enginetype,
+                                               aclDataDeallocator deallocator);
+
+/**
+ * @ingroup AscendCL
+ * @brief prototype of the op compile callback used to create kernels
+ *
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN] pointer to array of output tensor descriptions
+ * @param opAttr [IN] pointer to instance of aclopAttr
+ * @param aclopKernelDesc [IN] pointer to instance of aclopKernelDesc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
+                                     const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+                                     aclopKernelDesc *aclopKernelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief register compile function
+ *
+ * @param opType [IN] op type
+ * @param func [IN] compile function
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopUnregisterCompileFunc
+ */
+ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func);
+
+/**
+ * @ingroup AscendCL
+ * @brief unregister compile function
+ *
+ * @param opType [IN] op type
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);
+
+/**
+ * @ingroup AscendCL
+ * @brief set kernel args
+ *
+ * @param kernelDesc [IN] pointer to instance of aclopKernelDesc
+ * @param kernelId [IN] kernel id
+ * @param blockDim [IN] block dim
+ * @param args [IN] args
+ * @param argSize [IN] size in bytes of args
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim,
+                                                const void *args, uint32_t argSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief set workspace sizes
+ *
+ * @param kernelDesc [IN] pointer to instance of aclopKernelDesc
+ * @param numWorkspaces [IN] number of workspaces
+ * @param workspaceSizes [IN] pointer to array of sizes of workspaces
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
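+ *
+ * @par Example
+ * A minimal compile-callback sketch (illustrative only; the op type, kernel id, block dim, args
+ * and workspace size are hypothetical placeholders):
+ * @code
+ * static aclError MyCompileFunc(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
+ *                               const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+ *                               aclopKernelDesc *kernelDesc) {
+ *     static const uint32_t args[4] = {0U};     // placeholder kernel arguments
+ *     size_t workspaceSizes[] = {1024U};        // one workspace of 1 KiB, as an example
+ *     aclError ret = aclopSetKernelArgs(kernelDesc, "my_kernel_0", 1U, args, sizeof(args));
+ *     ret = aclopSetKernelWorkspaceSizes(kernelDesc, 1, workspaceSizes);
+ *     return ret;
+ * }
+ * // register the callback for a custom op type
+ * aclError rc = aclopRegisterCompileFunc("MyCustomOp", MyCompileFunc);
+ * @endcode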
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces,
+                                                          size_t *workspaceSizes);
+
+/**
+ * @ingroup AscendCL
+ * @brief compile op with dynamic shape
+ *
+ * @param opType [IN] op type
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN] pointer to array of output tensor descriptions
+ * @param attr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs,
+                                               const aclTensorDesc *const inputDesc[], int numOutputs,
+                                               const aclTensorDesc *const outputDesc[], const aclopAttr *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief infer shapes for the specified operator synchronously
+ *
+ * @param opType [IN] type of op
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param inputs [IN] pointer to array of input buffers
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [OUT] pointer to array of output tensor descriptions
+ * @param attr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
+                                             aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
+                                             aclopAttr *attr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_OP_H_
diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h
new file mode 100644
index 00000000..a0a3f786
--- /dev/null
+++ b/inc/external/acl/acl_op_compiler.h
@@ -0,0 +1,200 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
+#define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
+
+#include "acl_base.h"
+#include "acl_op.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType;
+
+typedef enum {
+  ACL_PRECISION_MODE,
+  ACL_AICORE_NUM,
+  ACL_AUTO_TUNE_MODE,
+  ACL_OP_SELECT_IMPL_MODE,
+  ACL_OPTYPELIST_FOR_IMPLMODE,
+  ACL_OP_DEBUG_LEVEL,
+  ACL_DEBUG_DIR,
+  ACL_OP_COMPILER_CACHE_MODE,
+  ACL_OP_COMPILER_CACHE_DIR,
+  ACL_OP_PERFORMANCE_MODE
+} aclCompileOpt;
+
+typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag;
+
+typedef struct aclGraphDumpOption aclGraphDumpOption;
+
+/**
+ * @ingroup AscendCL
+ * @brief compile op
+ *
+ * @param opType [IN] op type
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN] pointer to array of output tensor descriptions
+ * @param attr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ * @param engineType [IN] engine type
+ * @param compileFlag [IN] compile flag
+ * @param opPath [IN] path of op
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
+                                          int numOutputs, const aclTensorDesc *const outputDesc[],
+                                          const aclopAttr *attr, aclopEngineType engineType,
+                                          aclopCompileType compileFlag, const char *opPath);
+
+/**
+ * @ingroup AscendCL
+ * @brief compile and execute op
+ *
+ * @param opType [IN] op type
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param inputs [IN] pointer to array of input buffers
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN] pointer to array of output tensor descriptions
+ * @param outputs [IN] pointer to array of output buffers
+ * @param attr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ * @param engineType [IN] engine type
+ * @param compileFlag [IN] compile flag
+ * @param opPath [IN] path of op
+ * @param stream [IN] stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
+    const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
+    int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
+    aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief compile and execute op
+ *
+ * @param opType [IN] op type
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param inputs [IN] pointer to array of input buffers
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN|OUT] pointer to array of output tensor descriptions
+ * @param outputs [IN] pointer to array of output buffers
+ * @param attr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ * @param engineType [IN] engine type
+ * @param compileFlag [IN] compile flag
+ * @param opPath [IN] path of op
+ * @param stream [IN] stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCompileAndExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
+                                                      aclDataBuffer *inputs[], int numOutputs,
+                                                      aclTensorDesc *outputDesc[], aclDataBuffer *outputs[],
+                                                      aclopAttr *attr, aclopEngineType engineType,
+                                                      aclopCompileType compileFlag, const char *opPath,
+                                                      aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief set compile option
+ *
+ * @param opt [IN] compile option
+ * @param value [IN] pointer for the option value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value);
+
+/**
+ * @ingroup AscendCL
+ * @brief set compile flag
+ *
+ * @param flag [IN] compile flag, ACL_OP_COMPILE_DEFAULT means compile with default mode
+ * ACL_OP_COMPILE_FUZZ means compile with fuzz mode
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);
+
+/**
+ * @ingroup AscendCL
+ * @brief generate graph and dump
+ *
+ * @param opType [IN] op type
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param inputs [IN] pointer to array of input buffers
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN] pointer to array of output tensor descriptions
+ * @param outputs [IN] pointer to array of output buffers
+ * @param attr [IN] pointer to instance of aclopAttr.
+ * may pass nullptr if the op has no attribute
+ * @param engineType [IN] engine type
+ * @param graphDumpPath [IN] dump path, if the suffix is ".txt", it means file path, else it means directory path
+ * @param graphDumpOpt [IN] dump option, nullptr is supported
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGenGraphAndDumpForOp(
+    const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
+    int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
+    aclopEngineType engineType, const char *graphDumpPath, const aclGraphDumpOption *graphDumpOpt);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the graph dump option
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see aclDestroyGraphDumpOpt
+ */
+ACL_FUNC_VISIBILITY aclGraphDumpOption *aclCreateGraphDumpOpt();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy graph dump option
+ *
+ * @param graphDumpOpt [IN] pointer to the graph dump option
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
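+ *
+ * @par Example
+ * A minimal sketch (illustrative only; descriptors and buffers are assumed to have been prepared
+ * by the caller, "Add" and the dump directory are arbitrary examples):
+ * @code
+ * aclGraphDumpOption *opt = aclCreateGraphDumpOpt();
+ * aclError ret = aclGenGraphAndDumpForOp("Add", 2, inputDesc, inputs, 1, outputDesc, outputs,
+ *                                        NULL, ACL_ENGINE_SYS, "./op_graphs/", opt);
+ * ret = aclDestroyGraphDumpOpt(opt);
+ * @endcode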
+ * @retval OtherValues Failure
+ *
+ * @see aclCreateGraphDumpOpt
+ */
+ACL_FUNC_VISIBILITY aclError aclDestroyGraphDumpOpt(const aclGraphDumpOption *graphDumpOpt);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h
new file mode 100644
index 00000000..4a9a5be9
--- /dev/null
+++ b/inc/external/acl/acl_prof.h
@@ -0,0 +1,485 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_PROF_H_
+#define INC_EXTERNAL_ACL_PROF_H_
+
+#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
+#define MSVP_PROF_API __declspec(dllexport)
+#else
+#define MSVP_PROF_API __attribute__((visibility("default")))
+#endif
+
+#include "acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ACL_PROF_ACL_API 0x0001ULL
+#define ACL_PROF_TASK_TIME 0x0002ULL
+#define ACL_PROF_AICORE_METRICS 0x0004ULL
+#define ACL_PROF_AICPU 0x0008ULL
+#define ACL_PROF_L2CACHE 0x0010ULL
+#define ACL_PROF_HCCL_TRACE 0x0020ULL
+#define ACL_PROF_TRAINING_TRACE 0x0040ULL
+#define ACL_PROF_MSPROFTX 0x0080ULL
+#define ACL_PROF_RUNTIME_API 0x0100ULL
+
+/**
+ * @deprecated please use aclprofGetOpTypeLen and aclprofGetOpNameLen instead
+ */
+#define ACL_PROF_MAX_OP_NAME_LEN 257
+#define ACL_PROF_MAX_OP_TYPE_LEN 65
+
+typedef enum {
+  ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
+  ACL_AICORE_PIPE_UTILIZATION = 1,
+  ACL_AICORE_MEMORY_BANDWIDTH = 2,
+  ACL_AICORE_L0B_AND_WIDTH = 3,
+  ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
+  ACL_AICORE_MEMORY_UB = 5,
+  ACL_AICORE_NONE = 0xFF
+} aclprofAicoreMetrics;
+
+typedef enum {
+  ACL_STEP_START = 0,  // step start
+  ACL_STEP_END = 1     // step end
+} aclprofStepTag;
+
+typedef enum {
+  ACL_SUBSCRIBE_OP = 0,
+  ACL_SUBSCRIBE_SUBGRAPH = 1,
+  ACL_SUBSCRIBE_OP_THREAD = 2,
+  ACL_SUBSCRIBE_NONE
+} aclprofSubscribeOpFlag;
+
+typedef enum { ACL_SUBSCRIBE_ATTRI_THREADID = 0, ACL_SUBSCRIBE_ATTRI_NONE } aclprofSubscribeOpAttri;
+
+typedef struct aclprofConfig aclprofConfig;
+typedef struct aclprofStopConfig aclprofStopConfig;
+typedef struct aclprofAicoreEvents aclprofAicoreEvents;
+typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;
+typedef struct aclprofStepInfo aclprofStepInfo;
+
+/**
+ * @ingroup AscendCL
+ * @brief profiling initialize
+ *
+ * @param profilerResultPath [IN] path of profiling result
+ * @param length [IN] length of profilerResultPath
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofFinalize
+ */
+MSVP_PROF_API aclError aclprofInit(const char *profilerResultPath, size_t length);
+
+/**
+ * @ingroup AscendCL
+ * @brief profiling finalize
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
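+ *
+ * @par Example
+ * A minimal profiling life-cycle sketch (illustrative only; the result path is hypothetical,
+ * strlen requires <string.h>, and error handling is omitted):
+ * @code
+ * const char *path = "./prof_result";
+ * aclError ret = aclprofInit(path, strlen(path));
+ * uint32_t devices[] = {0U};
+ * aclprofConfig *cfg = aclprofCreateConfig(devices, 1U, ACL_AICORE_NONE, NULL,
+ *                                          ACL_PROF_ACL_API | ACL_PROF_TASK_TIME);
+ * ret = aclprofStart(cfg);
+ * // ... run inference ...
+ * ret = aclprofStop(cfg);
+ * ret = aclprofDestroyConfig(cfg);
+ * ret = aclprofFinalize();
+ * @endcode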
+ * @retval OtherValues Failure
+ *
+ * @see aclprofInit
+ */
+MSVP_PROF_API aclError aclprofFinalize();
+
+/**
+ * @ingroup AscendCL
+ * @brief Start profiling modules by profilerConfig
+ *
+ * @param profilerConfig [IN] config of profiling
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofStop
+ */
+MSVP_PROF_API aclError aclprofStart(const aclprofConfig *profilerConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create data of type aclprofConfig
+ *
+ * @param deviceIdList [IN] list of device id
+ * @param deviceNums [IN] number of devices
+ * @param aicoreMetrics [IN] type of aicore metrics
+ * @param aicoreEvents [IN] pointer to aicore events, only support NULL now
+ * @param dataTypeConfig [IN] config modules need profiling
+ *
+ * @retval the aclprofConfig pointer
+ *
+ * @see aclprofDestroyConfig
+ */
+MSVP_PROF_API aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
+                                                 aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents,
+                                                 uint64_t dataTypeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy data of type aclprofConfig
+ *
+ * @param profilerConfig [IN] config of profiling
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofCreateConfig
+ */
+MSVP_PROF_API aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief stop profiling modules by stopProfilingConfig
+ *
+ * @param profilerConfig [IN] pointer to stop config of profiling
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofStart
+ */
+MSVP_PROF_API aclError aclprofStop(const aclprofConfig *profilerConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief subscribe profiling data of model
+ *
+ * @param modelId [IN] the model id subscribed
+ * @param profSubscribeConfig [IN] pointer to config of model subscribe
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofModelUnSubscribe
+ */
+MSVP_PROF_API aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief unsubscribe profiling data of model
+ *
+ * @param modelId [IN] the model id unsubscribed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofModelSubscribe
+ */
+MSVP_PROF_API aclError aclprofModelUnSubscribe(uint32_t modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief create subscribe config
+ *
+ * @param timeInfoSwitch [IN] switch that controls whether time info is obtained from the model
+ * @param aicoreMetrics [IN] aicore metrics
+ * @param fd [IN] pointer to write pipe
+ *
+ * @retval the aclprofSubscribeConfig pointer
+ *
+ * @see aclprofDestroySubscribeConfig
+ */
+MSVP_PROF_API aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
+                                                                   aclprofAicoreMetrics aicoreMetrics, void *fd);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy subscribe config
+ *
+ * @param profSubscribeConfig [IN] subscribe config
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
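+ *
+ * @par Example
+ * A minimal subscription sketch (illustrative only; uses a POSIX pipe from <unistd.h> as the write
+ * endpoint, and the reader side and error handling are omitted):
+ * @code
+ * int fds[2];
+ * (void)pipe(fds);  // fds[0]: read profiling records, fds[1]: written to by ACL
+ * aclprofSubscribeConfig *cfg = aclprofCreateSubscribeConfig(1, ACL_AICORE_NONE, (void *)&fds[1]);
+ * aclError ret = aclprofModelSubscribe(modelId, cfg);
+ * // ... execute the model; read and parse records from fds[0] ...
+ * ret = aclprofModelUnSubscribe(modelId);
+ * ret = aclprofDestroySubscribeConfig(cfg);
+ * @endcode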
+ * @retval OtherValues Failure
+ *
+ * @see aclprofCreateSubscribeConfig
+ */
+MSVP_PROF_API aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief get size of op description data
+ *
+ * @param opDescSize [OUT] size of op desc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofGetOpDescSize(size_t *opDescSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op number from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param opNumber [OUT] op number of subscription data
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber);
+
+/**
+ * @ingroup AscendCL
+ * @brief get length of op type from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ * @param opTypeLen [OUT] actual length of op type string
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index, size_t *opTypeLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op type from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ * @param opType [OUT] obtained op type string
+ * @param opTypeLen [IN] obtained length of op type string
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
+                                        size_t opTypeLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief get length of op name from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ * @param opNameLen [OUT] actual length of op name string
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index, size_t *opNameLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op name from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ * @param opName [OUT] obtained op name string
+ * @param opNameLen [IN] obtained length of op name string
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
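+ *
+ * @par Example
+ * A sketch of the two-step length-then-fetch pattern (illustrative only,
+ * not part of the original header; opInfo/opInfoLen come from a
+ * subscription read and i is the op index):
+ * @code
+ * size_t nameLen = 0;
+ * if (aclprofGetOpNameLen(opInfo, opInfoLen, i, &nameLen) == ACL_SUCCESS) {
+ *     char *name = (char *)malloc(nameLen);            // buffer sized as reported
+ *     if ((name != NULL) &&
+ *         (aclprofGetOpName(opInfo, opInfoLen, i, name, nameLen) == ACL_SUCCESS)) {
+ *         // name now holds the op name string
+ *     }
+ *     free(name);
+ * }
+ * @endcode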
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
+                                        size_t opNameLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief get start time of specified op from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ *
+ * @retval start time(us) of specified op with timestamp
+ * @retval 0 for failed
+ */
+MSVP_PROF_API uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get end time of specified op from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ *
+ * @retval end time(us) of specified op with timestamp
+ * @retval 0 for failed
+ */
+MSVP_PROF_API uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get execution time of specified op from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ *
+ * @retval execution time(us) of specified op with timestamp
+ * @retval 0 for failed
+ */
+MSVP_PROF_API uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get model id from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ *
+ * @retval model id of subscription data
+ * @retval 0 for failed
+ */
+MSVP_PROF_API size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op flag from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ *
+ * @retval op flag
+ * @retval ACL_SUBSCRIBE_NONE for failed
+ */
+MSVP_PROF_API aclprofSubscribeOpFlag aclprofGetOpFlag(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op attribute value from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ * @param index [IN] index of op array in opInfo
+ * @param attri [IN] attribute of op
+ *
+ * @retval attribute value of op
+ * @retval NULL for failed
+ */
+MSVP_PROF_API const char *aclprofGetOpAttriValue(const void *opInfo, size_t opInfoLen, uint32_t index,
+                                                 aclprofSubscribeOpAttri attri);
+
+/**
+ * @ingroup AscendCL
+ * @brief record the step timestamp
+ *
+ * @param stepInfo [IN] pointer to stepInfo data
+ * @param tag [IN] start or end flag
+ * @param stream [IN] stream info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofGetStepTimestamp(aclprofStepInfo *stepInfo, aclprofStepTag tag, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create pointer to aclprofStepInfo data
+ *
+ *
+ * @retval aclprofStepInfo pointer
+ */
+MSVP_PROF_API aclprofStepInfo *aclprofCreateStepInfo();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy aclprofStepInfo pointer
+ *
+ *
+ * @retval void
+ */
+MSVP_PROF_API void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief create pointer to aclprofStamp
+ *
+ *
+ * @retval aclprofStamp pointer
+ */
+MSVP_PROF_API void *aclprofCreateStamp();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy stamp pointer
+ *
+ * @param stamp [IN] the stamp pointer
+ *
+ * @retval void
+ */
+MSVP_PROF_API void aclprofDestroyStamp(void *stamp);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record push timestamp
+ *
+ * @param stamp [IN] the stamp pointer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofPush(void *stamp);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record pop timestamp
+ *
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofPop();
+
+/**
+ * @ingroup AscendCL
+ * @brief Record range start timestamp
+ *
+ * @param stamp [IN] the stamp pointer
+ * @param rangeId [OUT] the returned range id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofRangeStart(void *stamp, uint32_t *rangeId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record range end timestamp
+ *
+ * @param rangeId [IN] the range id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofRangeStop(uint32_t rangeId);
+
+/**
+ * @ingroup AscendCL
+ * @brief set message to stamp
+ *
+ * @param stamp [IN] the stamp pointer
+ * @param msg [IN] the message to set
+ * @param msgLen [IN] length of the message
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofSetStampTraceMessage(void *stamp, const char *msg, uint32_t msgLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record mark timestamp
+ *
+ * @param stamp [IN] the stamp pointer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofMark(void *stamp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // INC_EXTERNAL_ACL_PROF_H_
diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h
new file mode 100644
index 00000000..578f3bdd
--- /dev/null
+++ b/inc/external/acl/acl_rt.h
@@ -0,0 +1,1031 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_RT_H_
+#define INC_EXTERNAL_ACL_ACL_RT_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ACL_EVENT_TIME_LINE 0x00000008u
+
+typedef enum aclrtRunMode {
+  ACL_DEVICE,
+  ACL_HOST,
+} aclrtRunMode;
+
+typedef enum aclrtTsId {
+  ACL_TS_ID_AICORE = 0,
+  ACL_TS_ID_AIVECTOR = 1,
+  ACL_TS_ID_RESERVED = 2,
+} aclrtTsId;
+
+typedef enum aclrtEventStatus {
+  ACL_EVENT_STATUS_COMPLETE = 0,
+  ACL_EVENT_STATUS_NOT_READY = 1,
+  ACL_EVENT_STATUS_RESERVED = 2,
+} aclrtEventStatus;
+
+typedef enum aclrtEventRecordedStatus {
+  ACL_EVENT_RECORDED_STATUS_NOT_READY = 0,
+  ACL_EVENT_RECORDED_STATUS_COMPLETE = 1,
+} aclrtEventRecordedStatus;
+
+typedef enum aclrtEventWaitStatus {
+  ACL_EVENT_WAIT_STATUS_COMPLETE = 0,
+  ACL_EVENT_WAIT_STATUS_NOT_READY = 1,
+  ACL_EVENT_WAIT_STATUS_RESERVED = 0xffff,
+} aclrtEventWaitStatus;
+
+typedef enum aclrtCallbackBlockType {
+  ACL_CALLBACK_NO_BLOCK,
+  ACL_CALLBACK_BLOCK,
+} aclrtCallbackBlockType;
+
+typedef enum aclrtMemcpyKind {
+  ACL_MEMCPY_HOST_TO_HOST,
+  ACL_MEMCPY_HOST_TO_DEVICE,
+  ACL_MEMCPY_DEVICE_TO_HOST,
+  ACL_MEMCPY_DEVICE_TO_DEVICE,
+} aclrtMemcpyKind;
+
+typedef enum aclrtMemMallocPolicy {
+  ACL_MEM_MALLOC_HUGE_FIRST,
+  ACL_MEM_MALLOC_HUGE_ONLY,
+  ACL_MEM_MALLOC_NORMAL_ONLY,
+  ACL_MEM_MALLOC_HUGE_FIRST_P2P,
+  ACL_MEM_MALLOC_HUGE_ONLY_P2P,
+  ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
+} aclrtMemMallocPolicy;
+
+typedef enum aclrtMemAttr {
+  ACL_DDR_MEM,
+  ACL_HBM_MEM,
+  ACL_DDR_MEM_HUGE,
+  ACL_DDR_MEM_NORMAL,
+  ACL_HBM_MEM_HUGE,
+  ACL_HBM_MEM_NORMAL,
+  ACL_DDR_MEM_P2P_HUGE,
+  ACL_DDR_MEM_P2P_NORMAL,
+  ACL_HBM_MEM_P2P_HUGE,
+  ACL_HBM_MEM_P2P_NORMAL,
+} aclrtMemAttr;
+
+typedef enum aclrtGroupAttr {
+  ACL_GROUP_AICORE_INT,
+  ACL_GROUP_AIV_INT,
+  ACL_GROUP_AIC_INT,
+  ACL_GROUP_SDMANUM_INT,
+  ACL_GROUP_ASQNUM_INT,
+  ACL_GROUP_GROUPID_INT
+} aclrtGroupAttr;
+
+typedef struct tagRtGroupInfo aclrtGroupInfo;
+
+typedef struct rtExceptionInfo aclrtExceptionInfo;
+
+typedef void (*aclrtCallback)(void *userData);
+
+typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set a callback function to handle exception information
+ *
+ * @param callback [IN] callback function to handle exception information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
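+ *
+ * @par Example
+ * A registration sketch (illustrative only, not part of the original
+ * header; the callback body is a placeholder):
+ * @code
+ * static void OnTaskException(aclrtExceptionInfo *info)
+ * {
+ *     uint32_t taskId = aclrtGetTaskIdFromExceptionInfo(info);    // failing task
+ *     uint32_t devId  = aclrtGetDeviceIdFromExceptionInfo(info);  // failing device
+ *     // ... log or trigger recovery ...
+ * }
+ * // during initialization:
+ * (void)aclrtSetExceptionInfoCallback(OnTaskException);
+ * @endcode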
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get task id from exception information
+ *
+ * @param info [IN] pointer of exception information
+ *
+ * @retval The task id from exception information
+ * @retval 0xFFFFFFFF if info is null
+ */
+ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream id from exception information
+ *
+ * @param info [IN] pointer of exception information
+ *
+ * @retval The stream id from exception information
+ * @retval 0xFFFFFFFF if info is null
+ */
+ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get thread id from exception information
+ *
+ * @param info [IN] pointer of exception information
+ *
+ * @retval The thread id of the failed task
+ * @retval 0xFFFFFFFF if info is null
+ */
+ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get device id from exception information
+ *
+ * @param info [IN] pointer of exception information
+ *
+ * @retval The device id of the failed task
+ * @retval 0xFFFFFFFF if info is null
+ */
+ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief Specify the thread that handles the callback functions on the Stream
+ *
+ * @param threadId [IN] thread ID
+ * @param stream [IN] stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Add a callback function to be executed on the host
+ *        to the task queue of the Stream
+ *
+ * @param fn [IN] Specify the callback function to be added
+ *                The function prototype of the callback function is:
+ *                typedef void (*aclrtCallback)(void *userData);
+ * @param userData [IN] User data to be passed to the callback function
+ * @param blockType [IN] callback block type
+ * @param stream [IN] stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType,
+                                                 aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief After waiting for a specified time, trigger callback processing
+ *
+ * @par Function
+ * The thread processing callback specified by
+ * the aclrtSubscribeReport interface
+ *
+ * @param timeout [IN] timeout value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSubscribeReport
+ */
+ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout);
+
+/**
+ * @ingroup AscendCL
+ * @brief Cancel thread registration,
+ *        the callback function on the specified Stream
+ *        is no longer processed by the specified thread
+ *
+ * @param threadId [IN] thread ID
+ * @param stream [IN] stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
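+ *
+ * @par Example
+ * A subscribe/unsubscribe sketch (illustrative only, not part of the
+ * original header; threadId, stream, MyCallback and userData are
+ * placeholders, and the worker thread draining reports via
+ * aclrtProcessReport(timeout) is not shown):
+ * @code
+ * (void)aclrtSubscribeReport(threadId, stream);
+ * (void)aclrtLaunchCallback(MyCallback, userData, ACL_CALLBACK_BLOCK, stream);
+ * // ... the subscribed thread calls aclrtProcessReport(timeout) in a loop ...
+ * (void)aclrtUnSubscribeReport(threadId, stream);
+ * @endcode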
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create context and associates it with the calling thread
+ *
+ * @par Function
+ * The following use cases are supported:
+ * @li If you don't call the aclrtCreateContext interface
+ * to explicitly create the context,
+ * the system will use the default context, which is implicitly created
+ * when the aclrtSetDevice interface is called.
+ * @li If multiple contexts are created in a process
+ * (there is no limit on the number of contexts),
+ * the current thread can only use one of them at the same time.
+ * It is recommended to explicitly specify the context of the current thread
+ * through the aclrtSetCurrentContext interface to increase
+ * the maintainability of the program.
+ *
+ * @param context [OUT] point to the created context
+ * @param deviceId [IN] device to create context on
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSetDevice | aclrtSetCurrentContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy context instance
+ *
+ * @par Function
+ * Can only destroy context created through aclrtCreateContext interface
+ *
+ * @param context [IN] the context to destroy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context);
+
+/**
+ * @ingroup AscendCL
+ * @brief set the context of the thread
+ *
+ * @par Function
+ * The following scenarios are supported:
+ * @li If the aclrtCreateContext interface is called in a thread to explicitly
+ * create a Context (for example: ctx1), the thread's Context can be specified
+ * without calling the aclrtSetCurrentContext interface.
+ * The system uses ctx1 as the context of thread1 by default.
+ * @li If the context is not explicitly created through the aclrtCreateContext
+ * interface, the system uses the default context as the context of the thread.
+ * At this time, the aclrtDestroyContext interface cannot be used to release
+ * the default context.
+ * @li If the aclrtSetCurrentContext interface is called multiple times to
+ * set the thread's Context, the last one prevails.
+ *
+ * @par Restriction
+ * @li If the device corresponding to the context set for the thread
+ * has been reset, you cannot set the context as the context of the thread,
+ * otherwise a business exception will result.
+ * @li It is recommended to use the context created in a thread.
+ * If the aclrtCreateContext interface is called in thread A to create a context,
+ * and the context is used in thread B,
+ * the user must guarantee the execution order of tasks in the same stream
+ * under the same context in two threads.
+ *
+ * @param context [IN] the current context of the thread
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
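+ *
+ * @par Example
+ * A context lifetime sketch (illustrative only, not part of the original
+ * header; device 0 is assumed and error handling is elided):
+ * @code
+ * aclrtContext ctx = NULL;
+ * if (aclrtCreateContext(&ctx, 0) == ACL_SUCCESS) {
+ *     (void)aclrtSetCurrentContext(ctx);   // bind ctx to this thread
+ *     // ... submit work under ctx ...
+ *     (void)aclrtDestroyContext(ctx);
+ * }
+ * @endcode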
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateContext | aclrtDestroyContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context);
+
+/**
+ * @ingroup AscendCL
+ * @brief get the context of the thread
+ *
+ * @par Function
+ * If the user calls the aclrtSetCurrentContext interface
+ * multiple times to set the context of the current thread,
+ * then the last set context is obtained
+ *
+ * @param context [OUT] the current context of the thread
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSetCurrentContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context);
+
+/**
+ * @ingroup AscendCL
+ * @brief Specify the device to use for the operation
+ *        implicitly create the default context and the default stream
+ *
+ * @par Function
+ * The following use cases are supported:
+ * @li Device can be specified in the process or thread.
+ * If you call the aclrtSetDevice interface multiple
+ * times to specify the same device,
+ * you only need to call the aclrtResetDevice interface to reset the device.
+ * @li The same device can be specified for operation
+ * in different processes or threads.
+ * @li Device is specified in a process,
+ * and multiple threads in the process can share this device to explicitly
+ * create a Context (aclrtCreateContext interface).
+ * @li In multi-device scenarios, you can switch to other devices
+ * through the aclrtSetDevice interface in the process.
+ *
+ * @param deviceId [IN] the device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtResetDevice | aclrtCreateContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Reset the current operating Device and free resources on the device,
+ *        including the default context, the default stream,
+ *        and all streams created under the default context,
+ *        and synchronizes the interface.
+ *        If the task under the default context or stream has not been completed,
+ *        the system will wait for the task to complete before releasing it.
+ *
+ * @par Restriction
+ * @li The Context, Stream, and Event explicitly created
+ * on the device to be reset must be released before the reset.
+ * It is recommended to follow the interface calling sequence below,
+ * otherwise business abnormalities may be caused.
+ * @li Interface calling sequence:
+ * call aclrtDestroyEvent interface to release Event or
+ * call aclrtDestroyStream interface to release explicitly created Stream->
+ * call aclrtDestroyContext to release explicitly created Context->
+ * call aclrtResetDevice interface
+ *
+ * @param deviceId [IN] the device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief get target device of current thread
+ *
+ * @param deviceId [OUT] the device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief get target side
+ *
+ * @param runMode [OUT] the run mode
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
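+ *
+ * @par Example
+ * A run-mode branch sketch (illustrative only, not part of the original
+ * header):
+ * @code
+ * aclrtRunMode mode = ACL_HOST;
+ * if ((aclrtGetRunMode(&mode) == ACL_SUCCESS) && (mode == ACL_DEVICE)) {
+ *     // running on the device side: host/device copies can be skipped
+ * }
+ * @endcode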
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode); + +/** + * @ingroup AscendCL + * @brief Wait for compute device to finish + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void); + +/** + * @ingroup AscendCL + * @brief Set Scheduling TS + * + * @param tsId [IN] the ts id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId); + +/** + * @ingroup AscendCL + * @brief get total device number. + * + * @param count [OUT] the device number + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); + +/** + * @ingroup AscendCL + * @brief create event instance + * + * @param event [OUT] created event + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); + +/** + * @ingroup AscendCL + * @brief create event instance with flag + * + * @param event [OUT] created event + * @param flag [IN] event flag + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateEventWithFlag(aclrtEvent *event, uint32_t flag); + +/** + * @ingroup AscendCL + * @brief destroy event instance + * + * @par Function + * Only events created through the aclrtCreateEvent interface can be + * destroyed, synchronous interfaces. When destroying an event, + * the user must ensure that the tasks involved in the aclrtSynchronizeEvent + * interface or the aclrtStreamWaitEvent interface are completed before + * they are destroyed. + * + * @param event [IN] event to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief Record an Event in the Stream + * + * @param event [IN] event to record + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Reset an event + * + * @par Function + * Users need to make sure to wait for the tasks in the Stream + * to complete before resetting the Event + * + * @param event [IN] event to reset + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Queries an event's status + * + * @param event [IN] event to query + * @param status [OUT] event status + * + * @retval ACL_SUCCESS The function is successfully executed. 
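+ *
+ * @par Example
+ * A status-polling sketch using the non-deprecated replacement
+ * aclprofQueryEventStatus-style flow (illustrative only, not part of the
+ * original header; stream is a placeholder):
+ * @code
+ * aclrtEvent evt = NULL;
+ * (void)aclrtCreateEvent(&evt);
+ * (void)aclrtRecordEvent(evt, stream);
+ * aclrtEventRecordedStatus st = ACL_EVENT_RECORDED_STATUS_NOT_READY;
+ * (void)aclrtQueryEventStatus(evt, &st);   // poll instead of blocking
+ * (void)aclrtDestroyEvent(evt);
+ * @endcode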
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclrtQueryEvent is deprecated, use aclrtQueryEventStatus instead")
+ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status);
+
+/**
+ * @ingroup AscendCL
+ * @brief Queries an event's status
+ *
+ * @param event [IN] event to query
+ * @param status [OUT] event recorded status
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtQueryEventStatus(aclrtEvent event, aclrtEventRecordedStatus *status);
+
+/**
+ * @ingroup AscendCL
+ * @brief Queries an event's wait-status
+ *
+ * @param event [IN] event to query
+ * @param status [OUT] event wait-status
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtQueryEventWaitStatus(aclrtEvent event, aclrtEventWaitStatus *status);
+
+/**
+ * @ingroup AscendCL
+ * @brief Block Host Running, wait event to be complete
+ *
+ * @param event [IN] event to wait
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event);
+
+/**
+ * @ingroup AscendCL
+ * @brief Computes the elapsed time between events.
+ *
+ * @param ms [OUT] time between startEvent and endEvent in ms
+ * @param startEvent [IN] starting event
+ * @param endEvent [IN] ending event
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream
+ */
+ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent startEvent, aclrtEvent endEvent);
+
+/**
+ * @ingroup AscendCL
+ * @brief alloc memory on device
+ *
+ * @par Function
+ * allocate size bytes of linear memory on the device
+ * and return a pointer to the allocated memory in *devPtr
+ *
+ * @par Restriction
+ * @li The memory requested by the aclrtMalloc interface needs to be released
+ * through the aclrtFree interface.
+ * @li Before calling the media data processing interface,
+ * if you need to apply memory on the device to store input or output data,
+ * you need to call acldvppMalloc to apply for memory.
+ *
+ * @param devPtr [OUT] pointer to pointer to allocated memory on device
+ * @param size [IN] alloc memory size
+ * @param policy [IN] memory alloc policy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtFree | acldvppMalloc | aclrtMallocCached
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
+
+/**
+ * @ingroup AscendCL
+ * @brief allocate memory on device with cache
+ *
+ * @par Function
+ * allocate size bytes of linear memory on the device
+ * and return a pointer to the allocated memory in *devPtr
+ *
+ * @par Restriction
+ * @li The memory requested by the aclrtMallocCached interface needs to be released
+ * through the aclrtFree interface.
+ *
+ * @param devPtr [OUT] pointer to pointer to allocated memory on device
+ * @param size [IN] alloc memory size
+ * @param policy [IN] memory alloc policy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
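+ *
+ * @par Example
+ * A cached-allocation sketch (illustrative only, not part of the original
+ * header; bufSize is a placeholder):
+ * @code
+ * void *dev = NULL;
+ * if (aclrtMallocCached(&dev, bufSize, ACL_MEM_MALLOC_HUGE_FIRST) == ACL_SUCCESS) {
+ *     // ... CPU writes go through the cache ...
+ *     (void)aclrtMemFlush(dev, bufSize);   // flush cached data to memory
+ *     (void)aclrtFree(dev);
+ * }
+ * @endcode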
+ * @retval OtherValues Failure + * + * @see aclrtFree | aclrtMalloc + */ +ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); + +/** + * @ingroup AscendCL + * @brief flush cache data to ddr + * + * @param devPtr [IN] the pointer that flush data to ddr + * @param size [IN] flush size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief invalidate cache data + * + * @param devPtr [IN] pointer to invalidate cache data + * @param size [IN] invalidate size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free device memory + * + * @par Function + * can only free memory allocated through the aclrtMalloc interface + * + * @param devPtr [IN] Pointer to memory to be freed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMalloc + */ +ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr); + +/** + * @ingroup AscendCL + * @brief alloc memory on host + * + * @par Restriction + * @li The requested memory cannot be used in the Device + * and needs to be explicitly copied to the Device. + * @li The memory requested by the aclrtMallocHost interface + * needs to be released through the aclrtFreeHost interface. + * + * @param hostPtr [OUT] pointer to pointer to allocated memory on the host + * @param size [IN] alloc memory size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtFreeHost + */ +ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free host memory + * + * @par Function + * can only free memory allocated through the aclrtMallocHost interface + * + * @param hostPtr [IN] free memory pointer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMallocHost + */ +ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); + +/** + * @ingroup AscendCL + * @brief synchronous memory replication between host and device + * + * @param dst [IN] destination address pointer + * @param destMax [IN] Max length of the destination address memory + * @param src [IN] source address pointer + * @param count [IN] the length of byte to copy + * @param kind [IN] memcpy type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, + aclrtMemcpyKind kind); + +/** + * @ingroup AscendCL + * @brief Initialize memory and set contents of memory to specified value + * + * @par Function + * The memory to be initialized is on the Host or device side, + * and the system determines whether + * it is host or device according to the address + * + * @param devPtr [IN] Starting address of memory + * @param maxCount [IN] Max length of destination address memory + * @param value [IN] Set value + * @param count [IN] The length of memory + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count); + +/** + * @ingroup AscendCL + * @brief Asynchronous memory replication between Host and Device + * + * @par Function + * After calling this interface, + * be sure to call the aclrtSynchronizeStream interface to ensure that + * the task of memory replication has been completed + * + * @par Restriction + * @li For on-chip Device-to-Device memory copy, + * both the source and destination addresses must be 64-byte aligned + * + * @param dst [IN] destination address pointer + * @param destMax [IN] Max length of destination address memory + * @param src [IN] source address pointer + * @param count [IN] the number of byte to copy + * @param kind [IN] memcpy type + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, + aclrtMemcpyKind kind, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief synchronous memory replication of two-dimensional matrix between host and device + * + * @param dst [IN] destination address pointer + * @param dpitch [IN] pitch of destination memory + * @param src [IN] source address pointer + * @param spitch [IN] pitch of source memory + * @param width [IN] width of matrix transfer + * @param height [IN] height of matrix transfer + * @param kind [IN] memcpy type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy2d(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, aclrtMemcpyKind kind); + +/** + * @ingroup AscendCL + * @brief asynchronous memory replication of two-dimensional matrix between host and device + * + * @param dst [IN] destination address pointer + * @param dpitch [IN] pitch of destination memory + * @param src [IN] source address pointer + * @param spitch [IN] pitch of source memory + * @param width [IN] width of matrix transfer + * @param height [IN] height of matrix transfer + * @param kind [IN] memcpy type + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy2dAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, aclrtMemcpyKind kind, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Asynchronous initialize memory + * and set contents of memory to specified value async + * + * @par Function + * The memory to be initialized is on the Host or device side, + * and the system determines whether + * it is host or device according to the address + * + * @param devPtr [IN] destination address pointer + * @param maxCount [IN] Max length of destination address memory + * @param value [IN] set value + * @param count [IN] the number of byte to set + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
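+ *
+ * @par Example
+ * An async-then-synchronize sketch (illustrative only, not part of the
+ * original header; devPtr, bufSize and stream are placeholders):
+ * @code
+ * (void)aclrtMemsetAsync(devPtr, bufSize, 0, bufSize, stream);
+ * (void)aclrtSynchronizeStream(stream);   // ensure the memset has completed
+ * @endcode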
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSynchronizeStream
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
+                                              aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create stream instance
+ *
+ * @param stream [OUT] the created stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy stream instance
+ *
+ * @par Function
+ * Can only destroy streams created through the aclrtCreateStream interface
+ *
+ * @par Restriction
+ * Before calling the aclrtDestroyStream interface to destroy
+ * the specified Stream, you need to call the aclrtSynchronizeStream interface
+ * to ensure that the tasks in the Stream have been completed.
+ *
+ * @param stream [IN] the stream to destroy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateStream | aclrtSynchronizeStream
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief block the host until all tasks
+ *        in the specified stream have completed
+ *
+ * @param stream [IN] the stream to wait
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Blocks the operation of the specified Stream until
+ *        the specified Event is completed.
+ *        Support for multiple streams waiting for the same event.
+ *
+ * @param stream [IN] the stream to wait on. If using the default Stream, set NULL
+ * @param event [IN] the event to wait
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event);
+
+/**
+ * @ingroup AscendCL
+ * @brief set group
+ *
+ * @par Function
+ * set the task to the corresponding group
+ *
+ * @param groupId [IN] group id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId);
+
+/**
+ * @ingroup AscendCL
+ * @brief get the number of groups
+ *
+ * @par Function
+ * get the number of groups. If the number of groups is zero,
+ * it means that group is not supported or no group is created.
+ *
+ * @param count [OUT] the number of groups
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count);
+
+/**
+ * @ingroup AscendCL
+ * @brief create group information
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ *
+ * @see aclrtDestroyGroupInfo
+ */
+ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy group information
+ *
+ * @param groupInfo [IN] pointer to group information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateGroupInfo
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief get all group information
+ *
+ * @param groupInfo [OUT] pointer to group information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtGetGroupCount
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief get detail information of group
+ *
+ * @param groupInfo [IN] pointer to group information
+ * @param groupIndex [IN] group index value
+ * @param attr [IN] group attribute
+ * @param attrValue [OUT] pointer to attribute value
+ * @param valueLen [IN] length of attribute value
+ * @param paramRetSize [OUT] pointer to real length of attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtGetGroupCount | aclrtGetAllGroupInfo
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex,
+                                                     aclrtGroupAttr attr, void *attrValue, size_t valueLen,
+                                                     size_t *paramRetSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief checking whether current device and peer device support the p2p feature
+ *
+ * @param canAccessPeer [OUT] pointer to save the checking result
+ * @param deviceId [IN] current device id
+ * @param peerDeviceId [IN] peer device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief enable the peer device to support the p2p feature
+ *
+ * @param peerDeviceId [IN] the peer device id
+ * @param flags [IN] reserved field, now it must be zero
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags);
+
+/**
+ * @ingroup AscendCL
+ * @brief disable the peer device to support the p2p function
+ *
+ * @param peerDeviceId [IN] the peer device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Obtain the free memory and total memory of specified attribute.
+ *        The specified memory includes normal memory and huge memory.
+ *
+ * @param attr [IN] the memory attribute of specified device
+ * @param free [OUT] the free memory of specified device
+ * @param total [OUT] the total memory of specified device.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the timeout interval for waiting on an op
+ *
+ * @param timeout [IN] op wait timeout
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // INC_EXTERNAL_ACL_ACL_RT_H_
diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h
new file mode 100644
index 00000000..4cea7500
--- /dev/null
+++ b/inc/external/acl/acl_tdt.h
@@ -0,0 +1,316 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_
+#define INC_EXTERNAL_ACL_ACL_TDT_H_
+
+#include "acl/acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum acltdtTensorType {
+  ACL_TENSOR_DATA_UNDEFINED = -1,
+  ACL_TENSOR_DATA_TENSOR,
+  ACL_TENSOR_DATA_END_OF_SEQUENCE,
+  ACL_TENSOR_DATA_ABNORMAL
+};
+
+typedef struct acltdtDataItem acltdtDataItem;
+typedef struct acltdtDataset acltdtDataset;
+typedef struct acltdtChannelHandle acltdtChannelHandle;
+
+/**
+ * @ingroup AscendCL
+ * @brief Get tensor type from item
+ *
+ * @param dataItem [IN] pointer to the data item
+ *
+ * @retval Tensor type.
+ * @retval ACL_TENSOR_DATA_UNDEFINED if dataItem is null
+ */
+ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get data type from item
+ *
+ * @param dataItem [IN] pointer to the data item
+ *
+ * @retval Data type.
+ * @retval ACL_DT_UNDEFINED if dataItem is null
+ */
+ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get data address from item
+ *
+ * @param dataItem [IN] pointer to data item
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get data size from item
+ *
+ * @param dataItem [IN] pointer to data item
+ *
+ * @retval 0 for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the number of dims from item
+ *
+ * @param dataItem [IN] pointer to data item
+ *
+ * @retval 0 for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dims from item
+ *
+ * @param dataItem [IN] the struct of data item
+ * @param dims [IN|OUT] pointer to the dims of dataItem
+ * @param dimNum [IN] the size of the dims
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
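+ *
+ * @par Example
+ * A dims-query sketch (illustrative only, not part of the original header;
+ * a shape of at most 8 dims is assumed):
+ * @code
+ * size_t dimNum = acltdtGetDimNumFromItem(dataItem);
+ * int64_t dims[8];
+ * if ((dimNum <= 8) &&
+ *     (acltdtGetDimsFromItem(dataItem, dims, dimNum) == ACL_SUCCESS)) {
+ *     // dims[0..dimNum-1] now hold the shape
+ * }
+ * @endcode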
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the struct of data item
+ *
+ * @param tdtType [IN] Tdt tensor type
+ * @param dims [IN] pointer of tdtDataItem's dims
+ * @param dimNum [IN] Dim number
+ * @param dataType [IN] Data type
+ * @param data [IN] Data pointer
+ * @param size [IN] Data size
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtDestroyDataItem
+ */
+ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
+                                                         aclDataType dataType, void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy the struct of data item
+ *
+ * @param dataItem [IN] pointer to the data item
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateDataItem
+ */
+ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the tdt dataset
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtDestroyDataset
+ */
+ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy the tdt dataset
+ *
+ * @param dataset [IN] pointer to the dataset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateDataset
+ */
+ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the data item
+ *
+ * @param dataset [IN] pointer to the dataset
+ * @param index [IN] index of the dataset
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtAddDataItem
+ */
+ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Add the data item to the dataset
+ *
+ * @param dataset [IN|OUT] pointer to the dataset
+ * @param dataItem [IN] pointer to the data item
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtGetDataItem
+ */
+ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of dataset
+ *
+ * @param dataset [IN] pointer to the dataset
+ *
+ * @retval 0 for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the name of dataset
+ *
+ * @param dataset [IN] pointer to the dataset
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY const char *acltdtGetDatasetName(const acltdtDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Stop the channel
+ *
+ * @param handle [IN] pointer to the channel handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
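+ *
+ * @par Example
+ * A channel lifetime sketch (illustrative only, not part of the original
+ * header; device 0 and the channel name are placeholders):
+ * @code
+ * acltdtChannelHandle *handle = acltdtCreateChannel(0U, "example_channel");
+ * if (handle != NULL) {
+ *     // ... acltdtSendTensor / acltdtReceiveTensor ...
+ *     (void)acltdtStopChannel(handle);     // unblock pending transfers
+ *     (void)acltdtDestroyChannel(handle);
+ * }
+ * @endcode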
+ * @retval OtherValues Failure + * + * @see acltdtCreateChannel | acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); + +/** + * @ingroup AscendCL + * @brief Create the channel + * + * @param deviceId [IN] the device id + * @param name [IN] the name of channel + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtStopChannel | acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); + +/** + * @ingroup AscendCL + * @brief Create the channel with max size + * + * @param deviceId [IN] the device id + * @param name [IN] the name of channel + * @param capacity [IN] the capacity of channel + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannelWithCapacity(uint32_t deviceId, const char *name, + size_t capacity); + +/** + * @ingroup AscendCL + * @brief Destroy the channel + * + * @param handle [IN] pointer to the channel handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateChannel | acltdtStopChannel + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); + +/** + * @ingroup AscendCL + * @brief Send tensor to device + * + * @param handle [IN] pointer to the channel handle + * @param dataset [IN] pointer to the dataset + * @param timeout [IN] to be reserved, now it must be -1 + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtReceiveTensor + */ +ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, + int32_t timeout); + +/** + * @ingroup AscendCL + * @brief Receive tensor from device + * + * @param handle [IN] pointer to the channel handle + * @param dataset [OUT] pointer to the dataset + * @param timeout [IN] to be reserved, now it must be -1 + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtSendTensor + */ +ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, + int32_t timeout); + +/** + * @ingroup AscendCL + * @brief query the size of the channel + * + * @param handle [IN] pointer to the channel handle + * @param size [OUT] current size of this channel + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + */ +ACL_FUNC_VISIBILITY aclError acltdtQueryChannelSize(const acltdtChannelHandle *handle, size_t *size); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ diff --git a/inc/external/acl/acl_tdt_queue.h b/inc/external/acl/acl_tdt_queue.h new file mode 100644 index 00000000..e940c020 --- /dev/null +++ b/inc/external/acl/acl_tdt_queue.h @@ -0,0 +1,476 @@ +/** + * Copyright 2019-2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ +#define INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ + +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_TDT_QUEUE_PERMISSION_MANAGE 1 +#define ACL_TDT_QUEUE_PERMISSION_DEQUEUE 2 +#define ACL_TDT_QUEUE_PERMISSION_ENQUEUE 4 + +typedef void *acltdtBuf; +typedef struct tagMemQueueAttr acltdtQueueAttr; +typedef struct acltdtQueueRouteList acltdtQueueRouteList; +typedef struct acltdtQueueRouteQueryInfo acltdtQueueRouteQueryInfo; +typedef struct acltdtQueueRoute acltdtQueueRoute; + +typedef enum { ACL_TDT_QUEUE_NAME_PTR = 0, ACL_TDT_QUEUE_DEPTH_UINT32 } acltdtQueueAttrType; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_SRC_UINT32 = 0, + ACL_TDT_QUEUE_ROUTE_DST_UINT32, + ACL_TDT_QUEUE_ROUTE_STATUS_INT32 +} acltdtQueueRouteParamType; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_QUERY_SRC = 0, + ACL_TDT_QUEUE_ROUTE_QUERY_DST, + ACL_TDT_QUEUE_ROUTE_QUERY_SRC_AND_DST +} acltdtQueueRouteQueryMode; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM = 0, + ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32, + ACL_TDT_QUEUE_ROUTE_QUERY_DST_ID_UINT32 +} acltdtQueueRouteQueryInfoParamType; + +/** + * @ingroup AscendCL + * @brief create queue + * + * @param attr [IN] pointer to the queue attr + * @param qid [OUT] pointer to the qid + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtDestroyQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtCreateQueue(const acltdtQueueAttr *attr, uint32_t *qid); + +/** + * @ingroup AscendCL + * @brief destroy queue + * + * @param qid [IN] qid which to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueue(uint32_t qid); + +/** + * @ingroup AscendCL + * @brief enqueue function + * + * @param qid [IN] qid + * @param buf [IN] acltdtBuf + * @param timeout [IN] timeout, -1 means blocking + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtDequeue + */ +ACL_FUNC_VISIBILITY aclError acltdtEnqueue(uint32_t qid, acltdtBuf buf, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief dequeue function + * + * @param qid [IN] qid + * @param buf [OUT] pointer to the acltdtBuf + * @param timeout [IN] timeout, -1 means blocking + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtEnqueue + */ +ACL_FUNC_VISIBILITY aclError acltdtDequeue(uint32_t qid, acltdtBuf *buf, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief enqueue function + * + * @param qid [IN] qid + * @param data [IN] the pointer to data buf + * @param dataSize [IN] the size of data buf + * @param userData [IN] the pointer to user data buf + * @param userDataSize [IN] the size of user data buf + * @param timeout [IN] timeout, -1 means blocking + * @param rsv [IN] reserved param + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acltdtDequeueData + */ +ACL_FUNC_VISIBILITY aclError acltdtEnqueueData(uint32_t qid, const void *data, size_t dataSize, const void *userData, + size_t userDataSize, int32_t timeout, uint32_t rsv); + +/** + * @ingroup AscendCL + * @brief dequeue function + * + * @param qid [IN] qid + * @param data [IN|OUT] the pointer to data buf + * @param dataSize [IN] the size of data buf + * @param retDataSize [OUT] the return size of data buf + * @param userData [IN|OUT] the pointer to user data buf + * @param userDataSize [IN] the size of user data buf + * @param timeout [IN] timeout, -1 means blocking + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtEnqueueData + */ +ACL_FUNC_VISIBILITY aclError acltdtDequeueData(uint32_t qid, void *data, size_t dataSize, size_t *retDataSize, + void *userData, size_t userDataSize, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief grant queue to other process + * + * @param qid [IN] qid + * @param pid [IN] pid of dst process + * @param permission [IN] permission of queue + * @param timeout [IN] timeout, -1 means blocking + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see ACL_TDT_QUEUE_PERMISSION_MANAGE | ACL_TDT_QUEUE_PERMISSION_DEQUEUE | ACL_TDT_QUEUE_PERMISSION_ENQUEUE + */ +ACL_FUNC_VISIBILITY aclError acltdtGrantQueue(uint32_t qid, int32_t pid, uint32_t permission, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief attach queue in current process + * + * @param qid [IN] qid + * @param timeout [IN] timeout, -1 means blocking + * @param permission [OUT] permission of queue + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtGrantQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtAttachQueue(uint32_t qid, int32_t timeout, uint32_t *permission); + +/** + * @ingroup AscendCL + * @brief bind queue routes + * + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtBindQueueRoutes(acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief unbind queue routes + * + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtUnbindQueueRoutes(acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief query queue routes according to query mode + * + * @param queryInfo [IN] pointer to the queue route query info + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryInfo *queryInfo, + acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief alloc acltdtBuf + * + * @param size [IN] alloc buf size + * @param type [IN] reserved parameters, need to set zero currently + * @param buf [OUT] pointer to the acltdtBuf + * + * @retval ACL_SUCCESS The function is successfully executed. 
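+ *
+ * @par Example
+ * A buffer lifetime sketch (illustrative only, not part of the original
+ * header; qid is a placeholder):
+ * @code
+ * acltdtBuf buf = NULL;
+ * if (acltdtAllocBuf(256U, 0U, &buf) == ACL_SUCCESS) {
+ *     void *data = NULL;
+ *     size_t size = 0U;
+ *     (void)acltdtGetBufData(buf, &data, &size);   // data points into buf
+ *     // ... fill data, then acltdtEnqueue(qid, buf, -1) ...
+ *     (void)acltdtFreeBuf(buf);
+ * }
+ * @endcode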
+ * @retval OtherValues Failure
+ *
+ * @see acltdtFreeBuf
+ */
+ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, uint32_t type, acltdtBuf *buf);
+
+/**
+ * @ingroup AscendCL
+ * @brief free acltdtBuf
+ *
+ * @param buf [IN] pointer to the acltdtBuf
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtAllocBuf
+ */
+ACL_FUNC_VISIBILITY aclError acltdtFreeBuf(acltdtBuf buf);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data buf address
+ *
+ * @param buf [IN] acltdtBuf
+ * @param dataPtr [OUT] pointer to the data ptr which is acquired from acltdtBuf
+ * @param size [OUT] pointer to the size
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtAllocBuf
+ */
+ACL_FUNC_VISIBILITY aclError acltdtGetBufData(const acltdtBuf buf, void **dataPtr, size_t *size);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the queue attr
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtDestroyQueueAttr
+ */
+ACL_FUNC_VISIBILITY acltdtQueueAttr *acltdtCreateQueueAttr();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy the queue attr
+ *
+ * @param attr [IN] pointer to the queue attr
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateQueueAttr
+ */
+ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueAttr(const acltdtQueueAttr *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set parameter for queue attr
+ *
+ * @param attr [IN|OUT] pointer to the queue attr
+ * @param type [IN] parameter type
+ * @param len [IN] parameter length
+ * @param param [IN] pointer to parameter value
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ *
+ * @see acltdtCreateQueueAttr
+ */
+ACL_FUNC_VISIBILITY aclError acltdtSetQueueAttr(acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len,
+                                                const void *param);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get parameter for queue attr.
+ *
+ * @param attr [IN] pointer to the queue attr
+ * @param type [IN] parameter type
+ * @param len [IN] parameter length
+ * @param paramRetSize [OUT] pointer to parameter real length
+ * @param param [OUT] pointer to parameter value
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ *
+ * @see acltdtCreateQueueAttr
+ */
+ACL_FUNC_VISIBILITY aclError acltdtGetQueueAttr(const acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len,
+                                                size_t *paramRetSize, void *param);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the queue route
+ *
+ * @param srcId [IN] src id of queue route
+ * @param dstId [IN] dst id of queue route
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtDestroyQueueRoute
+ */
+ACL_FUNC_VISIBILITY acltdtQueueRoute *acltdtCreateQueueRoute(uint32_t srcId, uint32_t dstId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy the queue route
+ *
+ * @param route [IN] pointer to the queue route
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateQueueRoute
+ */
+ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRoute(const acltdtQueueRoute *route);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get parameter for queue route.
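Reading a parameter back mirrors the setter: pass the expected length and receive the real length through `paramRetSize`. A short sketch, assuming `attr` was previously configured with a depth attribute:

```c
#include "acl/acl_tdt_queue.h"

static void ReadDepth(const acltdtQueueAttr *attr) {
    uint32_t depth = 0U;
    size_t retSize = 0U;
    if (acltdtGetQueueAttr(attr, ACL_TDT_QUEUE_DEPTH_UINT32, sizeof(depth),
                           &retSize, &depth) == ACL_SUCCESS) {
        /* retSize now holds the real parameter length, depth its value */
    }
}
```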
+ * + * @param route [IN] pointer to the queue route + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueRoute + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueRouteParam(const acltdtQueueRoute *route, acltdtQueueRouteParamType type, + size_t len, size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief Create the queue route list + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRouteList + */ +ACL_FUNC_VISIBILITY acltdtQueueRouteList *acltdtCreateQueueRouteList(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue route list + * + * @param routeList [IN] pointer to the queue route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteList(const acltdtQueueRouteList *routeList); + +/** + * @ingroup AscendCL + * @brief add queue route to the route list + * + * @param routeList [IN|OUT] pointer to the queue route list + * @param route [IN] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute + * + */ +ACL_FUNC_VISIBILITY aclError acltdtAddQueueRoute(acltdtQueueRouteList *routeList, const acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * @brief get queue route from route list + * + * @param routeList [IN] pointer to the queue route list + * @param index [IN] index of queue route in route list + * @param route [IN|OUT] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute + * + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueRoute(const acltdtQueueRouteList *routeList, size_t index, + acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * @brief get queue route num from route list + * + * @param routeList [IN] pointer to the queue route list + * + * @retval the number of queue route + * + */ +ACL_FUNC_VISIBILITY size_t acltdtGetQueueRouteNum(const acltdtQueueRouteList *routeList); + +/** + * @ingroup AscendCL + * @brief Create the queue route query info + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRouteQueryInfo + */ +ACL_FUNC_VISIBILITY acltdtQueueRouteQueryInfo *acltdtCreateQueueRouteQueryInfo(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue route query info + * + * @param info [IN] pointer to the queue route info + * + * @retval ACL_SUCCESS The function is successfully executed. 
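Putting the route-list helpers together with the query-info object (declared just below) gives the usual discovery loop. A hedged sketch: the source id is purely illustrative, and creating a placeholder route via acltdtCreateQueueRoute(0, 0) to be filled by acltdtGetQueueRoute is an assumption about the intended calling pattern.

```c
#include "acl/acl_tdt_queue.h"

/* List every route starting at srcId and read out each destination. */
static void DumpRoutesFrom(uint32_t srcId) {
    acltdtQueueRouteQueryInfo *info = acltdtCreateQueueRouteQueryInfo();
    acltdtQueueRouteList *routes = acltdtCreateQueueRouteList();
    acltdtQueueRouteQueryMode mode = ACL_TDT_QUEUE_ROUTE_QUERY_SRC;
    (void)acltdtSetQueueRouteQueryInfo(info, ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM, sizeof(mode), &mode);
    (void)acltdtSetQueueRouteQueryInfo(info, ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32, sizeof(srcId), &srcId);
    if (acltdtQueryQueueRoutes(info, routes) == ACL_SUCCESS) {
        size_t num = acltdtGetQueueRouteNum(routes);
        for (size_t i = 0U; i < num; ++i) {
            acltdtQueueRoute *route = acltdtCreateQueueRoute(0U, 0U);  /* placeholder, filled below */
            if (acltdtGetQueueRoute(routes, i, route) == ACL_SUCCESS) {
                uint32_t dst = 0U;
                size_t retSize = 0U;
                (void)acltdtGetQueueRouteParam(route, ACL_TDT_QUEUE_ROUTE_DST_UINT32,
                                               sizeof(dst), &retSize, &dst);
            }
            (void)acltdtDestroyQueueRoute(route);
        }
    }
    (void)acltdtDestroyQueueRouteList(routes);
    (void)acltdtDestroyQueueRouteQueryInfo(info);
}
```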
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateQueueRouteQueryInfo
+ *
+ */
+ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteQueryInfo(const acltdtQueueRouteQueryInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set parameter for queue route query info
+ *
+ * @param param [IN|OUT] pointer to the queue route query info
+ * @param type [IN] parameter type
+ * @param len [IN] parameter length
+ * @param value [IN] pointer to parameter value
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ *
+ * @see acltdtCreateQueueRouteQueryInfo
+ */
+ACL_FUNC_VISIBILITY aclError acltdtSetQueueRouteQueryInfo(acltdtQueueRouteQueryInfo *param,
+                                                          acltdtQueueRouteQueryInfoParamType type, size_t len,
+                                                          const void *value);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_
\ No newline at end of file
diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h
new file mode 100644
index 00000000..550471cf
--- /dev/null
+++ b/inc/external/acl/error_codes/ge_error_codes.h
@@ -0,0 +1,77 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
+#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_
+
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000U;
+static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009U;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011U;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012U;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013U;
+static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014U;
+static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015U;
+static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016U;
+static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017U;
+static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018U;
+static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019U;
+static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020U;
+static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021U;
+static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022U;
+static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000U;
+static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001U;
+static const uint32_t ACL_ERROR_GE_DEVICE_MEMORY_OPERATE_FAILED = 245002U;
+static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000U;
+static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005U;
+static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006U;
+static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007U;
+static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008U;
+static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009U;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // INC_EXTERNAL_GE_GE_ERROR_CODES_H_
diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h
new file mode 100644
index 00000000..05122efb
--- /dev/null
+++ b/inc/external/acl/error_codes/rt_error_codes.h
@@ -0,0 +1,123 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
+#define __INC_EXTERNEL_RT_ERROR_CODES_H__
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static const int32_t ACL_RT_SUCCESS = 0;  // success
+
+static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000;  // param invalid
+static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001;  // invalid device id
+static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002;  // current context null
+static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003;  // stream not in current context
+static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004;  // model not in current context
+static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005;  // stream not in model
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006;  // event timestamp invalid
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007;  // event timestamp reversal
+static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008;  // memory address unaligned
+static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009;  // open file failed
+static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010;  // write file failed
+static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011;  // error subscribe stream
+static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012;  // error subscribe thread
+static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013;  // group not set
+static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014;  // group not create
+static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015;  // callback not register to stream
+static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016;  // invalid memory type
+static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017;  // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018;  // invalid malloc type
+static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019;  // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020;  // task timeout
+
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000;  // feature not support
+static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001;  // memory allocation error
+static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002;  // memory free error
+static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003;  // aicore over flow
+static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004;  // no device
+static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005;  // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006;  // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007;  // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;  // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;  // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;  // no model resource
+static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011;  // no cdq resource
+static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012;  // over limit
+static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013;  // queue is empty
+static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014;  // queue is full
+static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015;  // repeated init
+static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016;  // aivec over flow
+static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017;  // common over flow
+
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;  // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;  // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;  // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;  // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;  // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;  // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;  // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;  // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;  // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;  // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;  // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;  // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;  // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;  // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;  // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;  // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;  // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;  // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;  // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;  // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;  // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;  // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;  // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;  // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;  // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;  // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;  // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;  // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;  // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;  // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030;  // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;  // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;  // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;  // device setup error
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034;  // vector core timeout
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035;  // vector core exception
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036;  // vector core trap exception
+static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037;  // cdq alloc batch abnormal
+static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038;  // can not change die mode
+static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039;  // single die mode can not set die
+static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040;  // invalid die id
+static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041;  // die mode not set
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042;
// aic trap read overflow +static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect + +#ifdef __cplusplus +} +#endif +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/external/acl/ops/acl_cblas.h b/inc/external/acl/ops/acl_cblas.h new file mode 100644 index 00000000..586d06a8 --- /dev/null +++ b/inc/external/acl/ops/acl_cblas.h @@ -0,0 +1,334 @@ +/** + * Copyright 2019-2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ +#define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ + +#include "acl/acl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; + +typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param dataTypeX [IN] datatype of vector x + * @param beta [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC If beta == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param dataTypeY [IN] datatype of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
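Taken together, the parameters above map onto the familiar BLAS contract y = alpha * op(A) * x + beta * y. A hedged fp16 sketch: device pointers `a`, `x`, `y` and `stream` are assumed to be set up elsewhere, lda is assumed to equal n for an untransposed densely packed A, and aclFloatToFloat16 is the fp16 conversion helper from acl_base.h.

```c
#include "acl/ops/acl_cblas.h"

static aclError Gemv(int m, int n, const void *a, const void *x, void *y, aclrtStream stream) {
    aclFloat16 alpha = aclFloatToFloat16(1.0f);
    aclFloat16 beta = aclFloatToFloat16(0.0f);  /* beta == 0: y need not be a valid input */
    return aclblasGemvEx(ACL_TRANS_N, m, n, &alpha,
                         a, n, ACL_FLOAT16,   /* lda assumed = n (packed, no transpose) */
                         x, 1, ACL_FLOAT16,   /* incx = 1: contiguous vector */
                         &beta, y, 1, ACL_FLOAT16,
                         ACL_COMPUTE_HIGH_PRECISION, stream);
}
```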
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, + aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, + const void *beta, void *y, int incy, aclDataType dataTypeY, + aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param dataTypeX [IN] datatype of vector x + * @param dataTypeY [IN] datatype of vector y + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, + aclDataType dataTypeX, aclDataType dataTypeY, + aclComputeType type, aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, + const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, + const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param beta [IN] pointer to scalar used for multiplication. 
+ * If beta value == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, + int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, + int incy, aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param handle [OUT] pointer to the pointer to the handle + * @param type [IN] computation type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param alpha [IN] pointer to scalar used for multiplication. of same type as dataTypeC + * @param matrixA [IN] pointer to matrix A + * @param lda [IN] leading dimension array used to store matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param matrixB [IN] pointer to matrix B + * @param ldb [IN] leading dimension array used to store matrix B + * @param dataTypeB [IN] datatype of matrix B + * @param beta [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC If beta == 0, + * then matrixC does not have to be a valid input + * @param matrixC [IN|OUT] pointer to matrix C + * @param ldc [IN] leading dimension array used to store matrix C + * @param dataTypeC [IN] datatype of matrix C + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
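The generic matrix-matrix entry point follows the same pattern with three matrix descriptors. A sketch for C = A * B in fp16 under stated assumptions: device buffers and `stream` exist, and the leading dimensions assume densely packed untransposed storage, which is an assumption rather than a documented requirement.

```c
#include "acl/ops/acl_cblas.h"

static aclError MatMul(int m, int n, int k, const void *devA, const void *devB, void *devC,
                       aclrtStream stream) {
    aclFloat16 alpha = aclFloatToFloat16(1.0f);
    aclFloat16 beta = aclFloatToFloat16(0.0f);  /* C is write-only when beta == 0 */
    return aclblasGemmEx(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N, m, n, k,
                         &alpha, devA, k, ACL_FLOAT16,
                         devB, n, ACL_FLOAT16,
                         &beta, devC, n, ACL_FLOAT16,
                         ACL_COMPUTE_HIGH_PRECISION, stream);
}
```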
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
+                                           int k, const void *alpha, const void *matrixA, int lda,
+                                           aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB,
+                                           const void *beta, void *matrixC, int ldc, aclDataType dataTypeC,
+                                           aclComputeType type, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for performing the matrix-matrix multiplication
+ *
+ * @param transA [IN] transpose type of matrix A
+ * @param transB [IN] transpose type of matrix B
+ * @param transC [IN] transpose type of matrix C
+ * @param m [IN] number of rows of matrix A and matrix C
+ * @param n [IN] number of columns of matrix B and matrix C
+ * @param k [IN] number of columns of matrix A and rows of matrix B
+ * @param dataTypeA [IN] datatype of matrix A
+ * @param dataTypeB [IN] datatype of matrix B
+ * @param dataTypeC [IN] datatype of matrix C
+ * @param type [IN] computation type
+ * @param handle [OUT] pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC,
+                                                          int m, int n, int k, aclDataType dataTypeA,
+                                                          aclDataType dataTypeB, aclDataType dataTypeC,
+                                                          aclComputeType type, aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief perform the matrix-matrix multiplication
+ *
+ * @param transA [IN] transpose type of matrix A
+ * @param transB [IN] transpose type of matrix B
+ * @param transC [IN] transpose type of matrix C
+ * @param m [IN] number of rows of matrix A and matrix C
+ * @param n [IN] number of columns of matrix B and matrix C
+ * @param k [IN] number of columns of matrix A and rows of matrix B
+ * @param alpha [IN] pointer to scalar used for multiplication
+ * @param matrixA [IN] pointer to matrix A
+ * @param lda [IN] leading dimension used to store the matrix A
+ * @param matrixB [IN] pointer to matrix B
+ * @param ldb [IN] leading dimension used to store the matrix B
+ * @param beta [IN] pointer to scalar used for multiplication.
+ * If beta value == 0,
+ * then matrixC does not have to be a valid input
+ * @param matrixC [IN|OUT] pointer to matrix C
+ * @param ldc [IN] leading dimension used to store the matrix C
+ * @param type [IN] computation type
+ * @param stream [IN] stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
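aclblasHgemm is the typed fp16 convenience wrapper. Like the other entry points it only launches work on `stream`, so results must be awaited before use. A sketch, assuming aclrtSynchronizeStream (the standard AscendCL stream wait from acl/acl.h) and packed untransposed matrices:

```c
#include "acl/ops/acl_cblas.h"

static aclError HalfMatMulSync(int m, int n, int k, const aclFloat16 *a, const aclFloat16 *b,
                               aclFloat16 *c, aclrtStream stream) {
    aclFloat16 alpha = aclFloatToFloat16(1.0f);
    aclFloat16 beta = aclFloatToFloat16(0.0f);
    aclError ret = aclblasHgemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N, m, n, k,
                                &alpha, a, k, b, n, &beta, c, n,
                                ACL_COMPUTE_HIGH_PRECISION, stream);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    return aclrtSynchronizeStream(stream);  /* wait for the launched gemm to finish */
}
```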
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, + int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, + const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, + aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, + int m, int n, int k, aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param alpha [IN] pointer to scalar used for multiplication + * @param matrixA [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param matrixB [IN] pointer to matrix B + * @param ldb [IN] leading dimension used to store the matrix B + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then matrixC does not have to be a valid input + * @param matrixC [IN|OUT] pointer to matrix C + * @param ldc [IN] leading dimension used to store the matrix C + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, + int k, const int32_t *alpha, const int8_t *matrixA, int lda, + const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, + int ldc, aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. 
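The CreateHandleFor* variants front-load operator selection so that repeated executions with a fixed shape avoid per-call setup. A minimal lifecycle sketch: aclopDestroyHandle is assumed to be the matching release call from acl_op.h, and execution itself would go through the aclop handle-execution API (not shown here).

```c
#include "acl/ops/acl_cblas.h"

/* Create once, reuse for every int8 gemm of this exact shape and layout,
 * then release with aclopDestroyHandle (assumed, from acl_op.h). */
static aclError PrepareInt8Gemm(int m, int n, int k, aclopHandle **handle) {
    return aclblasCreateHandleForS8gemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N, m, n, k,
                                        ACL_COMPUTE_LOW_PRECISION, handle);
}
```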
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC,
+                                                          int m, int n, int k, aclComputeType type,
+                                                          aclopHandle **handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h
new file mode 100644
index 00000000..a46839f8
--- /dev/null
+++ b/inc/external/acl/ops/acl_dvpp.h
@@ -0,0 +1,2683 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if !defined(ENABLE_DVPP_INTERFACE)
+#if defined(_MSC_VER)
+#error message("if you want to use dvpp functions, please use the macro definition (ENABLE_DVPP_INTERFACE).")
+#else
+#error "if you want to use dvpp functions, please use the macro definition (ENABLE_DVPP_INTERFACE)."
+#endif
+#endif
+
+#ifndef INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
+#define INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "acl/acl.h"
+#include "acl/acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct acldvppPicDesc acldvppPicDesc;
+typedef struct acldvppBatchPicDesc acldvppBatchPicDesc;
+typedef struct acldvppRoiConfig acldvppRoiConfig;
+typedef struct acldvppResizeConfig acldvppResizeConfig;
+typedef struct acldvppBorderConfig acldvppBorderConfig;
+typedef struct acldvppLutMap acldvppLutMap;
+typedef struct acldvppChannelDesc acldvppChannelDesc;
+typedef struct acldvppJpegeConfig acldvppJpegeConfig;
+typedef struct aclvdecChannelDesc aclvdecChannelDesc;
+typedef struct acldvppStreamDesc acldvppStreamDesc;
+typedef struct aclvdecFrameConfig aclvdecFrameConfig;
+typedef struct aclvencChannelDesc aclvencChannelDesc;
+typedef struct aclvencFrameConfig aclvencFrameConfig;
+typedef struct acldvppHist acldvppHist;
+typedef void (*aclvdecCallback)(acldvppStreamDesc *input, acldvppPicDesc *output, void *userData);
+typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output, void *userdata);
+
+// Supported Pixel Format
+enum acldvppPixelFormat {
+  PIXEL_FORMAT_YUV_400 = 0,  // 0
+  PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1,  // 1
+  PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2,  // 2
+  PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3,  // 3
+  PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4,  // 4
+  PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5,  // 5
+  PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6,  // 6
+  PIXEL_FORMAT_YUYV_PACKED_422 = 7,  // 7
+  PIXEL_FORMAT_UYVY_PACKED_422 = 8,  // 8
+  PIXEL_FORMAT_YVYU_PACKED_422 = 9,  // 9
+  PIXEL_FORMAT_VYUY_PACKED_422 = 10,  // 10
+  PIXEL_FORMAT_YUV_PACKED_444 = 11,  // 11
+  PIXEL_FORMAT_RGB_888 = 12,  // 12
+  PIXEL_FORMAT_BGR_888 = 13,  // 13
+  PIXEL_FORMAT_ARGB_8888 = 14,  // 14
+  PIXEL_FORMAT_ABGR_8888 = 15,  // 15
+  PIXEL_FORMAT_RGBA_8888 = 16,  // 16
+  PIXEL_FORMAT_BGRA_8888 = 17,  // 17
+  PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18,  // 18
+  PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19,  // 19
+  PIXEL_FORMAT_YVU_PLANAR_420 = 20,  // 20
+  PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444, + PIXEL_FORMAT_RGB_444 = 23, + PIXEL_FORMAT_BGR_444, + PIXEL_FORMAT_ARGB_4444, + PIXEL_FORMAT_ABGR_4444, + PIXEL_FORMAT_RGBA_4444, + PIXEL_FORMAT_BGRA_4444, + PIXEL_FORMAT_RGB_555, + PIXEL_FORMAT_BGR_555, + PIXEL_FORMAT_RGB_565, + PIXEL_FORMAT_BGR_565, + PIXEL_FORMAT_ARGB_1555, + PIXEL_FORMAT_ABGR_1555, + PIXEL_FORMAT_RGBA_1555, + PIXEL_FORMAT_BGRA_1555, + PIXEL_FORMAT_ARGB_8565, + PIXEL_FORMAT_ABGR_8565, + PIXEL_FORMAT_RGBA_8565, + PIXEL_FORMAT_BGRA_8565, + PIXEL_FORMAT_RGB_BAYER_8BPP = 50, + PIXEL_FORMAT_RGB_BAYER_10BPP, + PIXEL_FORMAT_RGB_BAYER_12BPP, + PIXEL_FORMAT_RGB_BAYER_14BPP, + PIXEL_FORMAT_RGB_BAYER_16BPP, + PIXEL_FORMAT_BGR_888_PLANAR = 70, + PIXEL_FORMAT_HSV_888_PACKAGE, + PIXEL_FORMAT_HSV_888_PLANAR, + PIXEL_FORMAT_LAB_888_PACKAGE, + PIXEL_FORMAT_LAB_888_PLANAR, + PIXEL_FORMAT_S8C1, + PIXEL_FORMAT_S8C2_PACKAGE, + PIXEL_FORMAT_S8C2_PLANAR, + PIXEL_FORMAT_S16C1, + PIXEL_FORMAT_U8C1, + PIXEL_FORMAT_U16C1, + PIXEL_FORMAT_S32C1, + PIXEL_FORMAT_U32C1, + PIXEL_FORMAT_U64C1, + PIXEL_FORMAT_S64C1, + PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, + PIXEL_FORMAT_YVU_SEMIPLANAR_440, + PIXEL_FORMAT_FLOAT32, + PIXEL_FORMAT_BUTT, + PIXEL_FORMAT_UNKNOWN = 10000 +}; + +// Stream Format +enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; + +// Supported Channel Mode +enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4, DVPP_CHNMODE_PNGD = 8 }; + +// Supported Border Type +enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; + +// Venc parameter type +enum aclvencChannelDescParamType { + ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 +}; + +// Jpeg picture format +enum acldvppJpegFormat { + ACL_JPEG_CSS_444 = 0, + ACL_JPEG_CSS_422, + ACL_JPEG_CSS_420, + ACL_JPEG_CSS_GRAY, + ACL_JPEG_CSS_440, + ACL_JPEG_CSS_411, + ACL_JPEG_CSS_UNKNOWN = 1000 +}; + +enum acldvppChannelDescParamType { + ACL_DVPP_CSC_MATRIX_UINT32 = 0, + ACL_DVPP_MODE_UINT32, + ACL_DVPP_CHANNEL_ID_UINT64, + ACL_DVPP_CHANNEL_HEIGHT_UINT32, + ACL_DVPP_CHANNEL_WIDTH_UINT32 +}; + +enum aclvdecChannelDescParamType { + ACL_VDEC_CSC_MATRIX_UINT32 = 0, + ACL_VDEC_OUT_MODE_UINT32, + ACL_VDEC_THREAD_ID_UINT64, + ACL_VDEC_CALLBACK_PTR, + ACL_VDEC_CHANNEL_ID_UINT32, + ACL_VDEC_ENCODE_TYPE_UINT32, + ACL_VDEC_OUT_PIC_FORMAT_UINT32, + ACL_VDEC_OUT_PIC_WIDTH_UINT32, + ACL_VDEC_OUT_PIC_HEIGHT_UINT32, + ACL_VDEC_REF_FRAME_NUM_UINT32, + ACL_VDEC_BIT_DEPTH_UINT32 +}; + +// Csc Matrix can be used both for acldvppChannelDescParamType and aclvdecChannelDescParamType +enum acldvppCscMatrix { + ACL_DVPP_CSC_MATRIX_BT601_WIDE = 0, + ACL_DVPP_CSC_MATRIX_BT601_NARROW, + ACL_DVPP_CSC_MATRIX_BT709_WIDE, + ACL_DVPP_CSC_MATRIX_BT709_NARROW, + ACL_DVPP_CSC_MATRIX_BT2020_WIDE, + ACL_DVPP_CSC_MATRIX_BT2020_NARROW +}; + +/** + * @ingroup AscendCL + * @brief alloc device memory for dvpp. + * + * @par Function + * @li It's mainly used for allocating memory to device media data processing. + * The requested memory meets the data processing requirements. + * After calling this interface to request memory, + * you must release the memory using the acldvppFree interface. 
+ * @li When calling the acldvppMalloc interface to apply for memory,
+ * the size entered by the user is rounded up to a multiple of 32 bytes,
+ * and an additional 32 bytes are allocated.
+ *
+ * @par Restriction
+ * If the user requests a large block of memory through the acldvppMalloc
+ * interface and partitions and manages it on their own,
+ * the slot reserved for each picture must be the actual data size of that
+ * picture aligned up to a multiple of 32 bytes plus an additional
+ * 32 bytes (ALIGN_UP(len) + 32 bytes).
+ *
+ * @param devPtr [OUT] memory pointer.
+ * @param size [IN] memory size.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppFree
+ */
+ACL_FUNC_VISIBILITY aclError acldvppMalloc(void **devPtr, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief free device memory for dvpp.
+ *
+ * @par Function
+ * Free the memory requested through the acldvppMalloc interface
+ * @param devPtr [IN] memory pointer to free.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppMalloc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppFree(void *devPtr);
+
+/**
+ * @ingroup AscendCL
+ * @brief create DvppChannelDesc.
+ *
+ * @par Function
+ * Create a channel for image data processing.
+ * The same channel can be reused
+ * and is no longer available after destruction
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppChannelDesc *acldvppCreateChannelDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy dvppChannelDesc.
+ *
+ * @par Function
+ * Can only destroy channel descriptions created by the acldvppCreateChannelDesc interface
+ * @param channelDesc [IN] the channel description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannelDesc | acldvppDestroyChannel
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyChannelDesc(acldvppChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp channel Id.
+ *
+ * @par Restriction
+ * Interface calling sequence:
+ * acldvppCreateChannelDesc --> acldvppCreateChannel -->
+ * acldvppGetChannelDescChannelId
+ *
+ * @param channelDesc [IN] the channel description.
+ *
+ * @retval channel id.
+ *
+ * @see acldvppCreateChannelDesc | acldvppCreateChannel
+ */
+ACL_FUNC_VISIBILITY uint64_t acldvppGetChannelDescChannelId(const acldvppChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp picture description.
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppCreatePicDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp picture description.
+ *
+ * @par Function
+ * Can only destroy picture description information created
+ * through acldvppCreatePicDesc interface.
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyPicDesc(acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's data.
+ *
+ * @param picDesc [OUT] dvpp picture description.
+ * @param dataDev [IN] dvpp picture dataDev. Must be the memory
+ * requested using the acldvppMalloc interface.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
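A picture description ties a dvpp buffer to its geometry; the setters below fill it in. A hedged sketch for a 1920x1080 YUV420SP image: the stride values happen to satisfy the 16-byte width and 2-line height alignment rules quoted later in this section, and the 3/2 size factor is the usual YUV420 semi-planar footprint.

```c
#include "acl/ops/acl_dvpp.h"

static acldvppPicDesc *MakePicDesc(void) {
    const uint32_t width = 1920U, height = 1080U;
    const uint32_t widthStride = 1920U;   /* already a multiple of 16 */
    const uint32_t heightStride = 1080U;  /* already a multiple of 2  */
    const uint32_t bufSize = widthStride * heightStride * 3U / 2U;  /* YUV420SP */
    void *dev = NULL;
    if (acldvppMalloc(&dev, bufSize) != ACL_SUCCESS) {
        return NULL;
    }
    acldvppPicDesc *pic = acldvppCreatePicDesc();
    (void)acldvppSetPicDescData(pic, dev);
    (void)acldvppSetPicDescSize(pic, bufSize);
    (void)acldvppSetPicDescFormat(pic, PIXEL_FORMAT_YUV_SEMIPLANAR_420);
    (void)acldvppSetPicDescWidth(pic, width);
    (void)acldvppSetPicDescHeight(pic, height);
    (void)acldvppSetPicDescWidthStride(pic, widthStride);
    (void)acldvppSetPicDescHeightStride(pic, heightStride);
    return pic;  /* caller destroys pic and frees its data buffer */
}
```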
+ * @retval OtherValues Failure
+ *
+ * @see acldvppMalloc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescData(acldvppPicDesc *picDesc, void *dataDev);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's size.
+ *
+ * @param picDesc [OUT] dvpp picture description.
+ * @param size [IN] dvpp picture size.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescSize(acldvppPicDesc *picDesc, uint32_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's format.
+ *
+ * @param picDesc [OUT] dvpp picture description.
+ * @param format [IN] dvpp picture format.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescFormat(acldvppPicDesc *picDesc, acldvppPixelFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's width.
+ *
+ * @param picDesc [OUT] dvpp picture description.
+ * @param width [IN] dvpp picture width.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidth(acldvppPicDesc *picDesc, uint32_t width);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's height.
+ *
+ * @param picDesc [OUT] dvpp picture description.
+ * @param height [IN] dvpp picture height.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeight(acldvppPicDesc *picDesc, uint32_t height);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's widthStride.
+ *
+ * @par Restriction
+ * Width alignment requirements:
+ * @li The minimum stride is 32 and the maximum is 4096 * 4
+ * (that is, an image in argb format with a width of 4096);
+ * @li For 8K scaling, widthStride is required to be aligned to 2;
+ * @li For non 8K scaling, the calculation formula for widthStride
+ * is different for different image formats:
+ * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16
+ * @li yuv422packed: input image width * 2, then aligned to 16
+ * @li yuv444packed, rgb888: input image width * 3, then aligned to 16
+ * @li xrgb8888: input image width * 4, then aligned to 16
+ * @li HFBC: input image width
+ *
+ * @param picDesc [OUT] dvpp picture description.
+ * @param widthStride [IN] dvpp picture widthStride.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidthStride(acldvppPicDesc *picDesc, uint32_t widthStride);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's heightStride.
+ *
+ * @par Restriction
+ * Height alignment requirements:
+ * @li The height of the input image is aligned to 2.
+ * The height stride has a minimum of 6 and a maximum of 4096.
+ *
+ * @param picDesc [OUT] dvpp picture description.
+ * @param heightStride [IN] dvpp picture heightStride.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeightStride(acldvppPicDesc *picDesc, uint32_t heightStride);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's retcode.
+ *
+ * @param picDesc [OUT] dvpp picture description.
+ * @param retCode [IN] dvpp picture retcode.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescRetCode(acldvppPicDesc *picDesc, uint32_t retCode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get picture data.
+ *
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval picture data addr.
+ * @retval default nullptr.
+ */
+ACL_FUNC_VISIBILITY void *acldvppGetPicDescData(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get picture data size.
+ *
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval picture data size.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescSize(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's format.
+ *
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval format
+ * @retval default PIXEL_FORMAT_YUV_400.
+ */
+ACL_FUNC_VISIBILITY acldvppPixelFormat acldvppGetPicDescFormat(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's width.
+ *
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval width.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidth(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's height.
+ *
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval height.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeight(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's widthStride.
+ *
+ * @par Restriction
+ * Width alignment requirements:
+ * @li The minimum stride is 32 and the maximum is 4096 * 4
+ * (that is, an image in argb format with a width of 4096);
+ * @li For 8K scaling, widthStride is required to be aligned to 2;
+ * @li For non 8K scaling, the calculation formula for widthStride
+ * is different for different image formats:
+ * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16
+ * @li yuv422packed: input image width * 2, then aligned to 16
+ * @li yuv444packed, rgb888: input image width * 3, then aligned to 16
+ * @li xrgb8888: input image width * 4, then aligned to 16
+ * @li HFBC: input image width
+ *
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval stride width.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidthStride(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's heightStride.
+ *
+ * @par Restriction
+ * Height alignment requirements:
+ * @li The height of the input image is aligned to 2.
+ * The height stride has a minimum of 6 and a maximum of 4096.
+ *
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval stride height.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeightStride(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's retcode.
+ *
+ * @param picDesc [IN] dvpp picture description.
+ *
+ * @retval ret code.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp roi config.
+ *
+ * @param left [IN] the left offset, must be even
+ * @param right [IN] the right offset, must be odd
+ * @param top [IN] the top offset, must be even
+ * @param bottom [IN] the bottom offset, must be odd
+ *
+ * @retval null for failed.
+ * @retval other success
+ */
+ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top,
+                                                             uint32_t bottom);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp roi config.
+ *
+ * @par Function
+ * Destroys data created through the acldvppCreateRoiConfig interface
+ * @param roiConfig [IN] dvpp roi config.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateRoiConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyRoiConfig(acldvppRoiConfig *roiConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set left of RoiConfig.
+ *
+ * @param config [OUT] RoiConfig
+ * @param left [IN] left offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigLeft(acldvppRoiConfig *config, uint32_t left);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set right of RoiConfig.
+ *
+ * @param config [OUT] RoiConfig
+ * @param right [IN] right offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigRight(acldvppRoiConfig *config, uint32_t right);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set top of RoiConfig.
+ *
+ * @param config [OUT] RoiConfig
+ * @param top [IN] top offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigTop(acldvppRoiConfig *config, uint32_t top);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set bottom of RoiConfig.
+ *
+ * @param config [OUT] RoiConfig
+ * @param bottom [IN] bottom offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, uint32_t bottom);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set RoiConfig.
+ *
+ * @param config [OUT] RoiConfig
+ * @param left [IN] left offset
+ * @param right [IN] right offset
+ * @param top [IN] top offset
+ * @param bottom [IN] bottom offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top,
+                                                 uint32_t bottom);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp resize config.
+ * Specifying a scaling algorithm is not supported;
+ * the default scaling algorithm, "nearest neighbor interpolation", is always used.
+ *
+ * @retval null for failed.
+ * @retval other success.
+ */
+ACL_FUNC_VISIBILITY acldvppResizeConfig *acldvppCreateResizeConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp resize config.
+ *
+ * @par Function
+ * Destroys the scaling configuration data created by
+ * the acldvppCreateResizeConfig interface
+ *
+ * @param resizeConfig [IN] resize config.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyResizeConfig(acldvppResizeConfig *resizeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create jpege config.
+ *
+ * @retval null for failed.
+ * @retval other success.
+ */
+ACL_FUNC_VISIBILITY acldvppJpegeConfig *acldvppCreateJpegeConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy jpege config.
+ *
+ * @par Function
+ * Destroys the encoding configuration data created by
+ * the acldvppCreateJpegeConfig interface
+ * @param jpegeConfig [IN] config pointer to destroy.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateJpegeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyJpegeConfig(acldvppJpegeConfig *jpegeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set jpege config's level.
+ *
+ * @param jpegeConfig [OUT] Call the acldvppCreateJpegeConfig
+ * interface to create acldvppJpegeConfig data
+ * @param level [IN] Encoding quality range [0, 100],
+ * where level 0 encoding quality is similar to level 100,
+ * and the smaller the value in [1, 100],
+ * the worse the quality of the output picture.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetJpegeConfigLevel(acldvppJpegeConfig *jpegeConfig, uint32_t level);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get jpege config's level.
+ *
+ * @param jpegeConfig [IN] jpege config.
+ *
+ * @retval compression level.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetJpegeConfigLevel(const acldvppJpegeConfig *jpegeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief create vdecChannelDesc. Channel description information
+ * used when creating a video data processing channel.
+ *
+ * @retval null for failed.
+ * @retval other success
+ */
+ACL_FUNC_VISIBILITY aclvdecChannelDesc *aclvdecCreateChannelDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy vdecChannelDesc.
+ *
+ * @par Function
+ * Can only destroy aclvdecChannelDesc type created
+ * through aclvdecCreateChannelDesc interface
+ * @param channelDesc [IN] channel description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCreateChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannelDesc(aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's channel id.
+ *
+ * @param channelDesc [OUT] vdec channel description.
+ * @param channelId [IN] decoding channel id: 0~15.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescChannelId(aclvdecChannelDesc *channelDesc, uint32_t channelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's thread id.
+ *
+ * @param channelDesc [OUT] vdec channel description.
+ * @param threadId [IN] thread id.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescThreadId(aclvdecChannelDesc *channelDesc, uint64_t threadId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's callback function.
+ *
+ * @param channelDesc [OUT] vdec channel description.
+ * @param callback [IN] function callback. Function prototype:
+ * void (* aclvdecCallback)
+ * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata)
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCallback
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescCallback(aclvdecChannelDesc *channelDesc, aclvdecCallback callback);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's video encoding type.
+ *
+ * @param channelDesc [OUT] vdec channel description.
+ * @param enType [IN] video encoding type. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescEnType(aclvdecChannelDesc *channelDesc, acldvppStreamFormat enType); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture format. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicFormat [IN] out picture format (acldvppPixelFormat). + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicFormat(aclvdecChannelDesc *channelDesc, + acldvppPixelFormat outPicFormat); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture width. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicWidth [IN] out picture width. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicWidth(aclvdecChannelDesc *channelDesc, uint32_t outPicWidth); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture height. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicHeight [IN] out picture height. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicHeight(aclvdecChannelDesc *channelDesc, uint32_t outPicHeight); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's reference frame num. + * + * @param channelDesc [OUT] vdec channel description. + * @param refFrameNum [IN] reference frame num. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescRefFrameNum(aclvdecChannelDesc *channelDesc, uint32_t refFrameNum); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's bit depth. + * + * @param channelDesc [OUT] vdec channel description. + * @param bitDepth [IN] bit depth. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescBitDepth(aclvdecChannelDesc *channelDesc, uint32_t bitDepth); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's channel id. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval decoding channel id: 0~15. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescChannelId(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's thread id. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval thread id. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint64_t aclvdecGetChannelDescThreadId(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's callback function. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval function callback.Function prototype: + * void (* aclvdecCallback) + * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) + * @retval default null. 
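+ *
+ * @par Example
+ * A typical configuration sequence using the setters above (sketch only;
+ * `MyVdecCallback` is a user-supplied function matching the aclvdecCallback
+ * prototype, and PIXEL_FORMAT_YUV_SEMIPLANAR_420 is assumed from the
+ * acldvppPixelFormat enum):
+ * @code
+ * aclvdecChannelDesc *vdecDesc = aclvdecCreateChannelDesc();
+ * (void)aclvdecSetChannelDescChannelId(vdecDesc, 0);  // channel id in [0, 15]
+ * (void)aclvdecSetChannelDescEnType(vdecDesc, H265_MAIN_LEVEL);
+ * (void)aclvdecSetChannelDescOutPicFormat(vdecDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420);
+ * (void)aclvdecSetChannelDescCallback(vdecDesc, MyVdecCallback);
+ * @endcode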
+ * + * @see aclvdecCallback + */ +ACL_FUNC_VISIBILITY aclvdecCallback aclvdecGetChannelDescCallback(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's video encoding type. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval video encoding type. + * @retval default H265_MAIN_LEVEL. + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat aclvdecGetChannelDescEnType(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture format. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture format. + * @retval default DVPP_OUTPUT_YUV420SP_UV. + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat aclvdecGetChannelDescOutPicFormat(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture width. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture width. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicWidth(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture height. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture height (for vdec malloc memory). + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicHeight(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's bit depth. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval bit depth. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescBitDepth(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's reference frame num. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval reference frame num. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescRefFrameNum(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief create vencChannelDesc. + * + * @retval null for failed, other success + */ +ACL_FUNC_VISIBILITY aclvencChannelDesc *aclvencCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy vencChannelDesc. + * + * @param channelDesc [IN] channel desc. + * + * @retval ACL_SUCCESS:success, other:failed + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyChannelDesc(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Set decoding thread id for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param threadId [IN] thread id + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescThreadId(aclvencChannelDesc *channelDesc, uint64_t threadId); + +/** + * @ingroup AscendCL + * @brief Set func callback for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param callback [IN] func callback + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescCallback(aclvencChannelDesc *channelDesc, aclvencCallback callback); + +/** + * @ingroup AscendCL + * @brief Set video encoding type for venc channel desc. 
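+ *
+ * @par Example
+ * Creating a venc channel desc and wiring its callback (sketch; `MyVencCallback`
+ * and `reportThreadId` are user-supplied placeholders):
+ * @code
+ * aclvencChannelDesc *vencDesc = aclvencCreateChannelDesc();
+ * if (vencDesc != nullptr) {
+ *     (void)aclvencSetChannelDescThreadId(vencDesc, reportThreadId);
+ *     (void)aclvencSetChannelDescCallback(vencDesc, MyVencCallback);
+ * }
+ * @endcode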
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param enType [IN] video encoding type
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescEnType(aclvencChannelDesc *channelDesc, acldvppStreamFormat enType);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set pic format for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param picFormat [IN] pic format
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicFormat(aclvencChannelDesc *channelDesc,
+                                                            acldvppPixelFormat picFormat);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set out pic width for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param picWidth [IN] pic width
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicWidth(aclvencChannelDesc *channelDesc, uint32_t picWidth);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set pic height for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param picHeight [IN] pic height
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicHeight(aclvencChannelDesc *channelDesc, uint32_t picHeight);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set key frame interval for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param keyFrameInterval [IN] Interval of key frame
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescKeyFrameInterval(aclvencChannelDesc *channelDesc,
+                                                                   uint32_t keyFrameInterval);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set output buffer address for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param bufAddr [IN] output buffer address
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufAddr(aclvencChannelDesc *channelDesc, void *bufAddr);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set output buffer size for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param bufSize [IN] output buffer size
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufSize(aclvencChannelDesc *channelDesc, uint32_t bufSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set rc mode for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param rcMode [IN] venc rc mode (VBR=1, CBR=2)
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescRcMode(aclvencChannelDesc *channelDesc, uint32_t rcMode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set source rate for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param srcRate [IN] source rate
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescSrcRate(aclvencChannelDesc *channelDesc, uint32_t srcRate);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set max bit rate for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param maxBitRate [IN] max bit rate
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc *channelDesc, uint32_t maxBitRate);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set venc parameter for venc channel desc.
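+ *
+ * @par Example
+ * A typical geometry/rate setup continuing the `vencDesc` sketch above (values
+ * are illustrative, not recommendations; the pixel format enum value is assumed):
+ * @code
+ * (void)aclvencSetChannelDescPicFormat(vencDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420);
+ * (void)aclvencSetChannelDescPicWidth(vencDesc, 1920);
+ * (void)aclvencSetChannelDescPicHeight(vencDesc, 1080);
+ * (void)aclvencSetChannelDescKeyFrameInterval(vencDesc, 16);
+ * (void)aclvencSetChannelDescRcMode(vencDesc, 2);  // CBR, per the rc mode doc above
+ * @endcode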
+ * + * @param channelDesc [OUT] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, + const void *param); + +/** + * @ingroup AscendCL + * @brief Get output buffer address for venc channel desc. + * + * @param channelDesc[IN] venc channel desc + * + * @retval output buffer address + */ +ACL_FUNC_VISIBILITY void *aclvencGetChannelDescBufAddr(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get output buffer size for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval output buffer size + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescBufSize(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get decoding channel id for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval decoding channel id: 0~15, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescChannelId(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get decoding thread id for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval thread id, default 0 + */ +ACL_FUNC_VISIBILITY uint64_t aclvencGetChannelDescThreadId(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get func callback for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval func callback, default null + */ +ACL_FUNC_VISIBILITY aclvencCallback aclvencGetChannelDescCallback(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get video encoding type for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval video encoding type, default H265_MAIN_LEVEL + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat aclvencGetChannelDescEnType(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic format for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic format + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat aclvencGetChannelDescPicFormat(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic width for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic width, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicWidth(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic height for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic height, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicHeight(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get interval of key frame for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval interval of key frame, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescKeyFrameInterval(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get rc mode for venc channel desc. 
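+ *
+ * @par Example
+ * Reading the configuration back with the getters above (sketch; `vencDesc`
+ * as configured in the earlier examples):
+ * @code
+ * uint32_t picWidth = aclvencGetChannelDescPicWidth(vencDesc);
+ * uint32_t picHeight = aclvencGetChannelDescPicHeight(vencDesc);
+ * uint32_t rcMode = aclvencGetChannelDescRcMode(vencDesc);
+ * @endcode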
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval rc mode, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescRcMode(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get source rate for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval source rate, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescSrcRate(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get max bit rate for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval max bit rate, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get venc parameter for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ * @param paramType [IN] parameter type
+ * @param length [IN] parameter length
+ * @param paramRetSize [OUT] pointer to parameter real length
+ * @param param [OUT] pointer to parameter value
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
+                                                        aclvencChannelDescParamType paramType, size_t length,
+                                                        size_t *paramRetSize, void *param);
+
+/**
+ * @ingroup AscendCL
+ * @brief get forced restart of I-frame interval from config
+ *
+ * @param config [IN] venc frame config
+ *
+ * @retval 0: Not forced; 1: Forced restart of I-frame; -1: error
+ */
+ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigForceIFrame(const aclvencFrameConfig *config);
+
+/**
+ * @ingroup AscendCL
+ * @brief get end frame flag (eos) from config
+ *
+ * @param config [IN] venc frame config
+ *
+ * @retval Whether it is the end frame: 0: no; 1: end frame
+ */
+ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigEos(const aclvencFrameConfig *config);
+
+/**
+ * @ingroup AscendCL
+ * @brief set single frame encoding configuration parameters
+ *
+ * @param config [OUT] venc frame config
+ * @param forceIFrame [IN] forced restart of I-frame interval: 0: Not forced; 1: Forced restart of I-frame
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigForceIFrame(aclvencFrameConfig *config, uint8_t forceIFrame);
+
+/**
+ * @ingroup AscendCL
+ * @brief set single frame encoding configuration parameters
+ *
+ * @param config [OUT] venc frame config
+ * @param eos [IN] Whether it is the end frame: 0: no; 1: end frame
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigEos(aclvencFrameConfig *config, uint8_t eos);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp venc destroy frame config
+ *
+ * @param config [IN] venc frame config
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencDestroyFrameConfig(aclvencFrameConfig *config);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp venc frame config.
+ *
+ * @retval null for failed, other aclvencFrameConfig ptr
+ */
+ACL_FUNC_VISIBILITY aclvencFrameConfig *aclvencCreateFrameConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp venc channel.
+ *
+ * @param channelDesc [IN|OUT] venc channel desc
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencCreateChannel(aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp venc channel.
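+ *
+ * @par Example
+ * Frame-config lifecycle around the send-frame interface documented below
+ * (sketch; error handling elided):
+ * @code
+ * aclvencFrameConfig *frameCfg = aclvencCreateFrameConfig();
+ * (void)aclvencSetFrameConfigForceIFrame(frameCfg, 0);  // do not force an I-frame
+ * (void)aclvencSetFrameConfigEos(frameCfg, 0);          // not the last frame
+ * // ... call aclvencSendFrame() per frame; set eos to 1 before the final frame ...
+ * (void)aclvencDestroyFrameConfig(frameCfg);
+ * @endcode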
+ * + * @param channelDesc [IN] venc channel desc + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyChannel(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp venc launch send frame task. + * + * @param channelDesc [IN] venc channel desc + * @param input [IN] input picture desc + * @param reserve [IN] reserve parameter + * @param config [IN] dvpp frame config + * @param userdata [IN] user callback function + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSendFrame(aclvencChannelDesc *channelDesc, acldvppPicDesc *input, void *reserve, + aclvencFrameConfig *config, void *userdata); + +/** + * @ingroup AscendCL + * @brief Create dvpp stream description. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppStreamDesc *acldvppCreateStreamDesc(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp stream description. + * + * @par Function + * Can only destroy acldvppStreamDesc type created through + * acldvppCreateStreamDesc interface. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateStreamDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyStreamDesc(acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Set stream description's data addr. + * + * @param streamDesc [OUT] dvpp stream description. + * @param dataDev [IN] data addr. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescData(acldvppStreamDesc *streamDesc, void *dataDev); + +/** + * @ingroup AscendCL + * @brief Set stream description's data size. + * + * @param streamDesc [OUT] dvpp stream description. + * @param size [IN] data size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescSize(acldvppStreamDesc *streamDesc, uint32_t size); + +/** + * @ingroup AscendCL + * @brief Set stream description's format. + * + * @param streamDesc [OUT] dvpp stream description. + * @param format [IN] stream format. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescFormat(acldvppStreamDesc *streamDesc, acldvppStreamFormat format); + +/** + * @ingroup AscendCL + * @brief Set stream description's timestamp. + * + * @param streamDesc [OUT] dvpp stream description. + * @param timestamp [IN] current timestamp. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescTimestamp(acldvppStreamDesc *streamDesc, uint64_t timestamp); + +/** + * @ingroup AscendCL + * @brief Set stream description's ret code. + * + * @param streamDesc [OUT] dvpp stream description. + * @param retCode [IN] result code. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescRetCode(acldvppStreamDesc *streamDesc, uint32_t retCode); + +/** + * @ingroup AscendCL + * @brief Set stream description's eos. + * + * @param streamDesc [OUT] dvpp stream description. + * @param eos [IN] end flag of sequence. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescEos(acldvppStreamDesc *streamDesc, uint8_t eos);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's data addr.
+ *
+ * @param streamDesc [IN] dvpp stream description.
+ *
+ * @retval data addr.
+ * @retval default nullptr.
+ */
+ACL_FUNC_VISIBILITY void *acldvppGetStreamDescData(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's data size.
+ *
+ * @param streamDesc [IN] dvpp stream description.
+ *
+ * @retval data size.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescSize(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's format.
+ *
+ * @param streamDesc [IN] dvpp stream description.
+ *
+ * @retval stream format.
+ * @retval default ACL_DVPP_STREAM_H264.
+ */
+ACL_FUNC_VISIBILITY acldvppStreamFormat acldvppGetStreamDescFormat(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's timestamp.
+ *
+ * @param streamDesc [IN] dvpp stream description.
+ *
+ * @retval current timestamp.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint64_t acldvppGetStreamDescTimestamp(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's retCode.
+ *
+ * @param streamDesc [IN] dvpp stream description.
+ *
+ * @retval result code.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescRetCode(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's eos.
+ *
+ * @param streamDesc [IN] dvpp stream description.
+ *
+ * @retval end flag of sequence.
+ * @retval default 0 (false).
+ */
+ACL_FUNC_VISIBILITY uint8_t acldvppGetStreamDescEos(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create vdec frame config.
+ *
+ * @retval null for failed.
+ * @retval other success.
+ */
+ACL_FUNC_VISIBILITY aclvdecFrameConfig *aclvdecCreateFrameConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy vdec frame config.
+ *
+ * @par Function
+ * Can only destroy aclvdecFrameConfig type created through
+ * aclvdecCreateFrameConfig interface
+ *
+ * @param vdecFrameConfig [IN] vdec frame config.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCreateFrameConfig
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecFrameConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get image width and height of jpeg.
+ *
+ * @param data [IN] image data in host memory
+ * @param size [IN] the size of image data
+ * @param width [OUT] the width of image from image header
+ * @param height [OUT] the height of image from image header
+ * @param components [OUT] the components of image from image header
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height,
+                                                     int32_t *components);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get image width and height of jpeg.
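+ *
+ * @par Example
+ * Querying the header before decoding (sketch; `jpegData` and `jpegSize`
+ * describe a complete JPEG file already resident in host memory):
+ * @code
+ * uint32_t width = 0;
+ * uint32_t height = 0;
+ * int32_t components = 0;
+ * aclError ret = acldvppJpegGetImageInfo(jpegData, jpegSize, &width, &height, &components);
+ * @endcode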
+ *
+ * @param data [IN] image data in host memory
+ * @param size [IN] the size of image data
+ * @param width [OUT] the width of image from image header
+ * @param height [OUT] the height of image from image header
+ * @param components [OUT] the components of image from image header
+ * @param format [OUT] the format of image from image header
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width,
+                                                       uint32_t *height, int32_t *components,
+                                                       acldvppJpegFormat *format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Predict encode size of jpeg image.
+ *
+ * @param inputDesc [IN] dvpp image desc
+ * @param config [IN] jpeg encode config
+ * @param size [OUT] the predicted size of the encoded image
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
+                                                       const acldvppJpegeConfig *config, uint32_t *size);
+
+/**
+ * @ingroup AscendCL
+ * @brief Predict decode size of jpeg image.
+ *
+ * @param data [IN] origin image data in host memory
+ * @param dataSize [IN] the size of origin image data
+ * @param outputPixelFormat [IN] the pixel format of jpeg decode
+ * @param decSize [OUT] the size predicted for decode image
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize,
+                                                       acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get image width and height of png.
+ *
+ * @param data [IN] image data in host memory
+ * @param dataSize [IN] the size of image data
+ * @param width [OUT] the width of image from image header
+ * @param height [OUT] the height of image from image header
+ * @param components [OUT] the components of image from image header
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width,
+                                                    uint32_t *height, int32_t *components);
+
+/**
+ * @ingroup AscendCL
+ * @brief Predict decode size of png image.
+ *
+ * @param data [IN] origin image data in host memory
+ * @param dataSize [IN] the size of origin image data
+ * @param outputPixelFormat [IN] the pixel format of png decode
+ * @param decSize [OUT] the size predicted for decode image
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize,
+                                                      acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp channel. The same channel can be reused,
+ * and it is no longer available after destruction.
+ *
+ * @param channelDesc [IN|OUT] the channel destruction
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppCreateChannel(acldvppChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp channel.
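+ *
+ * @par Example
+ * Channel lifecycle (sketch; acldvppCreateChannelDesc and
+ * acldvppDestroyChannelDesc are declared elsewhere in this header; error
+ * handling elided):
+ * @code
+ * acldvppChannelDesc *chanDesc = acldvppCreateChannelDesc();
+ * (void)acldvppCreateChannel(chanDesc);
+ * // ... submit vpc/jpeg/png tasks on this channel ...
+ * (void)acldvppDestroyChannel(chanDesc);
+ * (void)acldvppDestroyChannelDesc(chanDesc);
+ * @endcode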
+ * + * @par Restriction + * Can only destroy channel created through the acldvppCreateChannel interface + * + * @param channelDesc [IN] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp vpc resize. + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] resize input picture destruction + * @param outputDesc [IN|OUT] resize output picture destruction + * @param resizeConfig [IN] resize config + * @param stream [IN] resize task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + * | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop. + * + * @par Function + * crop the input picture according to the specified area, + * and then store the picture in the output memory as the output picture + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] crop input picture destruction + * @param outputDesc [IN|OUT] crop output picture destruction + * @param cropArea [IN] crop area config + * @param stream [IN] crop task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop and resize config. 
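+ *
+ * @par Example
+ * A related single-picture flow using acldvppVpcResizeAsync above (sketch;
+ * `chanDesc`, `inDesc`, `outDesc` and `stream` are assumed to be created and
+ * filled elsewhere; aclrtSynchronizeStream is the runtime stream API):
+ * @code
+ * acldvppResizeConfig *resizeCfg = acldvppCreateResizeConfig();
+ * aclError ret = acldvppVpcResizeAsync(chanDesc, inDesc, outDesc, resizeCfg, stream);
+ * if (ret == ACL_SUCCESS) {
+ *     ret = aclrtSynchronizeStream(stream);  // wait for the async task
+ * }
+ * (void)acldvppDestroyResizeConfig(resizeCfg);
+ * @endcode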
+ * + * @par Function + * crop the input picture with resize config according to the specified area, + * and then store the picture in the output memory as the output picture + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] crop input picture destruction + * @param outputDesc [IN|OUT] crop output picture destruction + * @param cropArea [IN] crop area config + * @param resizeConfig [IN] resize config + * @param stream [IN] crop and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + acldvppResizeConfig *resizeConfig, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop. + * + * @par Function + * crop the input batch picture according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param stream [IN] crop batch task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop and resize config. + * + * @par Function + * crop the input batch picture with resize config according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param resizeConfig [IN] resize config + * @param stream [IN] crop batch and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc,
+                                                            acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
+                                                            uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
+                                                            acldvppRoiConfig *cropAreas[],
+                                                            acldvppResizeConfig *resizeConfig, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc crop and paste.
+ *
+ * @par Function
+ * crop the input picture according to the specified area,
+ * and paste the picture to the specified position of the target picture
+ * as the output picture
+ *
+ * @param channelDesc [IN] the channel destruction
+ * @param inputDesc [IN] crop and paste input picture destruction
+ * @param outputDesc [IN|OUT] crop and paste output picture destruction
+ * @param cropArea [IN] crop area config
+ * @param pasteArea [IN] paste area config
+ * @param stream [IN] crop and paste task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                         acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
+                                                         acldvppRoiConfig *pasteArea, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc crop, resize config and paste.
+ *
+ * @par Function
+ * crop the input picture with resize config according to the specified area,
+ * and paste the picture to the specified position of the target picture
+ * as the output picture
+ *
+ * @param channelDesc [IN] the channel destruction
+ * @param inputDesc [IN] crop and paste input picture destruction
+ * @param outputDesc [IN|OUT] crop and paste output picture destruction
+ * @param cropArea [IN] crop area config
+ * @param pasteArea [IN] paste area config
+ * @param resizeConfig [IN] resize config
+ * @param stream [IN] crop, paste and resize task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                            acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
+                                                            acldvppRoiConfig *pasteArea,
+                                                            acldvppResizeConfig *resizeConfig, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc batch crop and paste.
+ *
+ * @par Function
+ * crop the input batch picture according to the specified area,
+ * and paste the pictures to the specified position of the target pictures
+ * as the output batch pictures
+ *
+ * @param channelDesc [IN] the channel destruction
+ * @param srcBatchPicDescs [IN] crop input batch picture destruction
+ * @param roiNums [IN] roi config numbers
+ * @param size [IN] roiNum size
+ * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction
+ * @param cropAreas [IN] crop area configs
+ * @param pasteAreas [IN] paste area configs
+ * @param stream [IN] crop batch task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppRoiConfig *pasteAreas[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop, resize config and paste. + * + * @par Function + * crop the input batch picture with resize config according to the specified area, + * and paste the pictures to the specified position of the target pictures + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param pasteAreas [IN] paste area configs + * @param resizeConfig [IN] resize config + * @param stream [IN] crop batch and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync( + acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[], + acldvppResizeConfig *resizeConfig, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc jpeg decode. + * + * @par Function + * For different source picture formats, after decoding, + * output pictures in the following format: + * @li jpeg(444) -> YUV444SP:V is front U is back, + * YUV420 SP V is front U is back, YUV420SP U is front V is back; + * @li jpeg(422) -> YUV422SP:V is in front U is behind, + * YUV420SP V is in front U is behind, YUV420SP U is in front V is behind; + * @li jpeg(420) -> YUV420SP: + * V is front U is back, YUV420SP U is front V is back; + * @li jpeg(400) -> YUV420SP:UV data is filled with 0 x 80. + * + * @param channelDesc [IN] the channel destruction + * @param data [IN] decode input picture destruction's data + * @param size [IN] decode input picture destruction's size + * @param outputDesc [IN|OUT] decode output picture destruction + * @param stream [IN] decode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, + acldvppPicDesc *outputDesc, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc jpeg encode. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] encode input picture destruction + * @param data [OUT] encode output picture destruction's data + * @param size [IN|OUT] encode output picture destruction's size + * @param config [IN] jpeg encode config + * @param stream [IN] encode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateJpegeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + const void *data, uint32_t *size, acldvppJpegeConfig *config, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc png decode. + * + * @param channelDesc [IN] the channel destruction + * @param data [IN] decode input picture destruction's data + * @param size [IN] decode input picture destruction's size + * @param outputDesc [IN|OUT] decode output picture destruction + * @param stream [IN] decode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, + acldvppPicDesc *outputDesc, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create vdec channel. + * + * @par Function + * Create a channel for video data processing, + * the same channel can be reused, + * and is no longer available after destruction + * + * @param channelDesc [IN|OUT] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecCreateChannel(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy vdec channel. + * + * @par Function + * Can only destroy channels created by the aclvdecCreateChannel interface + * + * @param channelDesc [IN] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp vdec send frame. + * + * @par Function + * Pass the input memory to be decoded + * and the decoded output memory to the decoder for decoding + * + * @param channelDesc [IN] vdec channel destruction + * @param input [IN] input stream destruction + * @param output [IN|OUT] output picture destruction + * @param config [IN] vdec frame config + * @param userData [IN] user data for callback function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, + acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); + +/** + * @ingroup AscendCL + * @brief dvpp vdec send skipped frame. + * + * @par Function + * Pass video frame to decoder + * + * @param channelDesc [IN] vdec channel destruction + * @param input [IN] input stream destruction + * @param config [IN] vdec frame config + * @param userData [IN] user data for callback function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame + */ +ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, + aclvdecFrameConfig *config, void *userData); + +/** + * @ingroup AscendCL + * @brief dvpp vpc convert color. 
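+ *
+ * @par Example
+ * Color-space conversion on an existing channel (sketch; `chanDesc`, `inDesc`,
+ * `outDesc` and `stream` as in the earlier examples):
+ * @code
+ * aclError ret = acldvppVpcConvertColorAsync(chanDesc, inDesc, outDesc, stream);
+ * if (ret == ACL_SUCCESS) {
+ *     ret = aclrtSynchronizeStream(stream);
+ * }
+ * @endcode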
+ *
+ * @par Restriction
+ * @li outputDesc: width, height and stride must not be changed; just configure them to 0
+ * @par Function
+ * Convert color gamut
+ *
+ * @param channelDesc [IN] the channel destruction
+ * @param inputDesc [IN] convert color input picture destruction
+ * @param outputDesc [IN|OUT] convert color output picture destruction
+ * @param stream [IN] convert color task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                         acldvppPicDesc *outputDesc, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc pyramid down.
+ *
+ * @par Restriction
+ * @li outputDesc: only the YUV400 format is supported
+ * @par Function
+ * Image pyramid down
+ *
+ * @param channelDesc [IN] the channel destruction
+ * @param inputDesc [IN] pyr down input picture destruction
+ * @param outputDesc [IN|OUT] pyr down output picture destruction
+ * @param reserve [IN] reserved param, must be nullptr
+ * @param stream [IN] pyr down task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                    acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp channel mode.
+ *
+ * @param channelDesc [OUT] the channel destruction
+ * @param mode [IN] channel mode
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set resize config interpolation.
+ *
+ * @param resizeConfig [OUT] the resize config
+ * @param interpolation [IN] interpolation
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetResizeConfigInterpolation(acldvppResizeConfig *resizeConfig,
+                                                                 uint32_t interpolation);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get resize config interpolation.
+ *
+ * @param resizeConfig [IN] the resize config
+ *
+ * @retval Interpolation of resize config.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppResizeConfig *resizeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel out mode.
+ *
+ * @param channelDesc [OUT] the channel destruction
+ * @param outMode [IN] channel out mode
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel out mode.
+ *
+ * @param channelDesc [IN] the channel destruction
+ *
+ * @retval Out mode of channel destruction
+ * @retval default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutMode(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp batch picture description.
+ *
+ * @param batchSize [IN] batch size
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
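+ *
+ * @par Example
+ * Building a batch and accessing one entry (sketch; the per-picture setters
+ * used to fill each entry are declared elsewhere in this header):
+ * @code
+ * acldvppBatchPicDesc *batch = acldvppCreateBatchPicDesc(4);  // batch of 4
+ * acldvppPicDesc *first = acldvppGetPicDesc(batch, 0);
+ * // ... fill each entry, use the batch in a batch crop call, then ...
+ * (void)acldvppDestroyBatchPicDesc(batch);
+ * @endcode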
+ */
+ACL_FUNC_VISIBILITY acldvppBatchPicDesc *acldvppCreateBatchPicDesc(uint32_t batchSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture description.
+ *
+ * @param batchPicDesc [IN] dvpp batch picture description.
+ * @param index [IN] index of batch
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ *
+ * @see acldvppCreateBatchPicDesc
+ */
+ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppGetPicDesc(acldvppBatchPicDesc *batchPicDesc, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp batch picture description.
+ *
+ * @par Function
+ * Can only destroy batch picture description information created
+ * through acldvppCreateBatchPicDesc interface.
+ *
+ * @param batchPicDesc [IN] dvpp batch picture description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateBatchPicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyBatchPicDesc(acldvppBatchPicDesc *batchPicDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp lut map.
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppLutMap *acldvppCreateLutMap();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy lut map.
+ *
+ * @param lutMap [IN] lut map
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyLutMap(acldvppLutMap *lutMap);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get lut map dims.
+ *
+ * @param lutMap [IN] lut map
+ *
+ * @retval 0 for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get lut map data.
+ *
+ * @param lutMap [IN] lut map
+ * @param dim [IN] input dim of map
+ * @param data [OUT] the data of the specified dim
+ * @param len [OUT] the length of the data
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data,
+                                                  uint32_t *len);
+/**
+ * @ingroup AscendCL
+ * @brief Vpc equalize hist.
+ *
+ * @param channelDesc [IN] channel desc
+ * @param inputDesc [IN] input desc
+ * @param outputDesc [IN|OUT] output desc
+ * @param lutMap [IN] lut map param
+ * @param stream [IN] runtime stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
+                                                         const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
+                                                         const acldvppLutMap *lutMap, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp border config.
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Set value of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param index [IN] index of value array
+ * @param value [IN] value
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index,
+                                                         double value);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set border type of border config.
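+ *
+ * @par Example
+ * Configuring a constant 16-pixel border (sketch; the make-border interface is
+ * documented later in this header, and BORDER_CONSTANT is assumed from the
+ * acldvppBorderType enum):
+ * @code
+ * acldvppBorderConfig *borderCfg = acldvppCreateBorderConfig();
+ * (void)acldvppSetBorderConfigBorderType(borderCfg, BORDER_CONSTANT);
+ * (void)acldvppSetBorderConfigTop(borderCfg, 16);
+ * (void)acldvppSetBorderConfigBottom(borderCfg, 16);
+ * (void)acldvppSetBorderConfigLeft(borderCfg, 16);
+ * (void)acldvppSetBorderConfigRight(borderCfg, 16);
+ * (void)acldvppSetBorderConfigValue(borderCfg, 0, 0.0);  // fill value for channel 0
+ * @endcode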
+ *
+ * @param borderConfig [OUT] border config
+ * @param borderType [IN] border type
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBorderType(acldvppBorderConfig *borderConfig,
+                                                              acldvppBorderType borderType);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set top of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param top [IN] top of border
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigTop(acldvppBorderConfig *borderConfig, uint32_t top);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set bottom of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param bottom [IN] bottom of border
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBottom(acldvppBorderConfig *borderConfig, uint32_t bottom);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set left of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param left [IN] left of border
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigLeft(acldvppBorderConfig *borderConfig, uint32_t left);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set right of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param right [IN] right of border
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigRight(acldvppBorderConfig *borderConfig, uint32_t right);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get value of border config.
+ *
+ * @param borderConfig [IN] border config
+ * @param index [IN] index of value array
+ *
+ * @retval invalid value is < 0, normal Value is >= 0
+ */
+ACL_FUNC_VISIBILITY double acldvppGetBorderConfigValue(const acldvppBorderConfig *borderConfig, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get border type of border config.
+ *
+ * @param borderConfig [IN] border config
+ * @retval border type of border config
+ */
+ACL_FUNC_VISIBILITY acldvppBorderType acldvppGetBorderConfigBorderType(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get top of border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval default 0, top value of border config
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigTop(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get bottom of border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval default 0, bottom value of border config
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigBottom(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get left of border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval default 0, left value of border config
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigLeft(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get right of border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval default 0, right value of border config
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigRight(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Vpc make border.
+ *
+ * @param channelDesc [IN] channel desc
+ * @param inputDesc [IN] input desc
+ * @param outputDesc [IN|OUT] output desc
+ * @param borderConfig [IN] border config param
+ * @param stream [IN] runtime stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
+                                                       const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
+                                                       const acldvppBorderConfig *borderConfig, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Dvpp vpc calc hist.
+ *
+ * @param channelDesc [IN] the channel destruction
+ * @param srcPicDesc [IN] calc hist input picture destruction
+ * @param hist [IN|OUT] calc hist output hist description
+ * @param reserve [IN] reserved param, must be nullptr
+ * @param stream [IN] task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc,
+                                                     acldvppHist *hist, void *reserve, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create vpc hist description.
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy vpc hist description.
+ *
+ * @par Function
+ * Can only destroy hist description information created
+ * through acldvppCreateHist interface.
+ *
+ * @param hist [IN] vpc hist description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateHist
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyHist(acldvppHist *hist);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dims of vpc hist description.
+ *
+ * @param hist [IN] vpc hist description.
+ *
+ * @retval dims of vpc hist description.
+ *
+ * @see acldvppCreateHist | acldvppVpcCalcHistAsync
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetHistDims(acldvppHist *hist);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get data from vpc hist description by dim.
+ *
+ * @param hist [IN] vpc hist description.
+ * @param dim [IN] which dim to get data.
+ * @param data [OUT] address of output hist data.
+ * @param len [OUT] len of output hist data.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateHist | acldvppVpcCalcHistAsync
+ */
+ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, uint32_t **data, uint16_t *len);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp calc hist process return code.
+ *
+ * @param hist [IN] vpc hist description.
+ *
+ * @retval Dvpp calc hist process return code.
+ *
+ * @see acldvppCreateHist | acldvppVpcCalcHistAsync
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vpc hist description to 0.
+ *
+ * @par Function
+ * Can only clear hist description information created
+ * through acldvppCreateHist interface.
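+ *
+ * @par Example
+ * End-to-end histogram flow (sketch; `chanDesc`, `srcDesc` and `stream` as in
+ * the earlier examples; error handling elided):
+ * @code
+ * acldvppHist *hist = acldvppCreateHist();
+ * (void)acldvppVpcCalcHistAsync(chanDesc, srcDesc, hist, nullptr, stream);
+ * (void)aclrtSynchronizeStream(stream);
+ * uint32_t dims = acldvppGetHistDims(hist);
+ * uint32_t *histData = nullptr;
+ * uint16_t histLen = 0;
+ * (void)acldvppGetHistData(hist, 0, &histData, &histLen);  // data of dim 0
+ * (void)acldvppDestroyHist(hist);
+ * @endcode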
+ *
+ * @param hist [IN] vpc hist description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateHist
+ */
+ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc batch crop, resize config and make border.
+ *
+ * @par Function
+ * crop the input batch pictures with resize config and border configs according to the specified areas
+ * as the output batch pictures
+ *
+ * @param channelDesc [IN] the channel description
+ * @param srcBatchPicDescs [IN] crop input batch picture description
+ * @param roiNums [IN] roi config numbers
+ * @param size [IN] roiNum size
+ * @param dstBatchPicDescs [IN|OUT] crop output batch picture description
+ * @param cropAreas [IN] crop area configs
+ * @param borderCfgs [IN] border configs
+ * @param resizeConfig [IN] resize config
+ * @param stream [IN] crop batch, resize config and make border task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
+    acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
+    acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
+    acldvppResizeConfig *resizeConfig, aclrtStream stream);
+/**
+ * @ingroup AscendCL
+ * @brief set param for dvpp channel desc
+ *
+ * @par Function
+ * set the attribute in dvpp channelDesc for the specified type
+ *
+ * @param channelDesc [OUT] the channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length of param
+ * @param param [IN] pointer to param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppGetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescParam(acldvppChannelDesc *channelDesc,
+                                                        acldvppChannelDescParamType paramType, size_t length,
+                                                        const void *param);
+
+/**
+ * @ingroup AscendCL
+ * @brief get param of dvpp channel desc
+ *
+ * @par Function
+ * get the attribute value in dvpp channelDesc for the specified type
+ *
+ * @param channelDesc [IN] the channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length allocated for output param
+ * @param paramRetSize [OUT] mem length of output param
+ * @param param [OUT] pointer to output param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppSetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppGetChannelDescParam(const acldvppChannelDesc *channelDesc,
+                                                        acldvppChannelDescParamType paramType, size_t length,
+                                                        size_t *paramRetSize, void *param);
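The get/set pair above is deliberately type-erased: the caller supplies a raw buffer plus its length, and paramType selects which channel attribute is touched. A hedged round-trip sketch; paramType is value-initialized here only because no acldvppChannelDescParamType enumerators appear in this excerpt, so substitute a real one from the full header:

    acldvppChannelDescParamType paramType{};  // placeholder; use a real enumerator in practice
    uint64_t value = 128U;                    // hypothetical parameter payload
    aclError ret = acldvppSetChannelDescParam(channelDesc, paramType, sizeof(value), &value);
    uint64_t readBack = 0U;
    size_t retSize = 0U;
    if (ret == ACL_SUCCESS) {
      ret = acldvppGetChannelDescParam(channelDesc, paramType, sizeof(readBack), &retSize, &readBack);
      // on success, retSize holds the number of bytes actually written into readBack
    }

+/**
+ * @ingroup AscendCL
+ * @brief set param for vdec channel desc
+ *
+ * @par Function
+ * set the attribute in channelDesc for the specified type
+ *
+ * @param channelDesc [OUT] the vdec channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length of param
+ * @param param [IN] pointer to param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.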
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecGetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescParam(aclvdecChannelDesc *channelDesc,
+                                                        aclvdecChannelDescParamType paramType, size_t length,
+                                                        const void *param);
+
+/**
+ * @ingroup AscendCL
+ * @brief get param of vdec channel desc
+ *
+ * @par Function
+ * get the attribute value in channelDesc for the specified type
+ *
+ * @param channelDesc [IN] the vdec channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length allocated for output param
+ * @param paramRetSize [OUT] mem length of output param
+ * @param param [OUT] pointer to output param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecSetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecGetChannelDescParam(const aclvdecChannelDesc *channelDesc,
+                                                        aclvdecChannelDescParamType paramType, size_t length,
+                                                        size_t *paramRetSize, void *param);
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
diff --git a/inc/external/acl/ops/acl_fv.h b/inc/external/acl/ops/acl_fv.h
new file mode 100644
index 00000000..87480461
--- /dev/null
+++ b/inc/external/acl/ops/acl_fv.h
@@ -0,0 +1,348 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
+#define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
+
+#include "acl/acl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct aclfvInitPara aclfvInitPara;
+typedef struct aclfvFeatureInfo aclfvFeatureInfo;
+typedef struct aclfvRepoRange aclfvRepoRange;
+typedef struct aclfvQueryTable aclfvQueryTable;
+typedef struct aclfvSearchInput aclfvSearchInput;
+typedef struct aclfvSearchResult aclfvSearchResult;
+
+// search operation type
+enum aclfvSearchType {
+  SEARCH_1_N,  // 1:N operation type
+  SEARCH_N_M   // N:M operation type
+};
+
+/**
+ * @ingroup AscendCL
+ * @brief Create fv init param.
+ *
+ * @param fsNum [IN] The feature num
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY aclfvInitPara *aclfvCreateInitPara(uint64_t fsNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy fv init param.
+ *
+ * @par Function
+ * Can only destroy fv init param information created
+ * through aclfvCreateInitPara interface.
+ *
+ * @param initPara [IN] fv init param.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateInitPara
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroyInitPara(aclfvInitPara *initPara);
+
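Initialization is two steps: build an aclfvInitPara, optionally tighten the top-N caps with the setters that follow, then hand it to aclfvInit (declared further below). A hedged sketch with illustrative sizes:

    aclfvInitPara *para = aclfvCreateInitPara(2048U);  // fsNum: expected feature count (illustrative)
    if (para != nullptr) {
      (void)aclfvSet1NTopNum(para, 100U);  // optional: raise the 1:N top-N cap (illustrative)
      if (aclfvInit(para) == ACL_SUCCESS) {
        // ... aclfvRepoAdd / aclfvSearch ..., then aclfvRelease()
      }
      (void)aclfvDestroyInitPara(para);
    }

+/**
+ * @ingroup AscendCL
+ * @brief set value for maxTopNumFor1N which is in fv init param.
+ *
+ * @param initPara [IN|OUT] fv init param.
+ * @param maxTopNumFor1N [IN] maxTopNumFor1N value for init param.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.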
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclfvSet1NTopNum(aclfvInitPara *initPara, uint32_t maxTopNumFor1N);
+
+/**
+ * @ingroup AscendCL
+ * @brief set value for maxTopNumForNM which is in fv init param.
+ *
+ * @param initPara [IN|OUT] fv init param.
+ * @param maxTopNumForNM [IN] maxTopNumForNM value for init param.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t maxTopNumForNM);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create fv feature info.
+ *
+ * @param id0 [IN] The first level library id0
+ * @param id1 [IN] Secondary library id1
+ * @param offset [IN] The offset of the first feature in the library
+ * @param featureLen [IN] Single feature length
+ * @param featureCount [IN] Single feature count
+ * @param featureData [IN] Feature value list
+ * @param featureDataLen [IN] Feature value list length
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
+                                                             uint32_t featureLen, uint32_t featureCount,
+                                                             uint8_t *featureData, uint32_t featureDataLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy fv feature info.
+ *
+ * @par Function
+ * Can only destroy fv feature info information created
+ * through aclfvCreateFeatureInfo interface.
+ *
+ * @param featureInfo [IN] fv feature info.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateFeatureInfo
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create fv repo range.
+ *
+ * @param id0Min [IN] id0 start value
+ * @param id0Max [IN] id0 max value
+ * @param id1Min [IN] id1 start value
+ * @param id1Max [IN] id1 max value
+ *
+ * @retval null for failed. OtherValues success
+ */
+ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min,
+                                                         uint32_t id1Max);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy fv repo range.
+ *
+ * @par Function
+ * Can only destroy fv repo range information created
+ * through aclfvCreateRepoRange interface.
+ *
+ * @param repoRange [IN] fv repo range.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateRepoRange
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create query table.
+ *
+ * @param queryCnt [IN] Number of tables, the maximum number is 6
+ * @param tableLen [IN] Single table length, table length is 32KB
+ * @param tableData [IN] Feature value list
+ * @param tableDataLen [IN] The length of memory requested by the tableData pointer
+ *
+ * @retval null for failed. OtherValues success
+ */
+ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData,
+                                                           uint32_t tableDataLen);
+
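A hedged sketch of populating a repo and scoping it with the creators above; featureBuf is a hypothetical application buffer holding featureCount features of featureLen bytes each, the ids and sizes are illustrative, and cleanup on partial failure is omitted:

    const uint32_t featureLen = 36U;      // bytes per feature (illustrative)
    const uint32_t featureCount = 1024U;  // features in this batch (illustrative)
    aclfvFeatureInfo *info = aclfvCreateFeatureInfo(0U, 0U, 0U, featureLen, featureCount,
                                                    featureBuf, featureLen * featureCount);
    aclfvRepoRange *range = aclfvCreateRepoRange(0U, 0U, 0U, 1023U);  // id0 in [0,0], id1 in [0,1023]
    if ((info != nullptr) && (range != nullptr)) {
      (void)aclfvRepoAdd(SEARCH_1_N, info);  // declared further below
      (void)aclfvDestroyFeatureInfo(info);
      (void)aclfvDestroyRepoRange(range);
    }

+/**
+ * @ingroup AscendCL
+ * @brief Destroy query table.
+ *
+ * @par Function
+ * Can only destroy query table information created
+ * through aclfvCreateQueryTable interface.
+ *
+ * @param queryTable [IN] query table.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.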
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateQueryTable
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create search input.
+ *
+ * @param queryTable [IN] query table
+ * @param repoRange [IN] query repo range
+ * @param topk [IN] query topk
+ *
+ * @retval null for failed. OtherValues success
+ */
+ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange,
+                                                             uint32_t topk);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy search input.
+ *
+ * @par Function
+ * Can only destroy search input information created
+ * through aclfvCreateSearchInput interface.
+ *
+ * @param searchInput [IN] search input.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateSearchInput
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create search result.
+ *
+ * @param queryCnt [IN] The number of features to retrieve
+ * @param resultNum [IN] The number of search results for each feature, the array length is queryCnt
+ * @param resultNumDataLen [IN] resultNum memory length
+ * @param id0 [IN] Level 1 library id0
+ * @param id1 [IN] Secondary library id1
+ * @param resultOffset [IN] The offset of the bottom library corresponding
+ * to each feature retrieval result, total length topK * queryCnt
+ * @param resultDistance [IN] Distance, total length topK * queryCnt
+ * @param dataLen [IN] The memory size requested by
+ * id0/id1/resultOffset/resultDistance
+ *
+ * @retval null for failed. OtherValues success
+ */
+ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
+                                                               uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1,
+                                                               uint32_t *resultOffset, float *resultDistance,
+                                                               uint32_t dataLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy search result.
+ *
+ * @par Function
+ * Can only destroy search result information created
+ * through aclfvCreateSearchResult interface.
+ *
+ * @param searchResult [IN] search result.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateSearchResult
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult);
+
+/**
+ * @ingroup AscendCL
+ * @brief fv IP initialize.
+ *
+ * @param initPara [IN] fv init param.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ */
+ACL_FUNC_VISIBILITY aclError aclfvInit(aclfvInitPara *initPara);
+
+/**
+ * @ingroup AscendCL
+ * @brief release fv resources.
+ *
+ * @par Function
+ * Can only release fv resources created
+ * through aclfvInit interface.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ *
+ * @see aclfvInit
+ */
+ACL_FUNC_VISIBILITY aclError aclfvRelease();
+
+/**
+ * @ingroup AscendCL
+ * @brief fv repo add.
+ *
+ * @param type [IN] repo add type
+ * @param featureInfo [IN] add feature information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ */
+ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo);
+
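End to end, a 1:N query wires the creators together: a query table feeds a search input, a pre-allocated result set receives the hits, and aclfvSearch (declared at the end of this header) runs the retrieval. A hedged sketch; tableBuf is a hypothetical buffer of 32 KB per table, and the dataLen arguments are illustrative:

    constexpr uint32_t queryCnt = 1U;
    constexpr uint32_t topK = 5U;
    uint32_t resultNum[queryCnt] = {0U};
    uint32_t id0[queryCnt * topK] = {0U};
    uint32_t id1[queryCnt * topK] = {0U};
    uint32_t offsets[queryCnt * topK] = {0U};
    float distances[queryCnt * topK] = {0.0F};

    aclfvQueryTable *table = aclfvCreateQueryTable(queryCnt, 32U * 1024U, tableBuf, queryCnt * 32U * 1024U);
    aclfvRepoRange *range = aclfvCreateRepoRange(0U, 0U, 0U, 1023U);
    aclfvSearchInput *input = aclfvCreateSearchInput(table, range, topK);
    aclfvSearchResult *result = aclfvCreateSearchResult(queryCnt, resultNum, static_cast<uint32_t>(sizeof(resultNum)),
                                                        id0, id1, offsets, distances,
                                                        static_cast<uint32_t>(sizeof(id0)));
    if ((input != nullptr) && (result != nullptr)) {
      const aclError ret = aclfvSearch(SEARCH_1_N, input, result);  // resultNum[i] holds hits for query i
    }
    // destroy in reverse order of creation: result, input, range, table

+/**
+ * @ingroup AscendCL
+ * @brief fv repo del.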
+ * + * @param type [IN] repo delete type + * @param repoRange [IN] repo range information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange); + +/** + * @ingroup AscendCL + * @brief fv accurate del. + * + * @param featureInfo [IN] accurate delete feature information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo); + +/** + * @ingroup AscendCL + * @brief fv accurate modify. + * + * @param featureInfo [IN] accurate modify feature information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo); + +/** + * @ingroup AscendCL + * @brief fv search. + * + * @param type [IN] search type + * @param searchInput [IN] search input + * @param searchRst [OUT] search result + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput, + aclfvSearchResult *searchRst); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index c2cbe794..73753b72 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -57,22 +57,22 @@ class GE_FUNC_VISIBILITY Session {
 
   ///
   /// @ingroup client
-  /// @brief add a graph with a specific graphId
-  /// @param [in] graphId graph id
+  /// @brief add a graph with a specific graph id
+  /// @param [in] graph_id graph id
   /// @return Status result of function
   ///
-  Status AddGraph(uint32_t graphId, const Graph &graph);
+  Status AddGraph(uint32_t graph_id, const Graph &graph);
 
   ///
   /// @ingroup client
-  /// @brief add a graph with a specific graphId and graphOptions
+  /// @brief add a graph with a specific graph id and graphOptions
   /// @param [in] graphId graph id
   /// @param [in] graph the graph
   /// @param [in] options graph options
   /// @return Status result of function
   ///
   ATTRIBUTED_DEPRECATED(Status AddGraph(uint32_t, const Graph &, const std::map<AscendString, AscendString> &))
-  Status AddGraph(uint32_t graphId, const Graph &graph, const std::map<std::string, std::string> &options);
+  Status AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options);
 
   ///
   /// @ingroup client
@@ -82,7 +82,7 @@ class GE_FUNC_VISIBILITY Session {
   /// @param [in] options graph options
   /// @return Status result of function
   ///
-  Status AddGraph(uint32_t graphId, const Graph &graph, const std::map<AscendString, AscendString> &options);
+  Status AddGraph(uint32_t graph_id, const Graph &graph, const std::map<AscendString, AscendString> &options);
 
   ///
   /// @ingroup client
@@ -106,10 +106,10 @@ class GE_FUNC_VISIBILITY Session {
   ///
   /// @ingroup ge_graph
   /// @brief remove a graph of the session with specific session id
-  /// @param [in] graphId graph id
+  /// @param [in] graph_id graph id
   /// @return Status result of function
   ///
-  Status RemoveGraph(uint32_t graphId);
+  Status RemoveGraph(uint32_t graph_id);
 
   ///
   /// @ingroup ge_graph
@@ -119,7 +119,7 @@ class GE_FUNC_VISIBILITY Session {
   /// @param [out] outputs output data
   /// @return Status result of function
   ///
-  Status RunGraph(uint32_t graphId, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs);
+  Status RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs);
 
   ///
   /// @ingroup ge_graph
@@ -140,9 +140,9 @@ class GE_FUNC_VISIBILITY Session {
   /// @param [in] inputs: input data
   /// @return Status result of function
   ///
-  Status BuildGraph(uint32_t graphId, const std::vector<InputTensorInfo> &inputs);
+  Status BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs);
 
-  Status BuildGraph(uint32_t graphId, const std::vector<Tensor> &inputs); /*lint !e148*/
+  Status BuildGraph(uint32_t graph_id, const std::vector<Tensor> &inputs); /*lint !e148*/
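After the renames the client-side call sequence is unchanged; a hedged sketch, assuming a prebuilt ge::Graph named graph and default (empty) session options, with error handling reduced to one check:

    std::map<std::string, std::string> options;  // session options, empty for brevity
    ge::Session session(options);
    const uint32_t graph_id = 1U;
    if (session.AddGraph(graph_id, graph) == ge::SUCCESS) {
      std::vector<ge::Tensor> inputs;  // populate with input tensors before running
      std::vector<ge::Tensor> outputs;
      const ge::Status ret = session.RunGraph(graph_id, inputs, outputs);
      (void)session.RemoveGraph(graph_id);
    }

 
   ///
   /// @ingroup ge_graph
@@ -154,7 +154,7 @@ class GE_FUNC_VISIBILITY Session {
   /// Please ensure that the implementation of the function is trusted.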
/// @return Status result of function /// - Status RunGraphAsync(uint32_t graphId, const std::vector &inputs, RunAsyncCallback callback); + Status RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback); /// /// @ingroup ge_graph @@ -189,7 +189,7 @@ class GE_FUNC_VISIBILITY Session { Status RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback); - bool IsGraphNeedRebuild(uint32_t graphId); + bool IsGraphNeedRebuild(uint32_t graph_id); private: uint64_t sessionId_; diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index d0d7981e..7d4cab13 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,14 +20,27 @@ #include #include #include "ge_error_codes.h" +#include "ge_api_types.h" -namespace ge { #ifdef __GNUC__ #define ATTRIBUTED_DEPRECATED(replacement) __attribute__((deprecated("Please use " #replacement " instead."))) #else #define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) #endif +// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit +#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ + constexpr ge::Status name = (static_cast(0xFFU & (static_cast(runtime))) << 30U) | \ + (static_cast(0xFFU & (static_cast(type))) << 28U) | \ + (static_cast(0xFFU & (static_cast(level))) << 25U) | \ + (static_cast(0xFFU & (static_cast(sysid))) << 17U) | \ + (static_cast(0xFFU & (static_cast(modid))) << 12U) | \ + (static_cast(0x0FFFU) & (static_cast(value))); \ + const ErrorNoRegisterar g_errorno_##name((name), (desc)); + +#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_errorno_##name((name), (desc)); + +namespace ge { class GE_FUNC_VISIBILITY StatusFactory { public: static StatusFactory *Instance() { @@ -35,7 +48,7 @@ class GE_FUNC_VISIBILITY StatusFactory { return &instance; } - void RegisterErrorNo(uint32_t err, const std::string &desc) { + void RegisterErrorNo(const uint32_t err, const std::string &desc) { // Avoid repeated addition if (err_desc_.find(err) != err_desc_.end()) { return; @@ -43,19 +56,19 @@ class GE_FUNC_VISIBILITY StatusFactory { err_desc_[err] = desc; } - void RegisterErrorNo(uint32_t err, const char *desc) { + void RegisterErrorNo(const uint32_t err, const char *const desc) { if (desc == nullptr) { return; } - std::string error_desc = desc; + const std::string error_desc = desc; if (err_desc_.find(err) != err_desc_.end()) { return; } err_desc_[err] = error_desc; } - std::string GetErrDesc(uint32_t err) { - auto iter_find = err_desc_.find(err); + std::string GetErrDesc(const uint32_t err) { + const std::map::const_iterator iter_find = err_desc_.find(err); if (iter_find == err_desc_.end()) { return ""; } @@ -72,61 +85,18 @@ class GE_FUNC_VISIBILITY StatusFactory { class GE_FUNC_VISIBILITY ErrorNoRegisterar { public: - ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } - ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } + ErrorNoRegisterar(const uint32_t err, const std::string &desc) noexcept { + 
StatusFactory::Instance()->RegisterErrorNo(err, desc); + } + ErrorNoRegisterar(const uint32_t err, const char *const desc) noexcept { + StatusFactory::Instance()->RegisterErrorNo(err, desc); + } ~ErrorNoRegisterar() {} }; -// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit -#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ - constexpr ge::Status name = \ - ((0xFF & (static_cast(runtime))) << 30) | ((0xFF & (static_cast(type))) << 28) | \ - ((0xFF & (static_cast(level))) << 25) | ((0xFF & (static_cast(sysid))) << 17) | \ - ((0xFF & (static_cast(modid))) << 12) | (0x0FFF & (static_cast(value))); \ - const ErrorNoRegisterar g_##name##_errorno(name, desc); - -#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); - -using Status = uint32_t; - // General error code GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success"); -GE_ERRORNO(0b11, 0b11, 0b111, 0xFF, 0b11111, FAILED, 0xFFF, "failed"); /*lint !e401*/ - -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PARAM_INVALID, "Parameter invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_NOT_INIT, "GE executor not initialized yet."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "Model id invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "Data size of model invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "Model addr invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Queue id of model invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "The model loaded repeatedly."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Dynamic input size invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "Dynamic batch size invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "AIPP batch parameter empty."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_FORMAT_INVALID, "Format is invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_SHAPE_INVALID, "Shape is invalid."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DATATYPE_INVALID, "Datatype is invalid."); - -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory."); - -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED, "Failed to load model partition."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, "Failed to load weight partition."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "Failed to load task partition."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, "Failed to load op kernel partition."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA, "Failed to release the model data."); 
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_COMMAND_HANDLE, "Command handle error."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_GET_TENSOR_INFO, "Get tensor info error."); -GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_UNLOAD_MODEL, "Load model error."); +GE_ERRORNO(0b11, 0b11, 0b111, 0xFFU, 0b11111, FAILED, 0xFFFU, "failed"); /*lint !e401*/ } // namespace ge diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 6f5bbfbf..208b7eab 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,96 +24,111 @@ #include #include #include "graph/tensor.h" +#include "graph/types.h" namespace ge { // Option key: graph run mode -const char *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; +const char_t *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; +const char_t *const OPTION_DEVICE_TYPE = "ge.deviceType"; // Option key: ome init -const char *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; -const char *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId"; -const char *const OPTION_EXEC_JOB_ID = "ge.exec.jobId"; -const char *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom"; -const char *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd"; -const char *const OPTION_EXEC_RANK_ID = "ge.exec.rankId"; -const char *const OPTION_EXEC_POD_NAME = "ge.exec.podName"; -const char *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode"; -const char *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile"; -const char *const GE_AICPU_FLAG = "ge.aicpuFlag"; -const char *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath"; +const char_t *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; +const char_t *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId"; +const char_t *const OPTION_EXEC_JOB_ID = "ge.exec.jobId"; +const char_t *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom"; +const char_t *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd"; +const char_t *const OPTION_EXEC_RANK_ID = "ge.exec.rankId"; +const char_t *const OPTION_EXEC_POD_NAME = "ge.exec.podName"; +const char_t *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode"; +const char_t *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile"; +const char_t *const GE_AICPU_FLAG = "ge.aicpuFlag"; +const char_t *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath"; // Dump flag and para -const char *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump"; -const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; -const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; -const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; -const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; -const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; -const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; -const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; -const char *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump"; -const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; -const char *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions"; -const char *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions"; +const char_t *const OPTION_EXEC_ENABLE_DUMP = 
"ge.exec.enableDump"; +const char_t *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; +const char_t *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; +const char_t *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; +const char_t *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; +const char_t *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; +const char_t *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; +const char_t *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; +const char_t *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump"; +const char_t *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; +const char_t *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions"; +const char_t *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions"; // profiling flag -const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; -const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; +const char_t *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; +const char_t *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; // Hccl flag, if ge.exec.hcclFlag =1, it means load plugin for opskernel, else:ge.exec.hcclFlag =0 -const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; -const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; -const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; -const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; +const char_t *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; +const char_t *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; +const char_t *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; +const char_t *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; // Dynamic input flag. 
ge.exec.dynamicInput=1, means enable dynaimc input, // ge.exec.dynamicGraphExecuteMode, dynamic_execute[default] -const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; -const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; -const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; +const char_t *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; +const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; +const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; +const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; +const char_t *const OPTION_EXEC_GRAPH_EXEC_TIMEOUT = "ge.exec.graphExecTimeout"; // Option key: memory init -const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; -const char *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; +const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; +const char_t *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; +const char_t *const OPTION_EXEC_REUSE_ZERO_COPY_MEMORY = "ge.exec.reuseZeroCopyMemory"; + +const std::string ATOMIC_CLEAN_POLICY = "ge.exec.atomicCleanPolicy"; + +const char_t *const OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE = "ge.exec.logicalDeviceClusterDeployMode"; +const char_t *const OPTION_EXEC_LOGICAL_DEVICE_ID = "ge.exec.logicalDeviceId"; + namespace configure_option { -const char *const STREAM_NUM = "ge.streamNum"; -const char *const HEAD_STREAM = "ge.headStream"; -const char *const PERF_LEVEL = "ge.perfLevel"; -const char *const ENCRYPT_MODE = "ge.encryptMode"; -const char *const EK_FILE = "ge.ekFile"; -const char *const CERT_FILE = "ge.certFile"; -const char *const HW_KEY_FILE = "ge.hwKeyFile"; -const char *const PRIVATE_KEY_FILE = "ge.privateKeyFile"; -const char *const FRAMEWORK_TYPE = "ge.frameworkType"; -const char *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; -const char *const INSERT_OP_FILE = "ge.insertOpFile"; -const char *const OUTPUT_NODE_NAME = "ge.outputNodeName"; -const char *const COMPRESS_FLAG = "ge.compressFlag"; -const char *const PRECISION_MODE = "ge.exec.precision_mode"; -const char *const SINGLE_OP_FLAG = "ge.exec.single_op"; -const char *const TRAIN_FLAG = "ge.trainFlag"; -const char *const RUN_FLAG = "ge.runFlag"; -const char *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; -const char *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; -const char *const DDK_VERSION_FLAG = "ge.DDK_version"; -const char *const GE_FE_FLAG = "ge.feFlag"; -const char *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; -const char *const OUTPUT_DATATYPE = "ge.outputDatatype"; -const char *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; -const char *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; -const char *const HCOM_PARALLEL = "ge.hcomParallel"; -const char *const AUTO_TUNE_MODE = "ge.autoTuneMode"; -const char *const SOC_VERSION = "ge.socVersion"; -const char *const CORE_TYPE = "ge.engineType"; -const char *const AICORE_NUM = "ge.aicoreNum"; -const char *const L1_FUSION = "ge.l1Fusion"; -const char *const BUFFER_OPTIMIZE = "ge.bufferOptimize"; -const char *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; -const char *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; -const char *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; -const char *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; -const char *const ORIGINAL_MODEL_FILE = 
"ge.originalModelFile"; -const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; -const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; -const char *const PERFORMANCE_MODE = "ge.performance_mode"; -const char *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; -const char *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; +const char_t *const STREAM_NUM = "ge.streamNum"; +const char_t *const HEAD_STREAM = "ge.headStream"; +const char_t *const PERF_LEVEL = "ge.perfLevel"; +const char_t *const ENCRYPT_MODE = "ge.encryptMode"; +const char_t *const EK_FILE = "ge.ekFile"; +const char_t *const CERT_FILE = "ge.certFile"; +const char_t *const HW_KEY_FILE = "ge.hwKeyFile"; +const char_t *const PRIVATE_KEY_FILE = "ge.privateKeyFile"; +const char_t *const FRAMEWORK_TYPE = "ge.frameworkType"; +const char_t *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; +const char_t *const INSERT_OP_FILE = "ge.insertOpFile"; +const char_t *const OUTPUT_NODE_NAME = "ge.outputNodeName"; +const char_t *const COMPRESS_FLAG = "ge.compressFlag"; +const char_t *const PRECISION_MODE = "ge.exec.precision_mode"; +const char_t *const SINGLE_OP_FLAG = "ge.exec.single_op"; +const char_t *const TRAIN_FLAG = "ge.trainFlag"; +const char_t *const RUN_FLAG = "ge.runFlag"; +const char_t *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; +const char_t *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; +const char_t *const DDK_VERSION_FLAG = "ge.DDK_version"; +const char_t *const GE_FE_FLAG = "ge.feFlag"; +const char_t *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; +const char_t *const OUTPUT_DATATYPE = "ge.outputDatatype"; +const char_t *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; +const char_t *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; +const char_t *const HCOM_PARALLEL = "ge.hcomParallel"; +const char_t *const AUTO_TUNE_MODE = "ge.autoTuneMode"; +const char_t *const SOC_VERSION = "ge.socVersion"; +const char_t *const VIRTUAL_TYPE = "ge.virtual_type"; +const char_t *const CORE_TYPE = "ge.engineType"; +const char_t *const AICORE_NUM = "ge.aicoreNum"; +const char_t *const L1_FUSION = "ge.l1Fusion"; +const char_t *const BUFFER_OPTIMIZE = "ge.bufferOptimize"; +const char_t *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; +const char_t *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; +const char_t *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; +const char_t *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; +const char_t *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; +const char_t *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; +const char_t *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; +const char_t *const PERFORMANCE_MODE = "ge.performance_mode"; +const char_t *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode"; +const char_t *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; +const char_t *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; +const char_t *const CUSTOMIZE_DTYPES = "ge.customizeDtypes"; +const char_t *const COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; } // namespace configure_option // Configure stream num by Session constructor options param, // its value should be int32_t type, default value is "1" @@ -223,7 +238,7 @@ const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; const std::string HCOM_PARALLEL = "ge.hcomParallel"; // configure whether to use dynamic batch size -const char *const kDynamicBatchSize = "ge.dynamicBatchSize"; +const char_t *const kDynamicBatchSize = 
"ge.dynamicBatchSize"; // configure threshold of fusion data size for communication op const std::string FUSION_TENSOR_SIZE = "ge.fusionTensorSize"; @@ -232,10 +247,10 @@ const std::string INPUT_SHAPE = "ge.inputShape"; const std::string DYNAMIC_NODE_TYPE = "ge.dynamicNodeType"; // configure whether to use dynamic image size -const char *const kDynamicImageSize = "ge.dynamicImageSize"; +const char_t *const kDynamicImageSize = "ge.dynamicImageSize"; // Configure whether to use dynamic dims -const char *const kDynamicDims = "ge.dynamicDims"; +const char_t *const kDynamicDims = "ge.dynamicDims"; // Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y, // example: GA|RL, support configure multiple, split by | @@ -244,9 +259,16 @@ const std::string AUTO_TUNE_MODE = "ge.autoTuneMode"; // Configure soc version , example: "Ascend310" const std::string SOC_VERSION = "ge.socVersion"; +// configure whether to enable virtualization, +// its value should be "0" or "1", default value is "0" +const std::string VIRTUAL_TYPE = "ge.virtual_type"; + // Configure core type "VectorEngine", default value is "AIcoreEngine" const std::string CORE_TYPE = "ge.engineType"; +// Configure graph exclude one or more engines +const std::string EXCLUDE_ENGINES = "ge.exec.exclude_engines"; + // Configure AICORE NUM const std::string AICORE_NUM = "ge.aicoreNum"; @@ -262,38 +284,51 @@ const std::string ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; // Configure Compress Weight flag const std::string ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; +// Configure Sparse Matrix Weight flag +const std::string ENABLE_SPARSE_MATRIX_WEIGHT = "ge.enableSparseMatrixWeight"; + // Configure fusion switch file path const std::string FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; +// Configure compression optimize file path +const std::string COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; + +// Configure customize dtypes path +const std::string CUSTOMIZE_DTYPES = "ge.customizeDtypes"; + // Save original model const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; // Save original model file name const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile"; -const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; -const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; -const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; +const char_t *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; +const char_t *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; +const char_t *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; // Configure for print op pass // Its value should be "0" or "1", default value is "1" -const char *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; +const char_t *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; // Configure operator compilation path // Its value should be file path, default value is "./" -const char *const DEBUG_DIR = "ge.debugDir"; +const char_t *const DEBUG_DIR = "ge.debugDir"; + +// Configure switch for op status check such as overflow +// Its value should be true of flase +const char_t *const STATUS_CHECK = "ge.status_check"; // Configure operator compiler cache path // Its value should be file path, default value is "./" -const char *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; +const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; // Configure operator compiler cache mode // Its value should be "disable", "enable" or "force", default value is "disable" -const 
char *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode"; +const char_t *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode"; // Configure whether to use single stream. // Its value should be "true" or "false", default value is "false" -const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; +const char_t *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; // Configure input fp16 nodes const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; @@ -318,17 +353,28 @@ const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update"; const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode"; // atc and ir option -const char *const INPUT_SHAPE_RANGE = "input_shape_range"; +const char_t *const INPUT_SHAPE_RANGE = "input_shape_range"; // Configure express high compile performance or high execute performance // normal: no need to compile, used saved .o files directly // high: need to recompile, high execute performance mode const std::string PERFORMANCE_MODE = "ge.performance_mode"; +// For selecting the mode of shape generalization when build graph. +// shape_generalized: Shape will be generalized during graph build. +// shape_precise: Shape will not be generalized, use precise shape. +const std::string SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode"; + const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist"; const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode"; +const std::string OP_WAIT_TIMEOUT = "ge.exec.opWaitTimeout"; + +const std::string OP_EXECUTE_TIMEOUT = "ge.exec.opExecuteTimeout"; + +const char_t *const FILE_CONSTANT_PATH = "ge.exec.value_bins"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; @@ -345,7 +391,7 @@ struct OutputTensorInfo { std::vector dims; // shape description std::unique_ptr data; // tensor data int64_t length; // tensor length - OutputTensorInfo() : data_type(0), dims({}), data(nullptr), length(0) {} + OutputTensorInfo() : data_type(0U), dims({}), data(nullptr), length(0) {} OutputTensorInfo(OutputTensorInfo &&out) : data_type(out.data_type), dims(out.dims), data(std::move(out.data)), length(out.length) {} @@ -367,48 +413,53 @@ using RunAsyncCallback = std::function &)>; // for ir build namespace ir_option { -static const char *const INPUT_FORMAT = "input_format"; -static const char *const INPUT_SHAPE = "input_shape"; -static const char *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE; -static const char *const OP_NAME_MAP = "op_name_map"; -static const char *const IS_DYNAMIC_INPUT = "is_dynamic_input"; -static const char *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout"; -static const char *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout"; -static const char *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes"; -static const char *const OUTPUT = "output"; -static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; -static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; -static const char *const DYNAMIC_DIMS = kDynamicDims; -static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); -static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); -static const char *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str(); -static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; -static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); -static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); -static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); -static const char 
*const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; -static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); -static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); -static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); -static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); -static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); -static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); -static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); -static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; -static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); -static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); -static const char *const LOG_LEVEL = "log"; -static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); -static const char *const DEBUG_DIR = ge::DEBUG_DIR; -static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; -static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; -static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); -static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); -static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); -static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); -static const char *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); -static const char *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); -static const char *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); +static const char_t *const INPUT_FORMAT = "input_format"; +static const char_t *const INPUT_SHAPE = "input_shape"; +static const char_t *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE; +static const char_t *const OP_NAME_MAP = "op_name_map"; +static const char_t *const IS_DYNAMIC_INPUT = "is_dynamic_input"; +static const char_t *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout"; +static const char_t *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout"; +static const char_t *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes"; +static const char_t *const OUTPUT = "output"; +static const char_t *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; +static const char_t *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; +static const char_t *const DYNAMIC_DIMS = kDynamicDims; +static const char_t *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); +static const char_t *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); +static const char_t *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str(); +static const char_t *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; +static const char_t *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); +static const char_t *const CORE_TYPE = ge::CORE_TYPE.c_str(); +static const char_t *const SOC_VERSION = ge::SOC_VERSION.c_str(); +static const char_t *const VIRTUAL_TYPE = ge::VIRTUAL_TYPE.c_str(); +static const char_t *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; +static const char_t *const AICORE_NUM = ge::AICORE_NUM.c_str(); +static const char_t *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); +static const char_t *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); +static const char_t *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); +static const char_t *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); +static const char_t *const 
BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); +static const char_t *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); +static const char_t *const SPARSITY = ge::ENABLE_SPARSE_MATRIX_WEIGHT.c_str(); +static const char_t *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; +static const char_t *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); +static const char_t *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); +static const char_t *const LOG_LEVEL = "log"; +static const char_t *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); +static const char_t *const DEBUG_DIR = ge::DEBUG_DIR; +static const char_t *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; +static const char_t *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; +static const char_t *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); +static const char_t *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); +static const char_t *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); +static const char_t *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); +static const char_t *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); +static const char_t *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str(); +static const char_t *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); +static const char_t *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); +static const char_t *const CUSTOMIZE_DTYPES = "ge.customizeDtypes"; +static const char_t *const COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; // for interface: aclgrphBuildModel #ifdef __GNUC__ @@ -437,19 +488,23 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, OP_BANK_PATH, OP_BANK_UPDATE, PERFORMANCE_MODE, - MODIFY_MIXLIST}; + SHAPE_GENERALIZED_BUILD_MODE, + MODIFY_MIXLIST, + CUSTOMIZE_DTYPES}; // for interface: aclgrphParse const std::set ir_parser_suppported_options = { - INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, - OUT_NODES, ENABLE_SCOPE_FUSION_PASSES}; + INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, + OUT_NODES, ENABLE_SCOPE_FUSION_PASSES}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, SOC_VERSION, + VIRTUAL_TYPE, BUFFER_OPTIMIZE, ENABLE_COMPRESS_WEIGHT, COMPRESS_WEIGHT_CONF, + SPARSITY, PRECISION_MODE, TUNE_DEVICE_IDS, EXEC_DISABLE_REUSED_MEMORY, @@ -464,7 +519,8 @@ const std::set global_options = {CORE_TYPE, DEBUG_DIR, OP_COMPILER_CACHE_DIR, OP_COMPILER_CACHE_MODE, - MODIFY_MIXLIST}; + MODIFY_MIXLIST, + COMPRESSION_OPTIMIZE_CONF}; #endif } // namespace ir_option } // namespace ge diff --git a/inc/external/ge/ge_error_codes.h b/inc/external/ge/ge_error_codes.h index cafc5a64..550471cf 100644 --- a/inc/external/ge/ge_error_codes.h +++ b/inc/external/ge/ge_error_codes.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,42 +32,44 @@ #endif #include +#include #ifdef __cplusplus extern "C" { #endif -static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; -static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; -static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; -static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; -static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; -static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; -static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; -static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; -static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; -static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; -static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; -static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020; -static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021; -static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022; -static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; -static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; -static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; -static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; -static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; -static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; -static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; -static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; +static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000U; +static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009U; +static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011U; +static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012U; +static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013U; +static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014U; +static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015U; +static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016U; +static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017U; +static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018U; +static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019U; +static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020U; +static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021U; +static 
const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022U; +static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000U; +static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001U; +static const uint32_t ACL_ERROR_GE_DEVICE_MEMORY_OPERATE_FAILED = 245002U; +static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000U; +static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005U; +static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006U; +static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007U; +static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008U; +static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009U; #ifdef __cplusplus } // namespace ge diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index 04e059a1..5a542fd8 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -1,18 +1,18 @@ /** -* Copyright 2020 Huawei Technologies Co., Ltd - -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at - -* http://www.apache.org/licenses/LICENSE-2.0 - -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2019-2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef INC_EXTERNAL_GE_IR_BUILD_H_ #define INC_EXTERNAL_GE_IR_BUILD_H_ @@ -36,14 +36,10 @@ #include #include "graph/graph.h" #include "graph/ge_error_codes.h" - -namespace { -const int IR_MAJOR_VERSION = 1; -const int IR_MINOR_VERSION = 0; -const int IR_PATCH_VERSION = 0; -} // namespace - namespace ge { +const int32_t IR_MAJOR_VERSION = 1; +const int32_t IR_MINOR_VERSION = 0; +const int32_t IR_PATCH_VERSION = 0; struct ModelBufferData { std::shared_ptr data = nullptr; @@ -102,10 +98,10 @@ GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, * @retval GRAPH_SUCCESS The function is successfully executed. 
 * @retval OtherValues Failure
 */
-ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
-GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model);
+ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *, const ModelBufferData &))
+GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const std::string &output_file, const ModelBufferData &model);

-GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);
+GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *output_file, const ModelBufferData &model);

 /**
  * @ingroup AscendCL
@@ -117,7 +113,8 @@ GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const M
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version);
+GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int32_t *major_version, int32_t *minor_version,
+                                                   int32_t *patch_version);

 /**
  * @ingroup AscendCL
@@ -129,7 +126,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *mino
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);
+GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char_t *file, const size_t len);

 /**
  * @ingroup AscendCL
@@ -153,7 +150,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type,
  * @param cfg_path [IN] the config file path
  * @return graphStatus
  */
-GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char *cfg_path);
+GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char_t *cfg_path);
 };  // namespace ge
 #endif  // INC_EXTERNAL_GE_IR_BUILD_H_
diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h
new file mode 100644
index 00000000..170c7862
--- /dev/null
+++ b/inc/external/hccl/hccl.h
@@ -0,0 +1,210 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file hccl.h
+ * @brief HCCL API
+ */
+
+#ifndef HCCL_H_
+#define HCCL_H_
+
+#include <hccl/hccl_types.h>
+#include <acl/acl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+/**
+ * @brief Initialize HCCL.
+ *
+ * @param clusterInfo A string identifying the cluster info file path, including the file name.
+ * @param rank An integer identifying the rank.
+ * @param comm A pointer identifying the initialized communication resource.
+ * @return HcclResult
+ * @see HcclCommDestroy()
+ */
+extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm);
+
+/**
+ * @brief Get hccl root info.
+ *
+ * @param rootInfo A pointer identifying the hccl root info.
+ * @return HcclResult
+ */
+extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo);
+
+/**
+ * @brief Initialize HCCL with root info.
+ *
+ * @param nRanks An integer identifying the rank size of the cluster.
+ * @param rootInfo A struct identifying the hccl root info.
+ * @param rank An integer identifying the rank.
+ * @param comm A pointer identifying the initialized communication resource.
+ * @return HcclResult
+ * @see HcclCommDestroy()
+ */
+extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm);
+
+/**
+ * @brief AllReduce operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param count An integer(u64) identifying the number of output data elements.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16,
+ * float32.
+ * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
+ * @param comm A pointer identifying the communication resource the operator is based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
+                                HcclComm comm, aclrtStream stream);
+
+/**
+ * @brief Broadcast operator.
+ *
+ * @param buf A pointer identifying the data address of the operator.
+ * @param count An integer(u64) identifying the number of data elements.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param root An integer(u32) identifying the root rank in the operator.
+ * @param comm A pointer identifying the communication resource the operator is based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
+                                aclrtStream stream);
+
+/**
+ * @brief ReduceScatter operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param recvCount An integer(u64) identifying the number of output data elements.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
+ * @param comm A pointer identifying the communication resource the operator is based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
+                                    HcclReduceOp op, HcclComm comm, aclrtStream stream);
+
+/**
+ * @brief AllGather operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param sendCount An integer(u64) identifying the number of input data elements.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param comm A pointer identifying the communication resource the operator is based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
+                                aclrtStream stream);
+/**
+ * @brief Get the rank size of this comm.
+ *
+ * @param comm A pointer identifying the communication resource to query.
+ * @param rankSize A pointer identifying the rank size.
+ * @return HcclResult
+ */
+extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
+
+/**
+ * @brief Get the rank id of this comm.
+ *
+ * @param comm A pointer identifying the communication resource to query.
+ * @param rank A pointer identifying the rank id.
+ * @return HcclResult
+ */
+extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
+/**
+ * @brief Barrier operator.
+ *
+ * @param comm A pointer identifying the communication resource the operator is based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
+
+/**
+ * @brief Send operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param count An integer(u64) identifying the number of data elements to send.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param destRank An integer identifying the destination rank.
+ * @param comm A pointer identifying the communication resource the operator is based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, uint32_t destRank, HcclComm comm,
+                           aclrtStream stream);
+/**
+ * @brief Recv operator.
+ *
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param count An integer(u64) identifying the number of data elements to receive.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param srcRank An integer identifying the source rank.
+ * @param comm A pointer identifying the communication resource the operator is based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, uint32_t srcRank, HcclComm comm,
+                           aclrtStream stream);
+
+/**
+ * @brief AlltoAllV operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param sendCounts Integer array, where entry i specifies the number of elements to send to rank i.
+ * @param sdispls Integer array, where entry i specifies the displacement (offset from sendBuf, in units of sendType)
+ * from which to send data to rank i.
+ * @param sendType Datatype of send buffer elements, must be one of the following types: int8, int32, int64, uint64,
+ * float16, float32.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param recvCounts Integer array, where entry j specifies the number of elements to receive from rank j.
+ * @param rdispls Integer array, where entry j specifies the displacement (offset from recvBuf, in units of recvType) to
+ * which data from rank j should be written.
+ * @param recvType Datatype of receive buffer elements, must be one of the following types: int8, int32, int64, uint64,
+ * float16, float32.
+ * @param comm A pointer identifying the communication resource the operator is based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+
+extern HcclResult HcclAlltoAllV(const void *sendBuf, const void *sendCounts, const void *sdispls, HcclDataType sendType,
+                                const void *recvBuf, const void *recvCounts, const void *rdispls, HcclDataType recvType,
+                                HcclComm comm, aclrtStream stream);
+
+/**
+ * @brief Destroy HCCL comm
+ *
+ * @param comm A pointer identifying the communication resource to be destroyed.
+ * @return HcclResult
+ * @see HcclCommInitClusterInfo()
+ */
+extern HcclResult HcclCommDestroy(HcclComm comm);
+
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+#endif  // HCCL_H_
diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h
new file mode 100644
index 00000000..2fe98fde
--- /dev/null
+++ b/inc/external/hccl/hccl_types.h
@@ -0,0 +1,101 @@
+/**
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file hccl_types.h
+ * @brief HCCL data type definition
+ *
+ */
+
+#ifndef HCCL_TYPES_H_
+#define HCCL_TYPES_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+/**
+ * @brief HCCL functions return value definition
+ */
+typedef enum {
+  HCCL_SUCCESS = 0,               /**< success */
+  HCCL_E_PARA = 1,                /**< parameter error */
+  HCCL_E_PTR = 2,                 /**< empty pointer */
+  HCCL_E_MEMORY = 3,              /**< memory error */
+  HCCL_E_INTERNAL = 4,            /**< internal error */
+  HCCL_E_NOT_SUPPORT = 5,         /**< not support feature */
+  HCCL_E_NOT_FOUND = 6,           /**< not found specific resource */
+  HCCL_E_UNAVAIL = 7,             /**< resource unavailable */
+  HCCL_E_SYSCALL = 8,             /**< call system interface error */
+  HCCL_E_TIMEOUT = 9,             /**< timeout */
+  HCCL_E_OPEN_FILE_FAILURE = 10,  /**< open file fail */
+  HCCL_E_TCP_CONNECT = 11,        /**< tcp connect fail */
+  HCCL_E_ROCE_CONNECT = 12,       /**< roce connect fail */
+  HCCL_E_TCP_TRANSFER = 13,       /**< tcp transfer fail */
+  HCCL_E_ROCE_TRANSFER = 14,      /**< roce transfer fail */
+  HCCL_E_RUNTIME = 15,            /**< call runtime api fail */
+  HCCL_E_DRV = 16,                /**< call driver api fail */
+  HCCL_E_PROFILING = 17,          /**< call profiling api fail */
+  HCCL_E_CCE = 18,                /**< call cce api fail */
+  HCCL_E_NETWORK = 19,            /**< call network api fail */
+  HCCL_E_AGAIN = 20,              /**< try again */
+  HCCL_E_RESERVED                 /**< reserved */
+} HcclResult;
+
+/**
+ * @brief handle to HCCL communicator
+ */
+typedef void *HcclComm;
+
+/**
+ * @brief HCCL Reduction operation
+ */
+typedef enum {
+  HCCL_REDUCE_SUM = 0,   /**< sum */
+  HCCL_REDUCE_PROD = 1,  /**< prod */
+  HCCL_REDUCE_MAX = 2,   /**< max */
+  HCCL_REDUCE_MIN = 3,   /**< min */
+  HCCL_REDUCE_RESERVED   /**< reserved */
+} HcclReduceOp;
+
+/**
+ * @brief HCCL data type
+ */
+typedef enum {
+  HCCL_DATA_TYPE_INT8 = 0,    /**< int8 */
+  HCCL_DATA_TYPE_INT16 = 1,   /**< int16 */
+  HCCL_DATA_TYPE_INT32 = 2,   /**< int32 */
+  HCCL_DATA_TYPE_FP16 = 3,    /**< fp16 */
+  HCCL_DATA_TYPE_FP32 = 4,    /**< fp32 */
+  HCCL_DATA_TYPE_INT64 = 5,   /**< int64 */
+  HCCL_DATA_TYPE_UINT64 = 6,  /**< uint64 */
+  HCCL_DATA_TYPE_RESERVED     /**< reserved */
+}
HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h new file mode 100644 index 00000000..05122efb --- /dev/null +++ b/inc/external/runtime/rt_error_codes.h @@ -0,0 +1,123 @@ +/** + * Copyright 2019-2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type +static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout +static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t 
ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005;  // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006;           // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007;       // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;      // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;      // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;       // no model resource
+static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011;         // no cdq resource
+static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012;              // over limit
+static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013;             // queue is empty
+static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014;              // queue is full
+static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015;           // repeated init
+static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016;         // aivec over flow
+static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017;               // common over flow
+
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;          // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;        // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;       // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;     // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;         // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;      // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;   // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;             // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;   // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;          // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;           // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;          // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;                 // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;          // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;        // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;   // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;           // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;         // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;  // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;     // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;         // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;               // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;      // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;    // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;     // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;      // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;           // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;        // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;     // debug register failed
+static
const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception +static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal +static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode +static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die +static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id +static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set +static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow +static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect + +#ifdef __cplusplus +} +#endif +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/framework/common/aicpu_op.h b/inc/framework/common/aicpu_op.h index 850ceca3..773d42fd 100644 --- a/inc/framework/common/aicpu_op.h +++ b/inc/framework/common/aicpu_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index 3e646440..aff56d4f 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
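
Note on the new inc/external/hccl headers introduced above: the sketch below shows one plausible end-to-end use of the declared interface. It is a hedged illustration, not part of the patch: device selection, buffer allocation, and the multi-rank launch are elided, and aclrtSynchronizeStream plus ACL_SUCCESS are assumed from the ACL runtime headers rather than from this diff.

    // Illustrative sketch only: one rank doing an fp32 sum allreduce with the
    // interface declared above. aclrt* names are assumed from acl/acl.h.
    #include <hccl/hccl.h>
    #include <acl/acl.h>
    #include <cstdio>

    HcclResult RunAllReduce(const char *cluster_info_path, uint32_t rank, void *send_buf,
                            void *recv_buf, uint64_t count, aclrtStream stream) {
      HcclComm comm = nullptr;
      HcclResult ret = HcclCommInitClusterInfo(cluster_info_path, rank, &comm);
      if (ret != HCCL_SUCCESS) {
        (void)fprintf(stderr, "HcclCommInitClusterInfo failed: %d\n", static_cast<int>(ret));
        return ret;
      }
      // The collective is enqueued on the stream and completes asynchronously.
      ret = HcclAllReduce(send_buf, recv_buf, count, HCCL_DATA_TYPE_FP32, HCCL_REDUCE_SUM, comm, stream);
      if ((ret == HCCL_SUCCESS) && (aclrtSynchronizeStream(stream) != ACL_SUCCESS)) {
        ret = HCCL_E_RUNTIME;  // assumed mapping for a runtime-level failure
      }
      (void)HcclCommDestroy(comm);  // release the communicator on every path
      return ret;
    }
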
@@ -17,6 +17,7 @@
 #ifndef INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_
 #define INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_

+#include <cinttypes>
 #include
 #include "framework/common/ge_inner_error_codes.h"

@@ -33,7 +34,7 @@
 extern "C" {
 #endif

-#define GE_MODULE_NAME static_cast<int>(GE)
+#define GE_MODULE_NAME static_cast<int32_t>(GE)

 // trace status of log
 enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP };

@@ -42,60 +43,79 @@ class GE_FUNC_VISIBILITY GeLog {
  public:
  static uint64_t GetTid() {
 #ifdef __GNUC__
-    uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid));
+    const uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid));
 #else
-    uint64_t tid = static_cast<uint64_t>(GetCurrentThreadId());
+    const uint64_t tid = static_cast<uint64_t>(GetCurrentThreadId());
 #endif
     return tid;
   }
 };

-inline bool IsLogEnable(int module_name, int log_level) {
-  int32_t enable = CheckLogLevel(module_name, log_level);
+inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
+  const int32_t enable = CheckLogLevel(module_name, log_level);
   // 1:enable, 0:disable
   return (enable == 1);
 }

-#define GELOGE(ERROR_CODE, fmt, ...) \
-  dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \
-             ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
-             ##__VA_ARGS__)
-#define GELOGW(fmt, ...) \
-  if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \
-  dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__)
-#define GELOGI(fmt, ...) \
-  if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \
-  dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__)
-#define GELOGD(fmt, ...) \
-  if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \
-  dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__)
-
-#define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__)
-
-#define GELOGT(VALUE, fmt, ...) \
-  do { \
-    TraceStatus stat = VALUE; \
-    const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
-    int idx = static_cast<int>(stat); \
-    char *k = const_cast<char *>("status"); \
-    char *v = const_cast<char *>(TraceStatStr[idx]); \
-    KeyValue kv = {k, v}; \
-    DlogWithKV(static_cast<int>(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \
-               ##__VA_ARGS__); \
-  } while (0)
-
-#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
-  dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \
-             ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
-             ##__VA_ARGS__)
+#define GELOGE(ERROR_CODE, fmt, ...) \
+  do { \
+    dlog_error(GE_MODULE_NAME, "%" PRIu64 " %s: ErrorNo: %" PRIuLEAST8 "(%s) %s" fmt, GeLog::GetTid(), \
+               &__FUNCTION__[0], (ERROR_CODE), ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \
+               ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__); \
+  } while (false)
+
+#define GELOGW(fmt, ...) \
+  do { \
+    if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \
+      dlog_warn(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
+    } \
+  } while (false)
+
+#define GELOGI(fmt, ...) \
+  do { \
+    if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \
+      dlog_info(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
+    } \
+  } while (false)
+
+#define GELOGD(fmt, ...) \
+  do { \
+    if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \
+      dlog_debug(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
+    } \
+  } while (false)
+
+#define GEEVENT(fmt, ...) \
+  do { \
+    dlog_event(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
+  } while (false)
+
+#define GELOGT(VALUE, fmt, ...) \
+  do { \
+    TraceStatus stat = (VALUE); \
+    const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
+    const int32_t idx = static_cast<int32_t>(stat); \
+    char_t *k = const_cast<char_t *>("status"); \
+    char_t *v = const_cast<char_t *>(TraceStatStr[idx]); \
+    KeyValue kv = {k, v}; \
+    DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], \
+               ##__VA_ARGS__); \
+  } while (false)
+
+#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
+  do { \
+    dlog_error((MOD_NAME), "%" PRIu64 " %s: ErrorNo: %" PRIuLEAST8 "(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], \
+               (ERROR_CODE), ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \
+               ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__); \
+  } while (false)

 // print memory when it is greater than 1KB.
-#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \
-  do { \
-    if ((SIZE) > 1024) { \
-      GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \
-    } \
-  } while (0);
+#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \
+  do { \
+    if (static_cast<uint64_t>(SIZE) > 1024UL) { \
+      GELOGI("MallocMemory, func=%s, size=%" PRIu64 ", purpose=%s", (#FUNC), static_cast<uint64_t>(SIZE), (PURPOSE)); \
+    } \
+  } while (false)

 #ifdef __cplusplus
 }
 #endif
diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h
index f06faa1b..c50f83c0 100644
--- a/inc/framework/common/debug/log.h
+++ b/inc/framework/common/debug/log.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -19,17 +19,14 @@
 #include
 #include

-#include
-#include "runtime/rt.h"
-#include "common/string_util.h"
-#include "common/util.h"
-#include "common/util/error_manager/error_manager.h"
+#include "framework/common/string_util.h"
+#include "framework/common/util.h"
 #include "framework/common/debug/ge_log.h"
-#include "ge/ge_api_error_codes.h"
+#include "external/ge/ge_api_error_codes.h"

 #if !defined(__ANDROID__) && !defined(ANDROID)
-#define DOMI_LOGE(fmt, ...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, fmt, ##__VA_ARGS__)
+#define DOMI_LOGE(fmt, ...) GE_LOG_ERROR(GE_MODULE_NAME, (ge::FAILED), fmt, ##__VA_ARGS__)
 #else
 #include
 #if defined(BUILD_VERSION_PERF)
@@ -52,196 +49,116 @@
     GELOGW(__VA_ARGS__); \
   }

-#define GE_LOGE_IF(condition, ...) \
-  if ((condition)) { \
-    DOMI_LOGE(__VA_ARGS__); \
+#define GE_LOGE_IF(condition, ...) \
+  if ((condition)) { \
+    GELOGE((ge::FAILED), __VA_ARGS__); \
   }

 // If expr is not SUCCESS, print the log and return the same value
-#define GE_CHK_STATUS_RET(expr, ...) \
-  do { \
-    const ge::Status _status = (expr); \
-    if (_status != ge::SUCCESS) { \
-      DOMI_LOGE(__VA_ARGS__); \
-      return _status; \
-    } \
-  } while (0);
+#define GE_CHK_STATUS_RET(expr, ...)
\ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE((ge::FAILED), __VA_ARGS__); \ + return _chk_status; \ + } \ + } while (false) // If expr is not SUCCESS, print the log and do not execute return -#define GE_CHK_STATUS(expr, ...) \ - do { \ - const ge::Status _status = (expr); \ - if (_status != ge::SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - } \ - } while (0); +#define GE_CHK_STATUS(expr, ...) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE(_chk_status, __VA_ARGS__); \ + } \ + } while (false) // If expr is not SUCCESS, return the same value -#define GE_CHK_STATUS_RET_NOLOG(expr) \ - do { \ - const ge::Status _status = (expr); \ - if (_status != ge::SUCCESS) { \ - return _status; \ - } \ - } while (0); +#define GE_CHK_STATUS_RET_NOLOG(expr) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + return _chk_status; \ + } \ + } while (false) // If expr is not GRAPH_SUCCESS, print the log and return FAILED #define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ do { \ if ((expr) != ge::GRAPH_SUCCESS) { \ REPORT_CALL_ERROR("E19999", "Operator graph failed"); \ - DOMI_LOGE(__VA_ARGS__); \ - return FAILED; \ + GELOGE(ge::FAILED, __VA_ARGS__); \ + return (ge::FAILED); \ } \ - } while (0); + } while (false) // If expr is not SUCCESS, print the log and execute a custom statement -#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ - do { \ - const ge::Status _status = (expr); \ - GE_CHK_BOOL_EXEC(_status == SUCCESS, exec_expr, __VA_ARGS__); \ - } while (0); +#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ + do { \ + const ge::Status _chk_status = (expr); \ + GE_CHK_BOOL_EXEC(_chk_status == SUCCESS, exec_expr, __VA_ARGS__); \ + } while (false) // If expr is not true, print the log and return the specified status #define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ do { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ - GELOGE(_status, __VA_ARGS__); \ - return _status; \ + GELOGE((_status), __VA_ARGS__); \ + return (_status); \ } \ - } while (0); + } while (false) // If expr is not true, print the log and return the specified status #define GE_CHK_BOOL_RET_STATUS_NOLOG(expr, _status, ...) \ do { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ - return _status; \ + return (_status); \ } \ - } while (0); + } while (false) // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_EXEC(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ } \ } -// If expr is not true, print the log and execute a custom statement -#define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \ - { \ - bool b = (expr); \ - if (!b) { \ - GELOGW(__VA_ARGS__); \ - exec_expr; \ - } \ - } -// If expr is not true, print the log and execute a custom statement -#define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \ - { \ - bool b = (expr); \ - if (!b) { \ - GELOGI(__VA_ARGS__); \ - exec_expr; \ - } \ - } - -// If expr is not true, print the log and execute a custom statement -#define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \ - { \ - bool b = (expr); \ - if (b) { \ - GELOGI(__VA_ARGS__); \ - exec_expr; \ - } \ - } - -// If expr is true, print logs and execute custom statements -#define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) 
\ - { \ - bool b = (expr); \ - if (b) { \ - DOMI_LOGE(__VA_ARGS__); \ - exec_expr; \ - } \ - } -// If expr is true, print the Information log and execute a custom statement -#define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \ - { \ - bool b = (expr); \ - if (b) { \ - GELOGI(__VA_ARGS__); \ - exec_expr; \ - } \ - } - -// If expr is not SUCCESS, print the log and execute the expression + return -#define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \ - { \ - bool b = (expr); \ - if (b) { \ - DOMI_LOGE(__VA_ARGS__); \ - exec_expr; \ - return; \ - } \ - } - -// If expr is not SUCCESS, print the log and execute the expression + return _status -#define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) \ - { \ - bool b = (expr); \ - if (b) { \ - REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ - DOMI_LOGE(__VA_ARGS__); \ - exec_expr; \ - return _status; \ - } \ - } - -// If expr is not true, execute a custom statement -#define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \ - { \ - bool b = (expr); \ - if (!b) { \ - exec_expr; \ - } \ - } - // -----------------runtime related macro definitions------------------------------- // If expr is not RT_ERROR_NONE, print the log -#define GE_CHK_RT(expr) \ - do { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ - } \ - } while (0); +#define GE_CHK_RT(expr) \ + do { \ + const rtError_t _rt_err = (expr); \ + if (_rt_err != RT_ERROR_NONE) { \ + GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_err); \ + } \ + } while (false) // If expr is not RT_ERROR_NONE, print the log and execute the exec_expr expression -#define GE_CHK_RT_EXEC(expr, exec_expr) \ - { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ - exec_expr; \ - } \ - } +#define GE_CHK_RT_EXEC(expr, exec_expr) \ + do { \ + const rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ + exec_expr; \ + } \ + } while (false) // If expr is not RT_ERROR_NONE, print the log and return #define GE_CHK_RT_RET(expr) \ do { \ - rtError_t _rt_ret = (expr); \ + const rtError_t _rt_ret = (expr); \ if (_rt_ret != RT_ERROR_NONE) { \ REPORT_CALL_ERROR("E19999", "Call %s fail, ret: 0x%X", #expr, _rt_ret); \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ return RT_ERROR_TO_GE_STATUS(_rt_ret); \ } \ - } while (0); + } while (false) // If expr is true, execute exec_expr without printing logs #define GE_IF_BOOL_EXEC(expr, exec_expr) \ @@ -256,32 +173,32 @@ try { \ exec_expr0; \ } catch (const std::bad_alloc &) { \ - DOMI_LOGE("Make shared failed"); \ + GELOGE(ge::FAILED, "Make shared failed"); \ exec_expr1; \ } -#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ - { \ - GELOGE(_status, "[Check][InnerData]%s", errormsg); \ - REPORT_INNER_ERROR("E19999", "%s", errormsg); \ - } - -#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ - { \ - GELOGW("%s", errormsg); \ - ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ - } - -#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ - do { \ - bool b = (expr); \ - if (!b) { \ - GELOGE(_status, "%s", errormsg); \ - ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ - return _status; \ - } \ - } while (0) - +#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ + { \ + GELOGE((_status), 
"[Check][InnerData]%s", (errormsg)); \ + REPORT_INNER_ERROR("E19999", "%s", (errormsg)); \ + } + +#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ + { \ + GELOGW("%s", (errormsg)); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"reason"}, {(errormsg)}); \ + } + +#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ + do { \ + const bool b = (expr); \ + if (!b) { \ + GELOGE((_status), "%s", (errormsg)); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"reason"}, {(errormsg)}); \ + return (_status); \ + } \ + } while (false) +namespace ge { template GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { std::string fmt; @@ -290,5 +207,5 @@ GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { fmt = st.str(); return fmt; } - +} // namespace ge #endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ diff --git a/inc/framework/common/file_constant_util.h b/inc/framework/common/file_constant_util.h new file mode 100644 index 00000000..a53c5a24 --- /dev/null +++ b/inc/framework/common/file_constant_util.h @@ -0,0 +1,51 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H +#define INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H + +#include +#include +#include +#include "ge/ge_api_error_codes.h" +#include "nlohmann/json.hpp" +#include "graph/op_desc.h" +#include "graph/ge_tensor.h" + +namespace ge { +struct FileConstantInfo { + std::string value_bin_file_id; + std::string value_bin_file_path; +}; + +struct OptionInfo { + std::vector info; +}; + +void from_json(const nlohmann::json &j, FileConstantInfo &info); + +void from_json(const nlohmann::json &j, OptionInfo &option_info); + +Status GetFilePathFromOption(std::map &file_id_and_path_map); + +Status CopyOneWeightFromFile(const void *const curr_dev_ptr, const std::string &value, const size_t file_constant_size, + size_t &left_size); + +Status GetFilePath(const OpDescPtr &op_desc, const std::map &file_id_and_path_map, + std::string &file_path); +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h index e910e346..00610af5 100644 --- a/inc/framework/common/fmk_error_codes.h +++ b/inc/framework/common/fmk_error_codes.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,14 +20,18 @@ #if defined(_MSC_VER) #ifdef FUNC_VISIBILITY #define GE_FUNC_VISIBILITY _declspec(dllexport) +#define GE_OBJECT_VISIBILITY #else #define GE_FUNC_VISIBILITY +#define GE_OBJECT_VISIBILITY #endif #else #ifdef FUNC_VISIBILITY #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#define GE_OBJECT_VISIBILITY #else #define GE_FUNC_VISIBILITY +#define GE_OBJECT_VISIBILITY __attribute__((visibility("hidden"))) #endif #endif @@ -42,23 +46,23 @@ #define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, name, value) #define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, name, value) -#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); +#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc)); // Interface for Obtaining Error Code Description #define GET_ERRORNO_STR(value) domi::StatusFactory::Instance()->GetErrDesc(value) -const int MODID_OMG = 1; // OMG module ID -const int MODID_OME = 2; // OME module ID -const int MODID_CALIBRATION = 3; // Calibration module ID - namespace domi { +constexpr int32_t MODID_OMG = 1; // OMG module ID +constexpr int32_t MODID_OME = 2; // OME module ID +constexpr int32_t MODID_CALIBRATION = 3; // Calibration module ID + class GE_FUNC_VISIBILITY StatusFactory { public: static StatusFactory *Instance(); - void RegisterErrorNo(uint32_t err, const std::string &desc); + void RegisterErrorNo(const uint32_t err, const std::string &desc); - std::string GetErrDesc(uint32_t err); + std::string GetErrDesc(const uint32_t err); protected: StatusFactory() {} @@ -70,7 +74,9 @@ class GE_FUNC_VISIBILITY StatusFactory { class GE_FUNC_VISIBILITY ErrorNoRegisterar { public: - ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } + ErrorNoRegisterar(const uint32_t err, const std::string &desc) { + StatusFactory::Instance()->RegisterErrorNo(err, desc); + } ~ErrorNoRegisterar() {} }; diff --git a/inc/framework/common/fmk_types.h b/inc/framework/common/fmk_types.h index f84390da..906e2a69 100644 --- a/inc/framework/common/fmk_types.h +++ b/inc/framework/common/fmk_types.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/common/ge_compiler_options.h b/inc/framework/common/ge_compiler_options.h index 5c947346..075c9403 100644 --- a/inc/framework/common/ge_compiler_options.h +++ b/inc/framework/common/ge_compiler_options.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/common/ge_format_util.h b/inc/framework/common/ge_format_util.h index dfceefb8..c6decdcc 100644 --- a/inc/framework/common/ge_format_util.h +++ b/inc/framework/common/ge_format_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
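
Note on DEF_ERRORNO and ErrorNoRegisterar above: the registrar is a global object whose constructor runs during static initialization and records the description in the StatusFactory singleton before main() starts. Below is a simplified, self-contained mock of that pattern; the real classes live in fmk_error_codes.h, and the PARSE_FAILED code is invented for the example.

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    // Simplified stand-in for domi::StatusFactory: a singleton mapping
    // error numbers to human-readable descriptions.
    class StatusFactory {
     public:
      static StatusFactory *Instance() {
        static StatusFactory inst;
        return &inst;
      }
      void RegisterErrorNo(const uint32_t err, const std::string &desc) { err_desc_[err] = desc; }
      std::string GetErrDesc(const uint32_t err) {
        const auto it = err_desc_.find(err);
        return (it == err_desc_.end()) ? "" : it->second;
      }
     private:
      std::map<uint32_t, std::string> err_desc_;
    };

    // Constructing the registrar performs the registration as a side effect.
    class ErrorNoRegisterar {
     public:
      ErrorNoRegisterar(const uint32_t err, const std::string &desc) {
        StatusFactory::Instance()->RegisterErrorNo(err, desc);
      }
    };

    #define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc));

    constexpr uint32_t PARSE_FAILED = 1001U;  // hypothetical error number
    DEF_ERRORNO(PARSE_FAILED, "Parsing failed!")  // registered before main() runs

    int main() {
      std::cout << StatusFactory::Instance()->GetErrDesc(PARSE_FAILED) << "\n";
      return 0;
    }
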
@@ -19,7 +19,7 @@ #include -#include "common/ge_inner_error_codes.h" +#include "framework/common/ge_inner_error_codes.h" #include "graph/tensor.h" namespace ge { @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY GeFormatUtil { /// @param [out] dst_shape destination shape /// @return Status /// - static Status TransShape(const TensorDesc &src_desc, Format dst_format, std::vector &dst_shape); + static Status TransShape(const TensorDesc &src_desc, const Format dst_format, std::vector &dst_shape); }; } // namespace ge diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index 3697a526..fd80d2bd 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,17 +22,57 @@ #include #include "ge/ge_api_error_codes.h" +// Each module defines error codes using the following macros, name can not be modified to (name) +#define GE_ERRORNO_COMMON(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::COMMON_MODULE, name, (value), (desc)) +#define GE_ERRORNO_CLIENT(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::CLIENT_MODULE, name, (value), (desc)) +#define GE_ERRORNO_INIT(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::INIT_MODULE, name, (value), (desc)) +#define GE_ERRORNO_SESSION(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::SESSION_MODULE, name, (value), (desc)) +#define GE_ERRORNO_GRAPH(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GRAPH_MODULE, name, (value), (desc)) +#define GE_ERRORNO_ENGINE(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::ENGINE_MODULE, name, (value), (desc)) +#define GE_ERRORNO_OPS(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::OPS_MODULE, name, (value), (desc)) +#define GE_ERRORNO_PLUGIN(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::PLUGIN_MODULE, name, (value), (desc)) +#define GE_ERRORNO_RUNTIME(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::RUNTIME_MODULE, name, (value), (desc)) +#define GE_ERRORNO_EXECUTOR(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_DEVICE, ge::InnErrorCodeType::ERROR_CODE, 
ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::EXECUTOR_MODULE, name, (value), (desc)) +#define GE_ERRORNO_GENERATOR(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GENERATOR_MODULE, name, (value), (desc)) + +// Get error code description +#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) + +#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) + namespace ge { // System ID -enum SystemIdType { SYSID_GE = 8 }; +enum class InnSystemIdType { SYSID_GE = 8 }; // Runtime location -enum LogRuntime { +enum class InnLogRuntime { RT_HOST = 0b01, RT_DEVICE = 0b10, }; // Sub model -enum SubModuleId { +enum class InnSubModuleId { COMMON_MODULE = 0, CLIENT_MODULE = 1, INIT_MODULE = 2, @@ -47,13 +87,13 @@ enum SubModuleId { }; // Error code type -enum ErrorCodeType { +enum class InnErrorCodeType { ERROR_CODE = 0b01, EXCEPTION_CODE = 0b10, }; // Error level -enum ErrorLevel { +enum class InnErrorLevel { COMMON_LEVEL = 0b000, SUGGESTION_LEVEL = 0b001, MINOR_LEVEL = 0b010, @@ -61,33 +101,6 @@ enum ErrorLevel { CRITICAL_LEVEL = 0b100, }; -// Each module defines error codes using the following macros -#define GE_ERRORNO_COMMON(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, value, desc) -#define GE_ERRORNO_CLIENT(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, value, desc) -#define GE_ERRORNO_INIT(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, value, desc) -#define GE_ERRORNO_SESSION(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, value, desc) -#define GE_ERRORNO_GRAPH(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, value, desc) -#define GE_ERRORNO_ENGINE(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, value, desc) -#define GE_ERRORNO_OPS(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, value, desc) -#define GE_ERRORNO_PLUGIN(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, value, desc) -#define GE_ERRORNO_RUNTIME(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, value, desc) -#define GE_ERRORNO_EXECUTOR(name, value, desc) \ - GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, value, desc) -#define GE_ERRORNO_GENERATOR(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, value, desc) - -// Get error code description -#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) - // Common module error code definition GE_ERRORNO_COMMON(MEMALLOC_FAILED, 0, "Failed to allocate memory!"); // 1343225856 GE_ERRORNO_COMMON(PARAM_INVALID, 1, "Parameter's invalid!"); // 1343225857 @@ -125,13 +138,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized. 
GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized.");  // 1343229963

 // Init module error code definition
-GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported.");  // 1343234048
-GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization.");  // 1343234049
-GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported.");  // 1343234050
-GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported.");  // 1343234051
-GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done.");  // 1343234052
+GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported.");                 // 1343234048
+GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization.");       // 1343234049
+GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported.");               // 1343234050
+GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported.");  // 1343234051
+GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profiling initializations have not been done.");            // 1343234052
 GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5,
-                "Profiling command mode which is preferred is running, the api mode will not work.");  // 1343234053
+                "Profiling command mode which is preferred is running, the api mode will not work.");  // 1343234053

 // Session module error code definition
 GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session.");  // 1343238144
@@ -216,8 +229,8 @@ GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed.");
 GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device");  // 1343246338

 // Optimize errocode
-GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted.");  // 1343242303
-GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed.");  // 1343242304
+GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted.");  // 1343242303
+GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph is not changed.");   // 1343242304

 // Ops module error code definition
 GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore.");  // 1343250432
@@ -313,7 +326,6 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma
 GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed.");
 GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed.");

-#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR)
 }  // namespace ge
 #endif  // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 64231b8c..bbbbf4b2 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
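
Note on the scoped-enum migration above (InnSystemIdType, InnLogRuntime, InnSubModuleId, InnErrorCodeType, InnErrorLevel): `enum class` enumerators neither leak into the enclosing scope nor convert to integers implicitly, which is why the GE_ERRORNO_* macros now spell out ge::InnLogRuntime::RT_HOST and friends. A compact illustration follows; the bit layout in Compose() is invented for the example and is not the actual GE_ERRORNO encoding.

    #include <cstdint>
    #include <iostream>

    // Scoped enums modeled on the patch; values match the header excerpt.
    enum class InnLogRuntime : uint32_t { RT_HOST = 0b01, RT_DEVICE = 0b10 };
    enum class InnSystemIdType : uint32_t { SYSID_GE = 8 };

    // Hypothetical composition: explicit casts are mandatory for enum class.
    constexpr uint32_t Compose(const InnLogRuntime rt, const InnSystemIdType sys, const uint32_t value) {
      return (static_cast<uint32_t>(rt) << 30U) | (static_cast<uint32_t>(sys) << 24U) | value;
    }

    int main() {
      // InnLogRuntime rt = 1;  // ill-formed: no implicit int -> enum class conversion
      // uint32_t v = RT_HOST;  // ill-formed: enumerator is not in the enclosing scope
      std::cout << Compose(InnLogRuntime::RT_HOST, InnSystemIdType::SYSID_GE, 42U) << "\n";
      return 0;
    }
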
@@ -17,31 +17,39 @@ #ifndef INC_FRAMEWORK_COMMON_GE_TYPES_H_ #define INC_FRAMEWORK_COMMON_GE_TYPES_H_ -#include +#include #include #include #include "framework/common/fmk_error_codes.h" -#include "ge/ge_api_error_codes.h" +#include "external/ge/ge_api_error_codes.h" #include "external/graph/types.h" #include "external/ge/ge_api_types.h" namespace ge { enum RuntimeType { HOST = 0, DEVICE = 1 }; -enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 }; +enum class PerfLevel : int32_t { + GEN_TASK_WITH_FUSION = -1, + GEN_TASK_WITHOUT_L2FUSION = 3, + GEN_TASK_WITHOUT_FUSION = 4 +}; enum FrameworkType { CAFFE = 0, MINDSPORE = 1, TENSORFLOW = 3, - ANDROID_NN, - ONNX, + ANDROID_NN = 4, + ONNX = 5, }; +enum class GraphStage : int64_t { GRAPH_STAGE_FUZZ = 0, GRAPH_STAGE_RESERVED }; + +const char_t *const kGraphDumpStage = "DumpStage"; + const std::map kFwkTypeToStr = { - {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; + {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; enum OpEngineType { ENGINE_SYS = 0, // default engine @@ -53,28 +61,52 @@ enum OpEngineType { enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE }; -const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; -const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; +const char_t *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; +const char_t *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; // profiling data + const std::string kTaskTypeAicore = "AI_CORE"; const std::string kTaskTypeAicpu = "AI_CPU"; const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; +const std::string kTaskTypeFftsPlus = "FFTS_PLUS"; +const std::string kEngineNameVectorCore = "VectorEngine"; + +const std::string kEngineNameHccl = "ops_kernel_info_hccl"; +const std::string kEngineNameRts = "DNN_VM_RTS_OP_STORE"; +const std::string kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; +const std::string kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE"; +const std::string kEngineNameAiCpu = "aicpu_ascend_kernel"; +const std::string kEngineNameAiCpuTf = "aicpu_tf_kernel"; +const std::string kEngineNameAiCore = "AIcoreEngine"; +const std::string kAtomicOpType = "DynamicAtomicAddrClean"; + +const std::string kShapeTypeStatic = "static"; +const std::string kShapeTypeDynamic = "dynamic"; + +constexpr uint64_t kInferSessionId = 0U; +constexpr uint64_t kReleaseFlag = 1U; +constexpr uint32_t kInvalidModelId = 0xFFFFFFFFU; +constexpr size_t kNumTaskWithAtomicAddrCleanTask = 2U; +constexpr uint32_t INVALID_MODEL_ID = 0xFFFFFFFFUL; // dynamic execute mode -const char *const kLazyRecompile = "lazy_recompile"; +const char_t *const kLazyRecompile = "lazy_recompile"; + +constexpr size_t kMaxHostMemInputLen = 128U; // 64 aligned // Data cache, including data address and length struct DataBuffer { - public: void *data; // Data address uint64_t length; // Data length bool isDataSupportMemShare = false; - uint32_t placement = 0; - DataBuffer(void *dataIn, uint64_t len, bool isSupportMemShare, uint32_t placement = 0) - : data(dataIn), length(len), isDataSupportMemShare(isSupportMemShare), placement(placement) {} + uint32_t placement = 0U; - DataBuffer() : data(nullptr), length(0), isDataSupportMemShare(false) {} + DataBuffer(void *const data_in, const uint64_t data_len, const bool is_support_mem_share = false, + const uint32_t data_placement = 0U) + : data(data_in), length(data_len), 
isDataSupportMemShare(is_support_mem_share), placement(data_placement) {} + + DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false), placement(0U) {} }; /// @@ -86,7 +118,7 @@ struct InputData { uint32_t timestamp; // Data creation time uint32_t timeout; // Processing timeout uint32_t model_id; // Model ID required for data processing - uint64_t request_id = 0; // Request ID + uint64_t request_id = 0UL; // Request ID std::vector blobs; // Actual input data, currently only supports one input bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false std::string batch_label; // Gear used for current inference in dynamic batch scene @@ -113,10 +145,10 @@ struct Command { // The definition of I/O shape description struct ShapeDescription { - int64_t num = 0; - int64_t channel = 0; - int64_t height = 0; - int64_t width = 0; + int64_t num = 0L; + int64_t channel = 0L; + int64_t height = 0L; + int64_t width = 0L; std::vector dims; std::vector> shape_ranges; }; @@ -186,14 +218,14 @@ struct AippConfigInfo { int32_t mean_chn_1; int32_t mean_chn_2; int32_t mean_chn_3; - float min_chn_0; - float min_chn_1; - float min_chn_2; - float min_chn_3; - float var_reci_chn_0; - float var_reci_chn_1; - float var_reci_chn_2; - float var_reci_chn_3; + float32_t min_chn_0; + float32_t min_chn_1; + float32_t min_chn_2; + float32_t min_chn_3; + float32_t var_reci_chn_0; + float32_t var_reci_chn_1; + float32_t var_reci_chn_2; + float32_t var_reci_chn_3; int8_t support_rotation; uint32_t related_input_rank; uint32_t max_src_image_size; @@ -202,26 +234,42 @@ struct AippConfigInfo { // The structure of offline Modeldata struct ModelData { void *model_data = nullptr; // Model binary data start addr - uint32_t model_len = 0; // Model binary data length + uint32_t model_len = 0U; // Model binary data length int32_t priority = 0; // Model priority std::string key; // Key path for encrypt model, Empty for unencrypt std::string om_name; // om file name, used for data dump }; +struct ModelParam { + ModelParam() : priority(0), mem_base(0U), mem_size(0U), weight_base(0U), weight_size(0U) {} + ModelParam(const int32_t pri, const uintptr_t m_base, const size_t m_len, const uintptr_t w_base, const size_t w_len) + : priority(pri), mem_base(m_base), mem_size(m_len), weight_base(w_base), weight_size(w_len) {} + ~ModelParam() = default; + + int32_t priority; + uintptr_t mem_base; + size_t mem_size; + uintptr_t weight_base; + size_t weight_size; +}; + // The definition of Model information struct ModelInfo { - uint32_t version = 0; + uint32_t version = 0U; std::string name; - bool is_encrypt = 0; // 0:unencrypt, 1:encrypt + bool is_encrypt = false; // 0:unencrypt, 1:encrypt std::vector input_desc; std::vector output_desc; - uint8_t reserved[3] = {0}; // 3-byte reserved field + uint8_t reserved[3] = {0U}; // 3-byte reserved field }; // Asynchronous callback interface, implemented by the caller class GE_FUNC_VISIBILITY ModelListener { public: virtual ~ModelListener() {} + ModelListener() = default; + ModelListener(const ModelListener &) = delete; + ModelListener &operator=(const ModelListener &) = delete; /// /// @brief Asynchronous callback interface /// @param [in] model_id Model ID of the callback @@ -230,6 +278,18 @@ class GE_FUNC_VISIBILITY ModelListener { /// virtual Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code, std::vector &outputs) = 0; + + virtual void SetCallback(const RunAsyncCallback &callback) { + (void)callback; + } + + virtual uint32_t 
GetResultCode() { + return 0U; + }; + + virtual Status ResetResult() { + return SUCCESS; + }; }; // OMM configuration item @@ -249,6 +309,7 @@ struct Options { int32_t physical_device_id; std::string profiling_mode; std::string profiling_options; + int32_t graphExecTimeout; }; // Profiling info of task @@ -268,13 +329,24 @@ struct TaskDescInfo { std::vector output_format; std::vector> output_shape; std::vector output_data_type; + uint32_t context_id = 0xFFFFFFFFUL; }; struct OpDescInfo { std::string op_name; std::string op_type; - uint32_t task_id; - uint32_t stream_id; + uint32_t task_id = 0U; + uint32_t stream_id = 0U; + uint32_t imply_type = 0U; + uint32_t block_dim = 0U; + std::string op_file_path; + std::string dev_func; + std::string tvm_magic; + uint32_t tiling_key = 0U; + uintptr_t args = 0U; + std::string tiling_data; + std::string node_info; + std::vector workspace_bytes; std::vector input_format; std::vector> input_shape; std::vector input_data_type; diff --git a/inc/framework/common/gflags_util.h b/inc/framework/common/gflags_util.h index 6e9ea41b..5d374261 100644 --- a/inc/framework/common/gflags_util.h +++ b/inc/framework/common/gflags_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index 2a63291c..fda86b19 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
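A minimal sketch of implementing the ModelListener interface shown above. The element type of the outputs vector is elided in this diff and is assumed here to be ge::Tensor; GELOGI is the logging macro from framework/common/debug/log.h:

#include <vector>
#include "framework/common/debug/log.h"
#include "framework/common/ge_types.h"
#include "external/graph/tensor.h"

class LoggingListener : public ge::ModelListener {
 public:
  ge::Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code,
                           std::vector<ge::Tensor> &outputs) override {  // element type assumed
    (void)outputs;
    result_code_ = result_code;  // remembered so GetResultCode can report it
    GELOGI("Model %u finished request %u with result code %u.", model_id, data_index, result_code);
    return ge::SUCCESS;
  }

  uint32_t GetResultCode() override {
    return result_code_;
  }

 private:
  uint32_t result_code_ = 0U;
};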
@@ -20,11 +20,10 @@ #include #include -#include "common/fmk_types.h" -#include "common/helper/om_file_helper.h" +#include "framework/common/helper/om_file_helper.h" #include "common/model/ge_model.h" #include "common/model/ge_root_model.h" -#include "common/types.h" +#include "framework/common/types.h" #include "graph/model.h" namespace ge { @@ -34,77 +33,71 @@ class GE_FUNC_VISIBILITY ModelHelper { ~ModelHelper(); Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, - ge::ModelBufferData &model); - Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, const string &output_file, - ModelBufferData &model, bool is_unknown_shape); - Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); + ge::ModelBufferData &model) const; + Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, + const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape) const; + Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file) const; Status LoadModel(const ge::ModelData &model_data); Status LoadRootModel(const ge::ModelData &model_data); - Status GetModelBufferData(ge::ModelBufferData &model); - - const ModelFileHeader *GetFileHeader() const { - return file_header_; - } + static void SetModelToGeModel(const GeModelPtr &ge_model, Model &model); GeModelPtr GetGeModel(); GeRootModelPtr GetGeRootModel(); - void SetSaveMode(bool val) { + void SetSaveMode(const bool val) { is_offline_ = val; } - bool GetSaveMode(void) const { - return is_offline_; - } + bool GetModelType() const { return is_unknown_shape_model_; - }; + } - Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name); - Status GetModelNameFromMergedGraphName(const std::string &graph_name, std::string &model_name); + Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name) const; + Status GetModelNameFromMergedGraphName(const ComputeGraphPtr &compute_graph, std::string &model_name) const; private: bool is_assign_model_ = false; bool is_offline_ = true; bool is_unknown_shape_model_ = false; ModelFileHeader *file_header_ = nullptr; - // Encrypted model need delete temp model and unencrypted model need not delete model - uint8_t *model_addr_tmp_ = nullptr; - uint32_t model_len_tmp_ = 0; GeModelPtr model_; GeRootModelPtr root_model_; - ModelHelper(const ModelHelper &); - ModelHelper &operator=(const ModelHelper &); - Status GenerateGeModel(OmFileLoadHelper &om_load_helper); - Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); - Status LoadModelData(OmFileLoadHelper &om_load_helper); - void SetModelToGeModel(GeModelPtr &ge_model, Model &model); - Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); - Status LoadWeights(OmFileLoadHelper &om_load_helper); - Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); - Status LoadTask(OmFileLoadHelper &om_load_helper); - Status LoadTask(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); - Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper); - Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); - Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper); - Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); 
- Status ReleaseLocalModelData() noexcept; - Status SaveModelPartition(std::shared_ptr &om_file_save_helper, ModelPartitionType type, - const uint8_t *data, size_t size, size_t model_index); + ModelHelper(const ModelHelper &) = default; + ModelHelper &operator=(const ModelHelper &) = default; + + bool IsPartitionedGraph(const GeModelPtr &cur_model) const; + + Status GenerateGeModel(const OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index, + const bool is_dyn_root); + Status GenerateGeRootModel(const OmFileLoadHelper &om_load_helper); + + Status LoadModelData(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, + const size_t mode_index) const; + Status LoadWeights(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, + const size_t mode_index) const; + Status LoadTask(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, const size_t mode_index) const; + Status LoadTBEKernelStore(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, + const size_t mode_index) const; + Status LoadCustAICPUKernelStore(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, + const size_t mode_index) const; + + Status SaveModelPartition(std::shared_ptr &om_file_save_helper, const ModelPartitionType type, + const uint8_t *const data, const size_t size, const size_t model_index) const; Status SaveModelDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - Buffer &model_buffer, size_t model_index = 0); - Status SaveSizeToModelDef(const GeModelPtr &ge_model); + Buffer &model_buffer, const size_t model_index = 0U) const; + Status SaveSizeToModelDef(const GeModelPtr &ge_model) const; Status SaveModelWeights(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - size_t model_index = 0); + const size_t model_index = 0U) const; Status SaveModelTbeKernel(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - size_t model_index = 0); + const size_t model_index = 0U) const; Status SaveModelCustAICPU(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - size_t model_index = 0); + const size_t model_index = 0U) const; Status SaveModelTaskDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - Buffer &task_buffer, size_t model_index = 0); + Buffer &task_buffer, const size_t model_index = 0U) const; Status SaveModelHeader(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - size_t model_num = 1); + const size_t model_num = 1U) const; Status SaveAllModelPartiton(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - Buffer &model_buffer, Buffer &task_buffer, size_t model_index = 0); + Buffer &model_buffer, Buffer &task_buffer, const size_t model_index = 0U) const; }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index 34509b39..4ec5fa75 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
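A hedged sketch of the load path declared above: LoadModel parses a serialized om buffer, after which the typed model is fetched from the helper. The wrapper function and its error logging are illustrative, not part of the header:

#include "framework/common/helper/model_helper.h"

ge::Status LoadOfflineModel(const ge::ModelData &model_data, ge::GeModelPtr &ge_model) {
  ge::ModelHelper helper;
  const ge::Status ret = helper.LoadModel(model_data);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "Failed to load om model data.");
    return ret;
  }
  ge_model = helper.GetGeModel();  // null if the helper holds no parsed model
  return (ge_model == nullptr) ? ge::FAILED : ge::SUCCESS;
}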
@@ -21,25 +21,20 @@ #include #include "external/ge/ge_ir_build.h" -#include "framework/common/fmk_types.h" #include "framework/common/types.h" #include "framework/common/ge_types.h" -using ProcParam = struct PROC_PARAM; -using std::string; -using std::vector; - namespace ge { struct ModelPartition { ModelPartitionType type; - uint8_t *data = 0; - uint32_t size = 0; + const uint8_t *data = nullptr; + uint32_t size = 0U; }; struct OmFileContext { std::vector partition_datas_; - std::vector partition_table_; - uint32_t model_data_len_ = 0; + std::vector partition_table_; + uint32_t model_data_len_ = 0U; }; struct SaveParam { @@ -53,57 +48,56 @@ struct SaveParam { class GE_FUNC_VISIBILITY OmFileLoadHelper { public: - Status Init(const ge::ModelData &model); + Status Init(const ModelData &model); - Status Init(uint8_t *model_data, const uint32_t model_data_size); + Status Init(uint8_t *const model_data, const uint32_t model_data_size); - Status Init(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); + Status Init(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num); - Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); + Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition); - Status GetModelPartition(ModelPartitionType type, ModelPartition &partition, size_t model_index); + Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition, const size_t model_index) const; OmFileContext context_; - vector model_contexts_; + std::vector model_contexts_; private: - Status CheckModelValid(const ge::ModelData &model) const; - - Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); + Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size, const size_t model_index, + size_t &mem_offset); - Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); + Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num); bool is_inited_{false}; }; class GE_FUNC_VISIBILITY OmFileSaveHelper { public: - ModelFileHeader &GetModelFileHeader() { return model_header_; } + ModelFileHeader &GetModelFileHeader() { + return model_header_; + } - uint32_t GetModelDataSize() const { return context_.model_data_len_; } + uint32_t GetModelDataSize() const; ModelPartitionTable *GetPartitionTable(); - Status AddPartition(ModelPartition &partition); + Status AddPartition(const ModelPartition &partition); - Status AddPartition(ModelPartition &partition, size_t cur_index); + Status AddPartition(const ModelPartition &partition, const size_t cur_index); - const std::vector &GetModelPartitions() const; + Status SaveModel(const SaveParam &save_param, const char_t *const output_file, ModelBufferData &model, + const bool is_offline = true); - Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model, - bool is_offline = true); + Status SaveModelToFile(const char_t *const output_file, ModelBufferData &model, const bool is_offline = true); - Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); + ModelPartitionTable *GetPartitionTable(const size_t cur_ctx_index); - vector model_contexts_; + Status SaveRootModel(const SaveParam &save_param, const char_t *const output_file, ModelBufferData &model, + const bool is_offline); + private: ModelFileHeader model_header_; - 
OmFileContext context_; - - ModelPartitionTable *GetPartitionTable(size_t cur_ctx_index); - - Status SaveRootModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline); + std::vector model_contexts_; }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ diff --git a/inc/framework/common/l2_cache_optimize.h b/inc/framework/common/l2_cache_optimize.h index fdb1c8b5..478c1750 100644 --- a/inc/framework/common/l2_cache_optimize.h +++ b/inc/framework/common/l2_cache_optimize.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,107 +17,24 @@ #ifndef INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ #define INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ -#include +#include #include #include #include #include -#include "common/types.h" -#include "common/util.h" +#include "framework/common/types.h" +#include "framework/common/util.h" #include "graph/compute_graph.h" -using std::vector; - namespace ge { // Size of RC memory alignment, 2M -constexpr size_t ALIGN_SIZE = 2097152; - -constexpr uint32_t RC_VALUE_DEFAULT = 1; -constexpr uint32_t RC_VALUE_MAX = 32; - -// RC data type classification -enum RCType { - RC_DEFAULT, // Such as temporary workspace memory of operator, variable (including global and local variable) - RC_HCOM, // Output of gradient aggregation, RC value should be set to 0 - RC_L2LOSS, // Parameter of L2 loss operator, RC value should be set to 0 - RC_INPUTOUTPUT, // Input and output tensor of operator, RC value is returned by FE calculation - RC_WEIGHTS, // The weight, fp16, RC value used by FP/BP operator should be set to 1 or the actual access numbers - RC_DW, // The gradient data DW and RC value output by BP operator - // should be set to 1 or the actual access numbers - RC_ARGS // Args of FlowTable, actual access numbers -}; - -enum MemType { INPUT_TENSOR, OUTPUT_TENSOR, WEIGHT, WORKSPACE }; - -// Memory usage information < node, type, number > -struct NodeInfo { - string nodeName; - MemType memType; - size_t index; -}; - -// Memory block RC value -struct RCMemoryBlock { - RCType type; // RC type - size_t blockSize; // memory block size - size_t headOffset; // Start offset from base address - size_t tailOffset; // End offset from base address - uint32_t rcCount; // RC value - NodeInfo nodeInfo; // Input and output indexes of node objects to which RC belongs -}; - -// L2Cache optimizer -class GE_FUNC_VISIBILITY L2CacheOptimize { - public: - explicit L2CacheOptimize(ge::ComputeGraphPtr &graph); - ~L2CacheOptimize(); - - // Collect the information L2Cache Memory optimization - Status Gath(); - - private: - ge::ComputeGraphPtr graph_; - - // Save RC block information list - vector weightRCs; - vector opRCs; - - // Extract RC information generated by FE from compiled graph - void RetirveRCinfo(); - - // Take the maximum common divisor of RC values for the duplicate address - void Merge(vector &blocks); - - // The RC information is aligned with the 2m address - void Align(vector &blocks); - - // Weight of l2loss operator, output of gradient aggregation output, RC value set to 0 - void HandleOutputZeroRC(RCType type, ge::NodePtr node, vector &outputList, vector &blocks); - - // Processing operator input Tensor's RC - void HandOPInput(ge::NodePtr node, vector &inputList, vector &blocks); - - // 
Processing operator output Tensor's RC - void HandOPoutput(ge::NodePtr node, vector &outputList, vector &blocks); +constexpr size_t ALIGN_SIZE = 2097152U; - // maximum common divisor - uint32_t Measure(uint32_t x, uint32_t y) { - if (x == 0 || y == 0) return RC_VALUE_DEFAULT; - uint32_t z = y; - while (x % y != 0) { - z = x % y; - x = y; - y = z; - } - return z; - } +constexpr uint32_t RC_VALUE_DEFAULT = 1U; +constexpr uint32_t RC_VALUE_MAX = 32U; - bool Contain(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); - bool Cross(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); - bool Connect(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); -}; } // namespace ge #endif // INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ \ No newline at end of file diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h index 28d48c1d..5a41de05 100644 --- a/inc/framework/common/op/attr_value_util.h +++ b/inc/framework/common/op/attr_value_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,141 +34,10 @@ #include #include #include - #include "graph/debug/ge_attr_define.h" #include "proto/om.pb.h" -using domi::AttrDef; -using domi::AttrDef_ListValue; -using domi::ModelDef; -using domi::NamedAttrs; -using domi::OpDef; - namespace ge { -using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>; -using AttrDefPair = ::google::protobuf::MapPair; - -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef); -// DEFINE_ADD_ATTR_VALUE -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs); - -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs); - -// DEFINE_ADD_ATTR_VALUE -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, 
OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, OpDef *opdef); - -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef); - -GE_FUNC_VISIBILITY void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef); - -// DEFINE_ADD_ATTR_VALUE_LIST -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef); - -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef); - -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef); - -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef); - -GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def); - -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def); 
-GE_FUNC_VISIBILITY void AddModelAttr(const char *key, const char *value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const float value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const double value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def); - -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def); - -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def); - -GE_FUNC_VISIBILITY bool HasOpAttr(const OpDef *opdef, const std::string &attr_name); - -GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const char *value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const float value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const double value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const bool value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const std::string &value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const bool value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const float value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const double value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const uint32_t value, AttrDef *out); - -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr); 
-GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr); - -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, domi::AttrDef *const out); } - -#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ +#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ \ No newline at end of file diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index bc965d13..95f1dca1 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
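After this cleanup only the std::string overload of SetAttrDef survives in attr_value_util.h. A minimal usage sketch; the attribute value is illustrative, and the proto field it fills is assumed to be the string payload of domi::AttrDef:

#include <string>
#include "framework/common/op/attr_value_util.h"

void FillFormatAttr() {
  domi::AttrDef attr;
  ge::SetAttrDef(std::string("NCHW"), &attr);  // assumed to store the string value into attr
}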
@@ -20,10 +20,8 @@ #include #include -#include "common/op/attr_value_util.h" -#include "register/register_types.h" -#include "register/register_error_codes.h" -#include "common/util.h" +#include "framework/common/op/attr_value_util.h" +#include "framework/common/util.h" #include "graph/attr_value.h" #include "graph/ge_tensor.h" #include "graph/node.h" @@ -31,18 +29,16 @@ #include "proto/insert_op.pb.h" namespace ge { -using domi::Status; // Add Sub Mul GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM; -GE_FUNC_VISIBILITY extern const uint32_t SUB_INPUT_NUM; GE_FUNC_VISIBILITY extern const uint32_t MUL_INPUT_NUM; // Permute GE_FUNC_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM; // Ssd PriroBox -GE_FUNC_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE; +GE_FUNC_VISIBILITY extern const float64_t SSD_PRIORBOX_ASPECT_RATIO_VALUE; GE_FUNC_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM; @@ -55,8 +51,8 @@ GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT; GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; // Merge -GE_FUNC_VISIBILITY extern const uint32_t MERGE_DATA_OUTPUT; -GE_FUNC_VISIBILITY extern const uint32_t MERGE_INDEX_OUTPUT; +GE_FUNC_VISIBILITY extern const int32_t MERGE_DATA_OUTPUT; +GE_FUNC_VISIBILITY extern const int32_t MERGE_INDEX_OUTPUT; // FunctionOp GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT; @@ -65,85 +61,36 @@ GE_FUNC_VISIBILITY extern const uint32_t FOR_LIMIT_INPUT; GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT; GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT; -GE_FUNC_VISIBILITY extern const int NORMAL_TENSOR_SIZE; - +GE_FUNC_VISIBILITY extern const int32_t NORMAL_TENSOR_SIZE; +/*lint -e148*/ class GE_FUNC_VISIBILITY OpUtils { public: /// - /// @ingroup domi_ome - /// @brief Check whether check_value is in [min_enum_value, max_enum_value] - /// @return true Within - /// @return false out of range - // - static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) { - return check_value < min_enum_value ? false : (check_value >= max_enum_value ? 
false : true); - } - - /// - /// @ingroup domi_omg - /// @brief Determine whether to manually calculate the tensor size based on the values of format and dim - /// @param [in] format, Format information of the tensor - /// @param [in] real_dim_cnt, Tensor dim - /// @return true Manually calculate the size based on dim and datatype - /// @return false skip - /// - static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt); - - /// /// @brief Extract AIPP parameters from AttrDefMap and splice them /// @param [in] aipp_attr attr of operator /// @param [out] aipp_params aipp parameters /// @return enum of tagCCAippInputFormat /// - static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); - static Status TransferDim(const std::vector &dim, std::vector &dim_vector); + + static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams &aipp_params); template - static void SliceData(const std::vector &input, int64_t chunk_size, std::vector &output, - int64_t begin, int64_t out_dim, int64_t stride); + static void SliceData(const std::vector &input, const int64_t chunk_size, std::vector &output, + const int64_t begin, const int64_t out_dim, const int64_t stride); template - static Status SetDataByDataType(size_t out_size, const std::vector &chunk_input, - const std::vector &chunk_output, GeTensor *output); + static Status SetDataByDataType(const size_t out_size, const std::vector &chunk_input, + const std::vector &chunk_output, GeTensor *const output); template - static Status SetOutputSliceDataByDataType(void *data, int64_t data_size, const std::vector &input_dims, - const std::vector &begin, const std::vector &output_dims, - ge::GeTensor *output, const std::vector &stride); - static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type, std::vector &input_dims, - std::vector &begin, std::vector &output_dims, ge::GeTensor *output, - std::vector &stride); - - /// - /// @ingroup domi_omg - /// @brief Convert the convolutional weight data from [h, w, c, k] to [k, c, h, w] - /// @param [in] input Weight data in HWCK format - /// @param [in] H value of H dimension - /// @param [in] W value of W dimension - /// @param [in] C value of C dimension - /// @param [in] K value of K dimension - /// @param [out] output Data pointer after conversion. The format is KCHW. - /// - static void TransDataHWCK2KCHW(const void *input, int64_t H, int64_t W, int64_t C, int64_t K, void **output); - /// - /// @ingroup domi_omg - /// @brief Converts the convolutional weight data from [k, c, h, w] to [h, w, c, k]. - /// @param [in] input Weight data in HWCK format - /// @param [in] K value of K dimension - /// @param [in] C value of C dimension - /// @param [in] H value of H dimension - /// @param [in] W value of W dimension - /// @param [out] output Data pointer after conversion. 
The format is HWCK - /// - static void TransDataKCHW2HWCK(const void *input, int64_t K, int64_t C, int64_t H, int64_t W, void *output); - - static vector GetWeights(const ge::Node &node); - static vector GetWeights(ge::ConstNodePtr node); - static vector MutableWeights(const ge::Node &node); - static vector MutableWeights(const ge::NodePtr node); - static Status SetWeights(ge::Node &node, const vector &weights); - static Status SetWeights(ge::NodePtr node, const vector &weights); - static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType type, std::vector &dims); - - private: - static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc); + static Status SetOutputSliceDataByDataType(void *const data, const int64_t data_size, + const std::vector &input_dims, const std::vector &begin, + const std::vector &output_dims, ge::GeTensor *const output, + const std::vector &stride); + static Status SetOutputSliceData(void *const data, const int64_t data_size, const int32_t data_type, + const std::vector &input_dims, const std::vector &begin, + const std::vector &output_dims, GeTensor *const output, + const std::vector &stride); + static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, const DataType type, + std::vector &dims); }; +/*lint +e148*/ } // namespace ge #endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ diff --git a/inc/framework/common/op/op_parser_util.h b/inc/framework/common/op/op_parser_util.h index 43254ca9..6d33c508 100644 --- a/inc/framework/common/op/op_parser_util.h +++ b/inc/framework/common/op/op_parser_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
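A hedged sketch of calling the refactored OpUtils::SetOutputSliceData declared above. The vector element types are elided in this diff and assumed to be int64_t, and whether data_size is an element count or a byte count is not shown; an element count is assumed here:

#include <array>
#include <vector>
#include "framework/common/op/ge_op_utils.h"

ge::Status SliceFirstRow(ge::GeTensor &output) {
  std::array<int32_t, 8> buf{{0, 1, 2, 3, 4, 5, 6, 7}};  // a 2 x 4 int32 tensor
  const std::vector<int64_t> input_dims{2, 4};
  const std::vector<int64_t> begin{0, 0};
  const std::vector<int64_t> output_dims{1, 4};  // keep row 0 only
  const std::vector<int64_t> stride{1, 1};
  return ge::OpUtils::SetOutputSliceData(buf.data(), static_cast<int64_t>(buf.size()),
                                         static_cast<int32_t>(ge::DT_INT32), input_dims, begin,
                                         output_dims, &output, stride);
}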
@@ -17,9 +17,9 @@ #ifndef INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ #define INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ -#include -#include -#include +#include +#include +#include namespace ge { // general @@ -30,7 +30,7 @@ const uint32_t NORMAL_OUTPUT_NUM = 1; const uint32_t NORMAL_WORKSPACE_NUM = 0; const int32_t NORMAL_1D_DIM_NUM = 1; const int32_t NORMAL_SCALE_DIM_NUM = 0; -const int NORMAL_TENSOR_SIZE = 4; +const int32_t NORMAL_TENSOR_SIZE = 4; const uint32_t DEFAULT_REAL_DIM_CNT = 4; // const @@ -111,8 +111,8 @@ const int32_t ROIPOOLING_DEFAULT_SAMPLING_RATIO = -1; const int32_t DETECTIONOUTPUT_INPUT_SIZE = 3; const int32_t DETECTIONOUTPUT_OUTPUT_SIZE = 2; const int32_t DETECTIONOUTPUT_WORKSPACE_NUM = 1; -const int DETECTIONOUTPUT_CLASS_NUM = 20; // Number of background categories -const int DETECTIONOUTPUT_NUM_CLASSES_DEFAULT_VALUE = 21; +const int32_t DETECTIONOUTPUT_CLASS_NUM = 20; // Number of background categories +const int32_t DETECTIONOUTPUT_NUM_CLASSES_DEFAULT_VALUE = 21; const float DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; const float DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.8; @@ -128,8 +128,8 @@ const float PROPOSAL_SCALE_DIM_0_DEFAULT_VALUE = 8; const float PROPOSAL_SCALE_DIM_1_DEFAULT_VALUE = 16; const float PROPOSAL_SCALE_DIM_2_DEFAULT_VALUE = 32; const float PROPOSAL_MIN_SIZE_DEFAULT_VALUE = 16; -const int PROPOSAL_PRE_NMS_TOPN_DEFAULT_VALUE = 6000; -const int PROPOSAL_POST_NMS_TOPN_DEFAULT_VALUE = 304; +const int32_t PROPOSAL_PRE_NMS_TOPN_DEFAULT_VALUE = 6000; +const int32_t PROPOSAL_POST_NMS_TOPN_DEFAULT_VALUE = 304; const float PROPOSAL_NMS_THRESH_DEFAULT_VALUE = 0.7; const float PROPOSAL_FILTER_THRESH_DEFAULT_VALUE = 0; @@ -150,7 +150,7 @@ const int32_t PERMUTE_WORKSPACE_NUM = 1; const int32_t PERMUTE_ORDER_NUM = 4; // Ssd normalize -const int SSD_NORMALIZE_INPUT_SIZE = 1; +const int32_t SSD_NORMALIZE_INPUT_SIZE = 1; const float SSD_NORMALIZE_EPS_DEFAULT_VALUE = 2e-7; // SsdPriroBox @@ -163,9 +163,9 @@ const double SSD_PRIORBOX_VARIANCE_VALUE = 0.1; const double SSD_PRIORBOX_VARIANCE_SIZE_ONE = 1; const double SSD_PRIORBOX_VARIANCE_SIZE_FOUR = 4; const double SSD_PRIORBOX_ASPECT_RATIO_VALUE = 1.0; -const int SSD_PRIOR_BOX_CODETYPE_CORNER_VALUE = 1; -const int SSD_PRIOR_BOX_CODETYPE_CENTER_SIZE_VALUE = 2; -const int SSD_PRIOR_BOX_CODETYPE_CORNER_SIZE_VALUE = 3; +const int32_t SSD_PRIOR_BOX_CODETYPE_CORNER_VALUE = 1; +const int32_t SSD_PRIOR_BOX_CODETYPE_CENTER_SIZE_VALUE = 2; +const int32_t SSD_PRIOR_BOX_CODETYPE_CORNER_SIZE_VALUE = 3; // Ssd DetectionOutput const int32_t SSD_DETECTIONOUTPUT_INPUT_SIZE = 3; @@ -205,8 +205,8 @@ const int32_t CHANNEL_AXPY_INPUT_DIM_SIZE = 4; const int32_t CHANNEL_AXPY_WORKSPACE_NUM = 1; // Psroi pooling -const int PSROI_POOLING_INPUT_COUNT = 2; -const int PSROI_POOLING_WORKSPACE_NUM = 1; +const int32_t PSROI_POOLING_INPUT_COUNT = 2; +const int32_t PSROI_POOLING_WORKSPACE_NUM = 1; // MaxPoolWithArgmax const uint32_t MAX_POOL_WITH_ARGMAX_OUTPUT_NUM = 2; @@ -223,7 +223,7 @@ const int32_t ROIALIGN_DEFAULT_POOLED_W = 1; // Correlation const uint32_t CORRELATION_INPUT_NUM = 2; -const int CORRELATION_WORKSPACE_NUM = 1; +const int32_t CORRELATION_WORKSPACE_NUM = 1; // Detectionpostprocess const int32_t POSTPROCESS_INPUT_SIZE = 4; @@ -394,15 +394,15 @@ const uint32_t ATTENTION_DECODER_WORKSPACE_NUM = 1; const uint32_t ATTENTION_DECODER_INPUT_DECODER_INPUTS = 0; const uint32_t ATTENTION_DECODER_INPUT_DECODER_INITIAL_HIDDEN = 1; -const int ATTENTION_DECODER_ALGO_NORMAL = 0; -const int ATTENTION_DECODER_SYMBOLS = 10000; 
-const int ATTENTION_DECODER_EMBEDDING_SIZE = 128; -const int ATTENTION_DECODER_ATTENTION_NUM_HIDDEN = 256; -const int ATTENTION_DECODER_DECODER_NUM_HIDDEN = 128; -const int ATTENTION_DECODER_DECODER_NUM_LAYERS = 2; -const int ATTENTION_DECODER_RNN_UNBIDIRECTIONAL = 0; -const int ATTENTION_DECODER_SEQLEN_VALUE = 57; -const int ATTENTION_DECODER_GRU = 3; +const int32_t ATTENTION_DECODER_ALGO_NORMAL = 0; +const int32_t ATTENTION_DECODER_SYMBOLS = 10000; +const int32_t ATTENTION_DECODER_EMBEDDING_SIZE = 128; +const int32_t ATTENTION_DECODER_ATTENTION_NUM_HIDDEN = 256; +const int32_t ATTENTION_DECODER_DECODER_NUM_HIDDEN = 128; +const int32_t ATTENTION_DECODER_DECODER_NUM_LAYERS = 2; +const int32_t ATTENTION_DECODER_RNN_UNBIDIRECTIONAL = 0; +const int32_t ATTENTION_DECODER_SEQLEN_VALUE = 57; +const int32_t ATTENTION_DECODER_GRU = 3; // Logicaland const int32_t LOGICAL_AND_INPUT_NUM = 2; diff --git a/inc/framework/common/op_types.h b/inc/framework/common/op_types.h index fa41c1b6..94a6b977 100644 --- a/inc/framework/common/op_types.h +++ b/inc/framework/common/op_types.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,8 @@ #include #include +#include "graph/types.h" + namespace ge { class GE_FUNC_VISIBILITY OpTypeContainer { public: @@ -29,11 +31,12 @@ class GE_FUNC_VISIBILITY OpTypeContainer { } ~OpTypeContainer() = default; - void Register(const std::string &op_type) { op_type_list_.insert(op_type); } + bool Register(const std::string &op_type) { + return op_type_list_.insert(op_type).second; + } bool IsExisting(const std::string &op_type) { - auto iter_find = op_type_list_.find(op_type); - return iter_find != op_type_list_.end(); + return op_type_list_.find(op_type) != op_type_list_.end(); } protected: @@ -42,21 +45,14 @@ class GE_FUNC_VISIBILITY OpTypeContainer { private: std::set op_type_list_; }; - -class GE_FUNC_VISIBILITY OpTypeRegistrar { - public: - explicit OpTypeRegistrar(const std::string &op_type) { OpTypeContainer::Instance()->Register(op_type); } - ~OpTypeRegistrar() {} -}; +} // namespace ge #define REGISTER_OPTYPE_DECLARE(var_name, str_name) \ - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *var_name; + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char_t *var_name; #define REGISTER_OPTYPE_DEFINE(var_name, str_name) \ - const char *var_name = str_name; \ - const OpTypeRegistrar g_##var_name##_reg(str_name); - -#define IS_OPTYPE_EXISTING(str_name) (OpTypeContainer::Instance()->IsExisting(str_name)) -} // namespace ge + const char_t *var_name = str_name; \ + const bool g_##var_name##_reg = OpTypeContainer::Instance()->Register(str_name); +#define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name)) #endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_ diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index c87c082c..16588830 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
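The OpTypeRegistrar object is replaced above by a global bool initialized from OpTypeContainer::Register. A sketch of the intended pattern, using a hypothetical op type name:

// In a header: declare the op-type string constant.
REGISTER_OPTYPE_DECLARE(MY_CUSTOM_OP, "MyCustomOp");

// In exactly one .cc file: define it, which also registers the name in OpTypeContainer.
REGISTER_OPTYPE_DEFINE(MY_CUSTOM_OP, "MyCustomOp");

// At run time, once the defining translation unit is linked in:
const bool registered = IS_OPTYPE_EXISTING("MyCustomOp");  // true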
@@ -17,39 +17,15 @@ #ifndef INC_FRAMEWORK_COMMON_GE_PROFILING_H_ #define INC_FRAMEWORK_COMMON_GE_PROFILING_H_ -#include "ge/ge_api_error_codes.h" -#include "toolchain/prof_callback.h" +#include "external/ge/ge_api_error_codes.h" #include "runtime/base.h" -const int MAX_DEV_NUM = 64; - -enum ProfCommandHandleType { - kProfCommandhandleInit = 0, - kProfCommandhandleStart, - kProfCommandhandleStop, - kProfCommandhandleFinalize, - kProfCommandhandleModelSubscribe, - kProfCommandhandleModelUnsubscribe -}; - -struct ProfCommandHandleData { - uint64_t profSwitch; - uint32_t devNums; // length of device id list - uint32_t devIdList[MAX_DEV_NUM]; - uint32_t modelId; -}; - -GE_FUNC_VISIBILITY ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); -GE_FUNC_VISIBILITY ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); -GE_FUNC_VISIBILITY ge::Status RegProfReporterCallback(MsprofReporterCallback func); -GE_FUNC_VISIBILITY ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); - /// /// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading /// @return Status result /// -GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream); +GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(const uint64_t index_id, const uint16_t tag_id, rtStream_t const stream); -GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id); +GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(const uint32_t graph_id, uint32_t &device_id); #endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ diff --git a/inc/framework/common/profiling/ge_runner_profiling.h b/inc/framework/common/profiling/ge_runner_profiling.h index 27e19bce..b0a666ce 100644 --- a/inc/framework/common/profiling/ge_runner_profiling.h +++ b/inc/framework/common/profiling/ge_runner_profiling.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/common/profiling_definitions.h b/inc/framework/common/profiling_definitions.h new file mode 100644 index 00000000..f814ba76 --- /dev/null +++ b/inc/framework/common/profiling_definitions.h @@ -0,0 +1,220 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef AIR_CXX_PROFILING_DEFINITIONS_H +#define AIR_CXX_PROFILING_DEFINITIONS_H +#include +#include +#include +#include +#include "graph/profiler.h" +#include "external/ge/ge_api_types.h" +#include "toolchain/prof_callback.h" +namespace ge { +namespace profiling { +enum { + kAclCompileAndExecute, + kAclMatchOpModel, + kAclMatchStaticOpModel, + kAclMatchDynamicOpModel, + kAclExecuteAsync, + kAclLoadSingleOp, + kAclBuildOpModel, + kInferShape, + kTiling, + kUpdateShape, + kConstPrepare, + kInitHybridExecuteArgs, + kInitInferShapeContext, + kDestroyInferShapeContext, + kResetSubgraphExecutor, + kCommitInferShapeTask, + kDeviceToHost, + kPrepareTask, + kLaunchTask, + kCommitTilingTask, + kAtomic, + kKernelLaunchPrepare, + kRtKernelLaunch, + kRtEventCreateRecord, + kRtEventSync, + kRtEventDestroy, + kRtStreamSync, + kOpExecute, + kModelExecute, + kAllocMem, + kCopyH2D, + kPrepareNode, + kWaitForPrepareDone, + kPropgateOutputs, + kOnNodeDoneCallback, + kValidateInputTensor, + kAfterExecuted, + kRtEventSychronize, + kInferShapeWaitDependShape, + kInferShapeWaitInputTensor, + kInferShapeCallInferFunc, + kInferShapePropgate, + // v2 control node + kSelectBranch, + kExecuteSubGraph, + kInitSubGraphExecutor, + // fuzz compile + kSelectBin, + kFindCompileCache, + kAddCompileCache, + kFuzzCompileOp, + kCalcRuningParam, + kGenTask, + kRegisterBin, + + // FFTS Plus + kFftsPlusPreThread, + kFftsPlusNodeThread, + kFftsPlusInferShape, + kOpFftsCalculateV2, + kInitThreadRunInfo, + kFftsPlusGraphSchedule, + kKnownGetAddrAndPrefCnt, + kKernelGetAddrAndPrefCnt, + kUpdateAddrAndPrefCnt, + kInitOpRunInfo, + kGetAutoThreadParam, + kAllocateOutputs, + kAllocateWorkspaces, + kInitTaskAddrs, + kInitThreadRunParam, + kUpdateTaskAndCache, + kFftsPlusTaskLaunch, + + // Add new definitions here + kProfilingIndexEnd +}; +constexpr uint64_t kInvalidHashId = 0UL; + +class ProfilingContext { + public: + static bool IsDumpToStdEnabled(); + static ProfilingContext &GetInstance(); + ProfilingContext(); + ~ProfilingContext(); + + /* + * An alternative is for `IsEnabled` to check only whether profiler_ is a null pointer, with no separate enabled_ flag, which saves one flag. + * But that would require the profiler_ instance to be a null pointer whenever profiling is not enabled. + * For performance, the profiling mechanism calls `RegisterString` at compile and load time to register strings with profiler_, and later execution only uses the registered indices. + * So there is a scenario where profiling is not enabled at compile time (compilation takes long, and enabling profiling then cannot reflect the real execution-time cost), + * and the string registration done at compile time therefore does not take effect. If profiling is then switched on dynamically at execution time, the registered strings cannot be obtained during execution. + */ + bool IsEnabled() const noexcept { + return enabled_ && (profiler_ != nullptr); + } + void SetEnable() noexcept { + enabled_ = true; + } + void SetDisable() noexcept { + enabled_ = false; + } + + void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et, + const std::chrono::time_point time_point) { + if (IsEnabled()) { + profiler_->RecordCurrentThread(element, event, et, time_point); + } + } + + void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et) { + RecordCurrentThread(element, event, et, std::chrono::system_clock::now()); + } + + const Profiler *GetProfiler() const { + return profiler_.get(); + } + + void Dump(std::ostream &out_stream) const { + if (IsEnabled()) { + profiler_->Dump(out_stream); + } else { + out_stream << "Profiling not enabled, skip dump" << std::endl; + } + } + + void DumpToStdOut() const { + Dump(std::cout); + } + + void Reset() { + if (IsEnabled()) { + profiler_->Reset(); + } + } + + int64_t RegisterString(const std::string &str); + int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str); + void UpdateElementHashId(); + static Status
QueryHashId(const std::string &src_str, uint64_t &hash_id); + size_t GetRegisterStringNum() const { + return strings_to_index_.size(); + } + + void Init(); + + private: + void UpdateHashByStr(const std::string &str, const uint64_t hash); + + private: + bool inited_; + bool enabled_; + int64_t str_index_; + std::unordered_map strings_to_index_; + std::mutex strings_to_index_mutex_; + std::unique_ptr profiler_; +}; + +class ScopeProfiler { + public: + ScopeProfiler(const int64_t element, const int64_t event) : element_(element), event_(event) { + if (ProfilingContext::GetInstance().IsEnabled()) { + start_trace_ = std::chrono::system_clock::now(); + } + } + ~ScopeProfiler() { + if (ProfilingContext::GetInstance().IsEnabled()) { + ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventStart, start_trace_); + ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventEnd); + } + } + void SetElement(const int64_t element) { + element_ = element; + } + + private: + std::chrono::time_point start_trace_; + int64_t element_; + int64_t event_; +}; +} // namespace profiling +} // namespace ge +#define PROFILING_START(element, event) \ + ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \ + ge::profiling::EventType::kEventStart) +#define PROFILING_END(element, event) \ + ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \ + ge::profiling::EventType::kEventEnd) +#define PROFILING_SCOPE(element, event) ge::profiling::ScopeProfiler profiler((element), (event)) +#define PROFILING_SCOPE_CONST(element, event) const ge::profiling::ScopeProfiler profiler((element), (event)) +#define PROFILING_SCOPE_ELEMENT(element) profiler.SetElement((element)) +#endif // AIR_CXX_PROFILING_DEFINITIONS_H diff --git a/inc/framework/common/runtime_tensor_desc.h b/inc/framework/common/runtime_tensor_desc.h new file mode 100644 index 00000000..ebd28e25 --- /dev/null +++ b/inc/framework/common/runtime_tensor_desc.h @@ -0,0 +1,38 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_ +#define INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_ + +#include + +namespace ge { +constexpr int64_t kMaxDimSize = 32; + +#pragma pack(push, 1) +struct RuntimeTensorDesc { + uint64_t data_addr; + int64_t data_offset_size; + int64_t dtype; + int64_t shape[kMaxDimSize + 1]; // shape:Dim_Num|DIM0|DIM1|...|DIM31 + int64_t original_shape[kMaxDimSize + 1]; // original_shape:Dim_Num|DIM0|DIM1|...|DIM31 + int64_t format; + int64_t sub_format; + uint8_t reserved[456]; // padding to 1024 bytes +}; +#pragma pack(pop) +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_ \ No newline at end of file diff --git a/inc/framework/common/scope_guard.h b/inc/framework/common/scope_guard.h index 62ae4b6d..bda9ffe2 100644 --- a/inc/framework/common/scope_guard.h +++ b/inc/framework/common/scope_guard.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,14 +18,15 @@ #define INC_FRAMEWORK_COMMON_SCOPE_GUARD_H_ #include -#include /// Usage: /// Acquire Resource 1 /// MAKE_GUARD([&] { Release Resource 1 }) /// Acquire Resource 2 // MAKE_GUARD([&] { Release Resource 2 }) -#define GE_MAKE_GUARD(var, callback) ScopeGuard make_guard_##var(callback) +#define GE_MAKE_GUARD(var, callback) const ::ge::ScopeGuard const_guard_##var(callback) + +#define GE_DISMISSABLE_GUARD(var, callback) ::ge::ScopeGuard make_guard_##var(callback) #define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss() namespace ge { @@ -42,13 +43,16 @@ class GE_FUNC_VISIBILITY ScopeGuard { if (on_exit_scope_ != nullptr) { try { on_exit_scope_(); - } catch (std::bad_function_call &e) { } - catch (...) { } + } catch (std::bad_function_call &) { + } catch (...) { + } } } } - void Dismiss() { dismissed_ = true; } + void Dismiss() { + dismissed_ = true; + } private: std::function on_exit_scope_; diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index f0368363..c369d04f 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
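A hedged sketch of the profiling API introduced in profiling_definitions.h above: a string is registered once to obtain an element index, and PROFILING_SCOPE records paired start/end events for that element. Init() is assumed to create the underlying profiler, since IsEnabled() also requires a non-null profiler_ per the design comment above; the function being measured is illustrative:

#include "framework/common/profiling_definitions.h"

void RunInferShapeStep() {
  auto &ctx = ge::profiling::ProfilingContext::GetInstance();
  ctx.Init();       // assumed to allocate the underlying profiler
  ctx.SetEnable();  // records and dumps are no-ops while disabled
  const int64_t element = ctx.RegisterString("RunInferShapeStep");
  {
    PROFILING_SCOPE(element, ge::profiling::kInferShape);  // start event here, end event at scope exit
    // ... the region being measured ...
  }
  ctx.DumpToStdOut();
}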
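Likewise, a small sketch of filling the RuntimeTensorDesc introduced above for a 2-D tensor, following its Dim_Num|DIM0|...|DIM31 layout comment; the format and dtype enum values are taken from external/graph/types.h:

#include "framework/common/runtime_tensor_desc.h"
#include "external/graph/types.h"

ge::RuntimeTensorDesc MakeDesc(const uint64_t addr) {
  ge::RuntimeTensorDesc desc{};
  desc.data_addr = addr;
  desc.dtype = static_cast<int64_t>(ge::DT_FLOAT16);
  desc.format = static_cast<int64_t>(ge::FORMAT_ND);
  desc.shape[0] = 2;   // Dim_Num
  desc.shape[1] = 16;  // DIM0
  desc.shape[2] = 32;  // DIM1
  desc.original_shape[0] = 2;
  desc.original_shape[1] = 16;
  desc.original_shape[2] = 32;
  return desc;
}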
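And a sketch of the reworked guard macros from scope_guard.h above: GE_MAKE_GUARD now builds a const guard that always fires at scope exit, while GE_DISMISSABLE_GUARD pairs with GE_DISMISS_GUARD for the cancel-on-success pattern. PrepareBuffer is a hypothetical helper, assumed defined elsewhere:

#include <cstdlib>
#include "framework/common/scope_guard.h"

bool PrepareBuffer(void *buf);  // hypothetical helper

bool Demo() {
  void *buf = std::malloc(128U);
  if (buf == nullptr) {
    return false;
  }
  GE_DISMISSABLE_GUARD(buf, [&buf]() { std::free(buf); });
  if (!PrepareBuffer(buf)) {
    return false;  // the guard frees buf on this early exit
  }
  GE_DISMISS_GUARD(buf);  // success: ownership is handed off, skip the free
  return true;
}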
@@ -39,24 +39,27 @@ #include #include #include +#include "graph/types.h" namespace ge { class GE_FUNC_VISIBILITY StringUtils { public: static std::string &Ltrim(std::string &s) { #if __cplusplus >= 201103L - (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); + (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](const int32_t c) { return std::isspace(c) == 0; })); #else - (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); #endif return s; } // lint -esym(551,*) - static std::string &Rtrim(std::string &s) { /*lint !e618*/ + static std::string &Rtrim(std::string &s) { /*lint !e618*/ #if __cplusplus >= 201103L - (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); + (void)s.erase(std::find_if(s.rbegin(), s.rend(), [](const int32_t c) { return std::isspace(c) == 0; }).base(), + s.end()); #else - (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), + s.end()); #endif return s; } @@ -67,7 +70,9 @@ class GE_FUNC_VISIBILITY StringUtils { /// /// @ingroup domi_common /// @param [in] string to be trimmed /// @return string after trim /// - static std::string &Trim(std::string &s) { return Ltrim(Rtrim(s)); } + static std::string &Trim(std::string &s) { + return Ltrim(Rtrim(s)); + } /// /// @ingroup domi_common /// @brief Splits a string based on the specified delimiter /// @param [in] str string to be trimmed /// @param [in] delim separator /// @return string array after segmentation /// - static std::vector<std::string> Split(const std::string &str, char delim) { - std::vector<std::string> elems; + static std::vector<std::string> Split(const std::string &str, const char_t delim) { + std::vector<std::string> elems; if (str.empty()) { elems.emplace_back(""); @@ -91,8 +96,8 @@ class GE_FUNC_VISIBILITY StringUtils { elems.push_back(item); } - auto str_size = str.size(); - if (str_size > 0 && str[str_size - 1] == delim) { + const auto str_size = str.size(); + if ((str_size > 0U) && (str[str_size - 1U] == delim)) { elems.emplace_back(""); } @@ -104,13 +109,13 @@ class GE_FUNC_VISIBILITY StringUtils { /// @param [in] s path name /// @return file name /// - static std::string GetFileName(std::string &s) { + static std::string GetFileName(const std::string &s) { if (s.empty()) { return ""; } - std::vector<std::string> files = StringUtils::Split(s, '/'); + const std::vector<std::string> files = StringUtils::Split(s, '/'); - return files.empty() ? "" : files[files.size() - 1]; + return files.empty() ?
"" : files[files.size() - 1U]; } /// /// @ingroup domi_common @@ -122,12 +127,13 @@ class GE_FUNC_VISIBILITY StringUtils { /// @return string after replacement /// static std::string ReplaceAll(std::string str, const std::string &old_value, const std::string &new_value) { - std::string::size_type cur_pos = 0; - std::string::size_type old_length = old_value.length(); - std::string::size_type new_length = new_value.length(); + std::string::size_type cur_pos = 0U; + const std::string::size_type old_length = old_value.length(); + const std::string::size_type new_length = new_value.length(); // cycle replace for (; cur_pos != std::string::npos; cur_pos += new_length) { - if ((cur_pos = str.find(old_value, cur_pos)) != std::string::npos) { + cur_pos = str.find(old_value, cur_pos); + if (cur_pos != std::string::npos) { (void)str.replace(cur_pos, old_length, new_value); } else { break; @@ -145,7 +151,7 @@ class GE_FUNC_VISIBILITY StringUtils { /// @return if the value is a prefix, true is returned. Otherwise, false is returned /// static bool StartWith(const std::string &str, const std::string str_x) { - return ((str.size() >= str_x.size()) && (str.compare(0, str_x.size(), str_x) == 0)); + return ((str.size() >= str_x.size()) && (str.compare(0U, str_x.size(), str_x) == 0)); } /// @@ -156,14 +162,14 @@ class GE_FUNC_VISIBILITY StringUtils { /// @param [in] ... format Filling Content /// @return formatted string /// - static std::string FormatString(const char *format, ...) { - const uint32_t MAX_BUFFER_LEN = 1024; // the stack memory plint check result must be less than 1024 + static std::string FormatString(const char_t *const format, ...) { + const uint32_t MAX_BUFFER_LEN = 1024U; // the stack memory plint check result must be less than 1024 va_list args; va_start(args, format); - char buffer[MAX_BUFFER_LEN] = {0}; - int32_t ret = vsnprintf_s(buffer, MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1, format, args); + char_t buffer[MAX_BUFFER_LEN] = {}; + const int32_t ret = vsnprintf_s(&buffer[0], MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1U, format, args); va_end(args); - return ret > 0 ? buffer : ""; + return (ret > 0) ? buffer : ""; } }; } // namespace ge diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h index f2f731be..159072b8 100644 --- a/inc/framework/common/taskdown_common.h +++ b/inc/framework/common/taskdown_common.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,9 +21,9 @@ namespace ge { -const int CC_FUSION_OP_MAX = 32; +const int32_t CC_FUSION_OP_MAX = 32; -typedef enum tagCcStatus { +enum class ccStatus_t { CC_STATUS_SUCCESS = 0, /**< succ */ CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ @@ -33,10 +33,10 @@ typedef enum tagCcStatus { CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ - CC_STATUS_RESERVED /**< just for check */ -} ccStatus_t; + CC_STATUS_RESERVED = 8, /**< just for check */ +}; -typedef enum tagccKernelType { +enum class ccKernelType { CCE_AI_CORE = 0, /* cce aicore */ CCE_AI_CPU = 1, /* cce aicpu */ TE = 2, /* te operator*/ @@ -47,9 +47,9 @@ typedef enum tagccKernelType { CUST_AI_CPU = 7, /* custom aicpu*/ HOST_CPU = 8, /* host cpu */ INVALID = 10000 /* unknown kernel type */ -} ccKernelType; +}; -typedef struct tagOpContext { +using ccOpContext = struct tagOpContext { ccKernelType kernelType; uint32_t opId; uint32_t kernelFuncId; @@ -66,7 +66,28 @@ typedef struct tagOpContext { uint64_t genVariableBaseAddr; uint64_t genVariableBaseSize; uint64_t l2ctrlSize; -} ccOpContext; -} // namespace ge +}; + +enum class tagOpTensorFormat { OP_TENSOR_FORMAT_NC1HWC0 = 0, OP_TENSOR_FORMAT_ND, OP_TENSOR_FORMAT_RESERVED }; +enum class tagOpDataType { + OP_DATA_FLOAT = 0, /**< float type */ + OP_DATA_HALF, /**< fp16 type */ + OP_DATA_INT8, /**< int8 type */ + OP_DATA_INT32, /**< int32 type */ + OP_DATA_UINT8, /**< uint8 type */ + OP_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */ + OP_DATA_RESERVED +}; + +// AICPU Tensor +using ccAICPUTensor = struct tagOpTensor { + // real dim info + tagOpTensorFormat format; + tagOpDataType data_type; + int32_t dim_cnt; + int32_t mm; + int32_t dim[8]; +}; +} // namespace ge #endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 811d5eed..ab5218c0 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
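Since ccStatus_t and ccKernelType are now scoped enums, downstream code loses both the unscoped member names and the implicit conversion to int. A hedged sketch of the call-site impact (SerializeKernelType is illustrative, not part of this patch):

#include <cstdint>
#include "framework/common/taskdown_common.h"

uint32_t SerializeKernelType(const ge::ccKernelType k) {
  // `TE` alone or `k == 2` no longer compiles; members are scoped
  // (ge::ccKernelType::TE) and integer conversion must be explicit.
  return static_cast<uint32_t>(k);
}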
@@ -17,28 +17,16 @@ #ifndef INC_FRAMEWORK_COMMON_TYPES_H_ #define INC_FRAMEWORK_COMMON_TYPES_H_ -#include -#include -#include +#include #include #include #include -#include -#include #include "framework/common/fmk_error_codes.h" #include "framework/common/fmk_types.h" #include "framework/common/op_types.h" #include "register/register_types.h" -#if !defined(__ANDROID__) && !defined(ANDROID) -#define DOMI_DYNAMIC_CAST static_cast -#define DOMI_DYNAMIC_POINTER_CAST std::static_pointer_cast -#else -#define DOMI_DYNAMIC_CAST static_cast -#define DOMI_DYNAMIC_POINTER_CAST std::static_pointer_cast -#endif - namespace ge { // dump FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_MODEL; @@ -51,54 +39,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEB FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ATOMIC; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL; -// Supported public properties name -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_START_TIME; // Start time -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_DUMP_PATH; // Dump path -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_LOG_PATH; // Log path - // Profile-related constants -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CCE_PROFILE_ON; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CCE_PROFILE_OFF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OME_PROFILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string CCE_PROFILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string RTS_PROFILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILER_JOBCTX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILER_TARGET_PATH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string RTS_PROFILE_PATH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_KEY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_VALUE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::map PROFILE_COMPONENT_MAP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_CONFIG; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_HOST_BASE_ADDR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_HOST_MEMORY_SIZE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int MODEL_MAX_SIZE; // Max size of 2 GB minus 1 byte. -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t FILE_HEADER_MAX_SIZE; // Max size of 3 GB. 
- -#if !defined(__ANDROID__) && !defined(ANDROID) FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. -#else -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 512M. -#endif - -template -static std::pair flip_pair(const std::pair &p) { - return std::pair(p.second, p.first); -} - -template -static std::map flip_map(std::map src) { - std::map dst; - std::transform(src.begin(), src.end(), std::inserter(dst, dst.begin()), flip_pair); - return dst; -} REGISTER_OPTYPE_DECLARE(DATA, "Data"); REGISTER_OPTYPE_DECLARE(AIPPDATA, "AippData"); +REGISTER_OPTYPE_DECLARE(QUEUE_DATA, "QueueData"); REGISTER_OPTYPE_DECLARE(CONVOLUTION, "Convolution"); REGISTER_OPTYPE_DECLARE(CORRELATION, "Correlation"); REGISTER_OPTYPE_DECLARE(CORRELATIONV2, "Correlation_V2"); @@ -134,6 +88,7 @@ REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3, "DropOutDoMaskV3"); REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3D, "DropOutDoMaskV3D"); REGISTER_OPTYPE_DECLARE(SOFTMAXV2WITHDROPOUTDOMASKV3D, "SoftmaxV2WithDropOutDoMaskV3D"); REGISTER_OPTYPE_DECLARE(DROPOUTGENMASK, "DropOutGenMask"); +REGISTER_OPTYPE_DECLARE(AXPYWITHSOFTMAXANDDROPOUTDOMASK, "AxpyWithSoftmaxAndDropOutDoMask"); REGISTER_OPTYPE_DECLARE(CONCAT, "Concat"); REGISTER_OPTYPE_DECLARE(ROIPOOLING, "ROIPooling"); REGISTER_OPTYPE_DECLARE(PROPOSAL, "Proposal"); @@ -167,6 +122,10 @@ REGISTER_OPTYPE_DECLARE(SLICED, "SliceD"); REGISTER_OPTYPE_DECLARE(FLOORDIV, "FloorDiv"); REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze"); REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze"); +REGISTER_OPTYPE_DECLARE(SQUEEZEV2, "SqueezeV2"); +REGISTER_OPTYPE_DECLARE(UNSQUEEZEV2, "UnsqueezeV2"); +REGISTER_OPTYPE_DECLARE(SQUEEZEV3, "SqueezeV3"); +REGISTER_OPTYPE_DECLARE(UNSQUEEZEV3, "UnsqueezeV3"); REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); REGISTER_OPTYPE_DECLARE(RANGE, "Range"); REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); @@ -203,6 +162,7 @@ REGISTER_OPTYPE_DECLARE(_IF, "_If"); REGISTER_OPTYPE_DECLARE(STATELESSIF, "StatelessIf"); REGISTER_OPTYPE_DECLARE(IF, "If"); REGISTER_OPTYPE_DECLARE(CASE, "Case"); +REGISTER_OPTYPE_DECLARE(STATELESSCASE, "StatelessCase"); REGISTER_OPTYPE_DECLARE(_WHILE, "_While"); REGISTER_OPTYPE_DECLARE(WHILE, "While"); REGISTER_OPTYPE_DECLARE(STATELESSWHILE, "StatelessWhile"); @@ -339,10 +299,15 @@ REGISTER_OPTYPE_DECLARE(PLACEHOLDER, "PlaceHolder"); REGISTER_OPTYPE_DECLARE(END, "End"); REGISTER_OPTYPE_DECLARE(BASICLSTMCELL, "BasicLSTMCell"); REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); +REGISTER_OPTYPE_DECLARE(ITERATOR, "Iterator"); +REGISTER_OPTYPE_DECLARE(ITERATORV2, "IteratorV2"); REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); REGISTER_OPTYPE_DECLARE(BITCAST, "Bitcast"); +REGISTER_OPTYPE_DECLARE(GATHERSHAPES, "GatherShapes"); +REGISTER_OPTYPE_DECLARE(FLATTENV2, "FlattenV2"); +REGISTER_OPTYPE_DECLARE(FILECONSTANT, "FileConstant"); // ANN dedicated operator REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); @@ -460,6 +425,7 @@ REGISTER_OPTYPE_DECLARE(MODELEXIT, "ModelExit"); REGISTER_OPTYPE_DECLARE(SEND, "Send"); REGISTER_OPTYPE_DECLARE(RECV, "Recv"); REGISTER_OPTYPE_DECLARE(ENDOFSEQUENCE, "EndOfSequence"); +REGISTER_OPTYPE_DECLARE(STARTOFSEQUENCE, "StartOfSequence"); REGISTER_OPTYPE_DECLARE(LABELSET, "LabelSet"); REGISTER_OPTYPE_DECLARE(LABELGOTO, "LabelGoto"); @@ -483,8 +449,6 @@ REGISTER_OPTYPE_DECLARE(ELU_GRAD, "EluGrad"); 
REGISTER_OPTYPE_DECLARE(ADD_V2, "AddV2"); REGISTER_OPTYPE_DECLARE(DATAFORMATDIMMAP, "DataFormatDimMap"); REGISTER_OPTYPE_DECLARE(DATAFORMATVECPERMUTE, "DataFormatVecPermute"); -REGISTER_OPTYPE_DECLARE(BESSELI0e, "BesselI0e"); -REGISTER_OPTYPE_DECLARE(BESSELI1e, "BesselI1e"); REGISTER_OPTYPE_DECLARE(DEQUANTIZE, "Dequantize"); REGISTER_OPTYPE_DECLARE(APPLYADADELTA, "ApplyAdadelta"); REGISTER_OPTYPE_DECLARE(APPLYADAGRAD, "ApplyAdagrad"); @@ -538,29 +502,11 @@ REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims"); // profiling training trace node REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); -enum InputMode { INPUT = 0, CONST_INPUT }; - -// Definition of the processing status enum of the process module -enum ModelProcessState { - INIT_STATE = 0, // init status - WAIT_EVENT_STATE, // Wait for the event status - IND_RSLT_STATE, // The model execution result is being output to the high level - STOPPED_STATE, // Model execution completed. The model enters this state after Model Manager::Stop - RESERVED_STATE, // reserved -}; - -// Indicates the enun definition of the execution mode of the access module -enum SysMode { - INFERENCE = 0, // Normal, that is, Inference mode - DEBUG, // Debug mode - TIME, // Model execution time mode, including the execution time of each OP - STOP, // STOP mode - RESET, // RESET mode - PERFORMANCE, // Impact of enabling the performance model: 1. The input data of the model is considered ready and does - // not need to be converted - ANDROID_DEBUG, // Exports Android platform computing data - RESERVED, // reserved -}; +// Stack series +REGISTER_OPTYPE_DECLARE(STACK, "Stack"); +REGISTER_OPTYPE_DECLARE(STACKPUSH, "StackPush"); +REGISTER_OPTYPE_DECLARE(STACKPOP, "StackPop"); +REGISTER_OPTYPE_DECLARE(STACKCLOSE, "StackClose"); // @brief encryption type of the model file enum ModelEncryptType { @@ -599,50 +545,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FIL /// /// @brief model name length /// -static constexpr uint32_t MODEL_NAME_LENGTH = 32; +constexpr uint32_t MODEL_NAME_LENGTH = 32U; /// /// @brief length of user-defined information /// -static constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32; +constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32U; /// /// @brief length of the model file signature /// -static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64; +constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64U; /// /// @brief length of the reserved field in the model file header /// -static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75; - -/// -/// @ingroup domi_omg -/// @brief INPUT node type -/// -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string INPUT_TYPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMMY_DATA; - -/// -/// @ingroup domi_omg -/// @brief AIPP flag, indicating the aipp conv operator -/// -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_CONV_FLAG; - -/// -/// @ingroup domi_omg -/// @brief AIPP flag, indicating the aipp data operator -/// -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_DATA_FLAG; - -// flag of the Data operator, indicating that the input will be input to the dynamic AIPP operator -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string INPUT_TO_DYNAMIC_AIPP; - -// records the W dimension of the model input corresponding to the dynamic AIPP -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string 
AIPP_RELATED_DATA_DIM_W; - -// H dimension of the model input corresponding to the dynamic AIPP -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_RELATED_DATA_DIM_H; +constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75U; // DATA node type FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE; @@ -655,10 +573,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string FRAMEW // DATA node type FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_DATA_TYPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_NETOUTPUT_TYPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_DEPTHCONV_TYPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_CONV_TYPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_FC_TYPE; // convolution node type FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_NET_OUTPUT; @@ -667,161 +581,31 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_N FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_OP_DEBUG; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_OP_DEBUG; -// convolution node type -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_CONVOLUTION; -// adds a convolutional node name for the hard AIPP -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_CONV_OP_NAME; // delimiter of operator configuration items FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CONF_DELIMITER; -// op attr name -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_VALUE1; - -// op attr name, used to 6d_2_4d C channel -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_INPUT_CVALUE; - -// op attr name -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_VALUE1; - -// alpha default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float ALPHA_DEFAULT_VALUE; - -// beta default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float BETA_DEFAULT_VALUE; - -// coef default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float COEF_DEFAULT_VALUE; - -// coef value of Relu6 -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float RELU6_COEF; - -// stride default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STRIDE_DEFAULT_VALUE; - -// pad default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t PAD_DEFAULT_VALUE; - -// dilation default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int DILATION_DEFAULT_VALUE; - -// kernel default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KERNEL_DEFAULT_VALUE; - -// default conv Group Size -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t DEFAULT_CONV_GROUP; - -// default deconv adj -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t DEFAULT_DECONV_ADJ; - -// indicate num 1 -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NUM_ONE; - // dim default size value -static const int32_t DIM_DEFAULT_SIZE = 4; - -// the shape of c must be the mutiply of 16 for depthwise -static const uint32_t 
DEPTHWISE_DIM_C_BASE_NUM = 16; - -// C1HWNCoC0 dim size -static const int32_t DIM_C1HWNCoC0_SIZE = 6; -// C1HWNCoC0 C0 value -static const int C1HWCOC_C0_VALUE = 16; -// spatial default dim size -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t SPATIAL_DIM_DEFAULT_SIZE; +constexpr int32_t DIM_DEFAULT_SIZE = 4; // dim extension default value FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t DIM_DEFAULT_VALUE; -// the first item in the weight list of opdef is filter -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t WEIGHT_FILTER_INDEX; - -// the second item in the weight list of opdef is bias. -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t WEIGHT_BIAS_INDEX; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t TENSOR_ND_SUPPORT_SIZE; - // default NCHW index FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_N; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_C; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_H; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_W; -// default C1HWNCoC0 index -static const uint32_t C1HWNCoC0_DIM_C1 = 0; -static const uint32_t C1HWNCoC0_DIM_H = 1; -static const uint32_t C1HWNCoC0_DIM_W = 2; -static const uint32_t C1HWNCoC0_DIM_N = 3; -static const uint32_t C1HWNCoC0_DIM_Co = 4; -static const uint32_t C1HWNCoC0_DIM_C0 = 5; - -// default KCHW index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KCHW_DIM_K; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KCHW_DIM_C; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KCHW_DIM_H; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KCHW_DIM_W; - -// default HWCK index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWCK_DIM_H; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWCK_DIM_W; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWCK_DIM_C; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWCK_DIM_K; - // default NHWC index FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_N; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_H; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_W; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_C; -// default CHWN index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CHWN_DIM_N; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CHWN_DIM_C; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CHWN_DIM_H; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CHWN_DIM_W; - -// default CHW index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CHW_DIM_C; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CHW_DIM_H; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CHW_DIM_W; - -// default HWC index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWC_DIM_H; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWC_DIM_W; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWC_DIM_C; -// default Pad index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY 
extern const uint32_t PAD_H_HEAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t PAD_H_TAIL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t PAD_W_HEAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t PAD_W_TAIL; - -// default window index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t WINDOW_H; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t WINDOW_W; - -// default stride index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STRIDE_H; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STRIDE_W; - -// default dilation index -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t DILATION_H; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t DILATION_W; - -// the num of XRBG channel -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t XRGB_CHN_NUM; - -// default tensor format -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int DEFAULT_FORMAT; - -// default global pooling -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const bool DEFAULT_GLOBAL_POOLING; - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_VERSION; // model version 1.0 -// Number of inputs of the Eltwise operator -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int ELTWISE_MIN_INPUT_SIZE; - // flowctrl FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_STREAM_SWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_STREAM_ACTIVE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_PER_ITER; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_COND; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_INCREMENT; @@ -833,41 +617,36 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t TRUE_STRE FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STREAM_SWITCH_INPUT_NUM; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP_ASSIGNADD; -static const int PLATFORM_VERSION_LEN = 20; +constexpr uint32_t PLATFORM_VERSION_LEN = 20U; // Definition of the file header of the model file struct ModelFileHeader { - uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI - uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 - uint32_t version = MODEL_VERSION; // version 1.0 - uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0}; // signature - uint32_t length = 0; // Ciphertext length. In the non-encryption model, the length is the plaintext length. - uint8_t is_encrypt = ModelEncryptType::UNENCRYPTED; // whether encrypted 0:not encrypt, 1:encrypt - uint8_t is_checksum = ModelCheckType::CHECK; // whether to check the checksum - uint8_t modeltype = 0; // 0:IR model 1:standard model 2: OM Tiny model - uint8_t genmode = 0; // 0:offline generate 1:online generate - uint8_t name[MODEL_NAME_LENGTH] = {0}; // Model name, which contains 32 characters - uint32_t ops = 0; // Computing power (Kops) - uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0}; // User-defined information. 
The value contains 32 characters - uint32_t om_ir_version = 0; - uint32_t model_num = 0; - uint8_t platform_version[PLATFORM_VERSION_LEN] = {0}; - uint8_t platform_type = {0}; - uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 75 + uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI + uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 + uint32_t version = MODEL_VERSION; // version 1.0 + uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature + uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length. + // whether encrypted 0:not encrypt, 1:encrypt + uint8_t is_encrypt = static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); + uint8_t is_checksum = static_cast<uint8_t>(ModelCheckType::CHECK); // whether to check the checksum + uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model + uint8_t genmode = 0U; // 0:offline generate 1:online generate + uint8_t name[MODEL_NAME_LENGTH] = {0U}; // Model name, which contains 32 characters + uint32_t ops = 0U; // Computing power (Kops) + uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0U}; // User-defined information. The value contains 32 characters + uint32_t om_ir_version = 0U; + uint32_t model_num = 0U; + uint8_t platform_version[PLATFORM_VERSION_LEN] = {0U}; + uint8_t platform_type = {0U}; + uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0U}; // Reserved field 75 }; -static constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0; -static constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1; -static constexpr uint8_t TARGET_TYPE_TINY_8BIT = 2; - -static constexpr int32_t PARTITION_TYPE_MODEL_DEF = 0; -static constexpr int32_t PARTITION_TYPE_WEIGHTS = 1; -static constexpr int32_t PARTITION_TYPE_TASK_INFO = 2; +constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0U; +constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1U; // number of partitions in the current model -static constexpr uint32_t PARTITION_SIZE = 5; +constexpr uint32_t PARTITION_SIZE = 5U; enum ModelPartitionType { MODEL_DEF = 0, WEIGHTS_DATA, TASK_INFO, TBE_KERNELS, CUST_AICPU_KERNELS }; @@ -882,24 +661,9 @@ struct ModelPartitionTable { ModelPartitionMemInfo partition[0]; }; -#define SIZE_OF_MODEL_PARTITION_TABLE(table) (sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * (table).num) - -static constexpr int32_t PTHREAD_CREAT_SUCCESS = 0; // pthread_creat success - -// Filter format -typedef enum tagDomiFilterFormat { - DOMI_FILTER_KCHW, // KCHW - DOMI_FILTER_HWCK, // HWCK - DOMI_FILTER_RESERVED -} domiFilterFormat_t; - -// Const data trans type -typedef enum tagDomiConstDataTransType { - DOMI_CONST_DATA_NOT_CHANGE = 0, // No action is required - DOMI_CONST_DATA_TRANS_MATMUL, // The const input to MatMul and needs to be transposed - DOMI_CONST_DATA_RESERVED -} domiConstDataTransType_t; - +inline uint64_t SizeOfModelPartitionTable(const ModelPartitionTable &table) { + return sizeof(ModelPartitionTable) + (sizeof(ModelPartitionMemInfo) * static_cast<uint64_t>(table.num)); +} // mode of activation typedef enum tagDomiActivationMode { DOMI_ACTIVATION_SIGMOID = 0, // sigmoid @@ -919,190 +683,6 @@ typedef enum tagDomiActivationMode { DOMI_ACTIVATION_RESERVED } domiActivationMode_t; -// mode of batchnorm -typedef enum tagDomiBatchNormMode { - DOMI_BATCHNORM_PER_ACTIVATION = 0, // bnScale, bnBias tensor dims are 1xCxHxW - DOMI_BATCHNORM_SPATIAL, // bnScale, bnBias tensor dims are 1xCx1x1 - DOMI_BATCHNORM_RESERVED -} domiBatchNormMode_t; - -// eltwise mode -typedef enum tagDomiEltwiseMode {
DOMI_ELTWISE_PROD = 0, // prod - DOMI_ELTWISE_SUM, // sum - DOMI_ELTWISE_MAX, // max - DOMI_ELTWISE_RESERVED -} domiEltwiseMode_t; - -// mode of padding -typedef enum tagDomiPaddingMode { - DOMI_PADDING_CEIL = 0, // Default padding mode - DOMI_PADDING_DIRECTASSIGN, // Default padding mode: NOTSET - DOMI_PADDING_VALID, // VALID padding mode - DOMI_PADDING_SAME, // Padding values of 0 are always used - DOMI_PADDING_CEIL_NEW, // Padding values of 0 are always used - DOMI_PADDING_VALID_NEW, // Padding values of 0 are always used - DOMI_PADDING_SAME_NEW, // Padding values of 0 are always used - DOMI_PADDING_RESERVED -} domiPaddingMode_t; - -// algorithm of convolution forward -typedef enum tagDomiConvolutionFwdAlgo { - DOMI_CONVOLUTION_FWD_ALGO_GEMM = 0, // matrix gemm algo - DOMI_CONVOLUTION_FWD_ALGO_WINOGRAD, // Winograd Transform algo - DOMI_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32, // accumulate in L0c with FP32 - DOMI_CONVOLUTION_FWD_ALGO_RESERVED -} domiConvolutionFwdAlgo_t; - -typedef enum tagDomiFullConnectFwdAlgo { - DOMI_FULLCONNECT_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 - DOMI_FULLCONNECT_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 -} domiFullConnectFwdAlgo_t; - -typedef enum tagDomiPooingFwdAlgo { - DOMI_POOLING_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 - DOMI_POOLING_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 -} domiPooingFwdAlgo_t; - -// mode of convolution -typedef enum tagDomiConvolutionMode { - DOMI_CONV_CONVOLUTION = 0, // math convolution - DOMI_CONV_CROSS_CORRELATION, // cross-correlation convolution - DOMI_CONV_DECONVOLUTION, // deconvolution, also named transposed convolution - DOMI_CONV_MODE_DEPTHWISE, // depthwise convolution - DOMI_CONV_MODE_RESERVED -} domiConvolutionMode_t; - -// softmax mode -typedef enum tagDomiSoftmaxMode { - DOMI_SOFTMAX_MODE_INSTANCE = 0, // compute the softmax over all C, H, W for each N - DOMI_SOFTMAX_MODE_CHANNEL, // compute the softmax over all C for each H, W, N - DOMI_SOFTMAX_MODE_HEIGHT, // compute the softmax over all H for each N, C, W - DOMI_SOFTMAX_MODE_WIDTH, // compute the softmax over all W for each N, C, H - DOMI_SOFTMAX_MODE_RESERVED -} domiSoftmaxMode_t; - -// softmax algorithm -typedef enum tagDomiSoftmaxAlgo { - DOMI_SOFTMAX_FAST = 0, // straightforward implementation - DOMI_SOFTMAX_ACCURATE, // subtract max from every point to avoid overflow - DOMI_SOFTMAX_LOG, // perform the Log softmax operation to avoid overflow - DOMI_SOFTMAX_ACCURATE_FP32, - DOMI_SOFTMAX_RESERVED -} domiSoftmaxAlgo_t; - -// algorithm of convolution backward -typedef enum tagDomiConvolutionBwdAlgo { - DOMI_CONVOLUTION_BWD_ALGO_GEMM = 0, // matrix gemm algo - DOMI_CONVOLUTION_BWD_ALGO_WINOGRAD, // Winograd Transform algo - DOMI_CONVOLUTION_BWD_ALGO_RESERVED -} domiConvolutionBwdAlgo_t; - -// mode of pooling -typedef enum tagDomiPoolingMode { - DOMI_POOLING_MAX = 0, // max pooling - DOMI_POOLING_AVG, // average pooling - DOMI_POOLING_L2, // L2 pooling - DOMI_POOLING_RESERVED -} domiPoolingMode_t; - -// propagate Nan -typedef enum tagDomiNanPropagation { - DOMI_NAN_NOT_PROPAGATE = 0, // Nan numbers are not propagated - DOMI_NAN_PROPAGATE, // Nan numbers are propagated - DOMI_NAN_PROPAGATE_RESERVED -} domiNanPropagation_t; - -// mode of cropandresize -typedef enum tagDomiCropAndResizeMode { - DOMI_RESIZE_METHOD_BILINEAR = 0, // resize bilinear - DOMI_RESIZE_METHOD_NEAREST, // resize nearest - DOMI_RESIZE_RESERVED -} domiCropAndResizeMode_t; - -// yolo version -typedef enum tagDomiYoloVersion { DOMI_YOLO_V2 = 1, DOMI_YOLO_V3, 
DOMI_YOLO_TRSERVED } domiYoloVersion_t; - -typedef enum tagDomiRNNScopePassType { - DOMI_STATIC_BIDIRECTIONAL_RNN_GENERAL_PASS = 0, - DOMI_DYNAMIC_BIDIRECTIONAL_RNN_GENERAL_PASS, - DOMI_DYNAMIC_BIDIRECTIONAL_RNN_BIDAF_PASS -} domiRNNScopePassType; - -// RNNDataLayout -typedef enum tagDomiRNNDataLayout { - DOMI_RNN_ND_TBX = 0, // data[max_time,batch_size,Xt] - DOMI_RNN_ND_BTX, // data[batch_size,max_time,Xt] - DOMI_RNN_5D_TX1BX, // data[max_time,Xt,1,batch_size,Xt] - DOMI_RNN_5D_BX1TX, // dataa[batch_size,Xt,1,max_time,Xt] - DOMI_RNN_4DTBX1, - DOMI_ENN_DL_RESERVED -} domiRNNDataLayout_t; - -// RNNInputMode -typedef enum tagDomiRNNInputMode { DOMI_RNN_LINEAR_INPUT = 0, DOMI_RNN_SKIP_INPUT } domiRNNInputMode_t; - -// RNNDirectionMode -typedef enum tagDomiRNNDirectionMode { DOMI_RNN_UNIDIRECTIONAL = 0, DOMI_RNN_BIDIRECTIONAL } domiDirectionMode_t; - -typedef enum tagDomiPoolingCeilMode { DOMI_POOLING_FLOOR = 0, DOMI_POOLING_CEIL } domiPoolingCeilMode_t; - -// RNNMode -typedef enum tagDomiRNNActivationMode { - DOMI_RNN_ACTIVATION_SIGMOID = 0, // sigmoid - DOMI_RNN_ACTIVATION_TANH, // tanh - DOMI_RNN_ACTIVATION_RELU, // ReLU - DOMI_RNN_ACTIVATION_RELU1, // ReLU1 - DOMI_RNN_ACTIVATION_RELU6, // ReLU6 - DOMI_RNN_ACTIVATION_RESERVED -} domiRNNActivationMode_t; - -typedef enum tagDomiRNNLSTMOutMode { - DOMI_RNN_LSTM_OUT_SEPARATE = 0, - DOMI_RNN_LSTM_OUT_CONCAT, - DOMI_RNN_LSTM_OUT_RESERVED -} domiRNNLSTMOutPutMode_t; -typedef enum tagDomiRNNLSTMStateOutMode { - DOMI_RNN_LSTM_STATE_OUT_SEPARATE = 0, - DOMI_RNN_LSTM_STATE_OUT_CONCAT_ALL, - DOMI_RNN_LSTM_STATE_OUT_RESERVED -} domiRNNLSTMStateOutMode_t; - -typedef enum tagDomiRNNMode { - DOMI_RNN_RELU = 0, - DOMI_RNN_TANH, - DOMI_LSTM, - DOMI_GRU, - DOMI_RNN_MODE_RESERVED -} domiRNNMode_t; - -typedef enum tagDomiResizeBilinearMode { - DOMI_RESIZE_OUTPUT_DIM_BY_ZOOM_FACTOR = 0, // Output dimension specified by zoom factor - DOMI_RESIZE_OUTPUT_DIM_BY_SHRINK_FACTOR, // specified by shrink factor - DOMI_RESIZE_OUTPUT_DIM_EXPLICIT, // specified explicitly - DOMI_RESIZE_OUTPUT_DIM_RESERVED -} domiResizeOutputDimMode_t; - -#pragma pack(1) // single-byte alignment -// DUMP file struct -struct FileHeader { - int32_t Version; // version - int32_t Output_Offset; // output offset address - char Reserved[24] = {0}; // 24 bytes reserved -}; - -struct BasicInfo { - struct FileHeader header; // file header - int32_t stream_id; // stread id - uint64_t start_time; // start time - uint64_t end_time; // end time - uint32_t input_size; // input memory size - uint32_t output_size; // output memory size - uint32_t weight_size; // weight Memory Size - uint32_t workspace_size; // workspace - uint32_t total_size; // total memory size -}; -#pragma pack() // Cancels single-byte alignment enum class MemorySizeCalcType { NORMAL = 0, ALWAYS_EMPTY }; } // namespace ge diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index a3989b9d..84912e64 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,33 +14,32 @@ * limitations under the License. 
*/ -#ifndef INC_FRAMEWORK_COMMON_UTIL_H_ -#define INC_FRAMEWORK_COMMON_UTIL_H_ +#ifndef AIR_INC_FRAMEWORK_COMMON_UTIL_H_ +#define AIR_INC_FRAMEWORK_COMMON_UTIL_H_ -#include -#include -#include +#include #include #include -#include -#include "framework/common/debug/ge_log.h" +#include +#include "external/graph/types.h" +#include "external/register/register.h" #include "framework/common/debug/log.h" #include "framework/common/scope_guard.h" #include "framework/common/ge_inner_error_codes.h" -#include "mmpa/mmpa_api.h" +#include "graph/detail/attributes_holder.h" -#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ - do { \ - if (size <= 0) { \ - DOMI_LOGE("param[%s] is not a positive number", #size); \ - return PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ + do { \ + if ((size) <= 0) { \ + GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ + return PARAM_INVALID; \ + } \ + } while (false) #define CHECK_FALSE_EXEC(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ exec_expr; \ } \ @@ -48,189 +47,244 @@ // new ge marco // Encapsulate common resource releases -#define GE_MAKE_GUARD_RTMEM(var) \ - GE_MAKE_GUARD(var, [&] { \ - if (var) GE_CHK_RT(rtFreeHost(var)); \ - }); +#define GE_MAKE_GUARD_RTMEM(var) \ + GE_MAKE_GUARD(var, [&] { \ + if ((var) != nullptr) { \ + GE_CHK_RT(rtFreeHost(var)); \ + } \ + }) -#define GE_MAKE_GUARD_RTSTREAM(var) \ - GE_MAKE_GUARD(var, [&] { \ - if (var) GE_CHK_RT(rtStreamDestroy(var)); \ - }); +#define GE_MAKE_GUARD_RTSTREAM(var) \ + GE_MAKE_GUARD(var, [&] { \ + if ((var) != nullptr) { \ + GE_CHK_RT(rtStreamDestroy(var)); \ + } \ + }) // For propagating errors when calling a function. -#define GE_RETURN_IF_ERROR(expr) \ - do { \ - const ::ge::Status _status = (expr); \ - if (_status) return _status; \ - } while (0) +#define GE_RETURN_IF_ERROR(expr) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + return _chk_status; \ + } \ + } while (false) #define GE_RETURN_WITH_LOG_IF_ERROR(expr, ...) \ do { \ - const ::ge::Status _status = (expr); \ - if (_status) { \ - DOMI_LOGE(__VA_ARGS__); \ - return _status; \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ + return _chk_status; \ } \ - } while (0) + } while (false) // check whether the parameter is true. If it is, return FAILED and record the error log #define GE_RETURN_WITH_LOG_IF_TRUE(condition, ...) \ do { \ if (condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::FAILED; \ } \ - } while (0) + } while (false) // Check if the parameter is false. If yes, return FAILED and record the error log #define GE_RETURN_WITH_LOG_IF_FALSE(condition, ...) \ do { \ - bool _condition = (condition); \ + const bool _condition = (condition); \ if (!_condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::FAILED; \ } \ - } while (0) + } while (false) // Checks whether the parameter is true. If so, returns PARAM_INVALID and records the error log #define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \ do { \ if (condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::PARAM_INVALID; \ } \ - } while (0) + } while (false) // Check if the parameter is false. If yes, return PARAM_INVALID and record the error log #define GE_RT_PARAM_INVALID_WITH_LOG_IF_FALSE(condition, ...) 
\ do { \ - bool _condition = (condition); \ + const bool _condition = (condition); \ if (!_condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::PARAM_INVALID; \ } \ - } while (0) + } while (false) // Check if the parameter is null. If yes, return PARAM_INVALID and record the error -#define GE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ - DOMI_LOGE("[Check][Param:%s]null is invalid.", #val); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_NOTNULL(val, ...) \ + do { \ + if ((val) == nullptr) { \ + REPORT_INNER_ERROR("E19999", "Param:" #val " is nullptr, check invalid" __VA_ARGS__); \ + GELOGE(ge::FAILED, "[Check][Param:" #val "]null is invalid" __VA_ARGS__); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) // Check if the parameter is null. If yes, just return and record the error -#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ - } while (0) +#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ + do { \ + if ((val) == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return; \ + } \ + } while (false) // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log -#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - exec_expr; \ - } \ - } while (0) +#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ + do { \ + if ((val) == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + exec_expr; \ + } \ + } while (false) // Check whether the parameter is null. If yes, return directly and record the error log -#define GE_RT_VOID_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ - } while (0) +#define GE_RT_VOID_CHECK_NOTNULL(val) \ + do { \ + if ((val) == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return; \ + } \ + } while (false) // Check if the parameter is null. 
If yes, return false and record the error log -#define GE_RT_FALSE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return false; \ - } \ - } while (0) +#define GE_RT_FALSE_CHECK_NOTNULL(val) \ + do { \ + if ((val) == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return false; \ + } \ + } while (false) // Check if the parameter is out of bounds -#define GE_CHECK_SIZE(size) \ - do { \ - if (size == 0) { \ - DOMI_LOGE("param[%s] is out of range", #size); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_SIZE(size) \ + do { \ + if ((size) == 0U) { \ + GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) // Check if the value on the left is greater than or equal to the value on the right -#define GE_CHECK_GE(lhs, rhs) \ - do { \ - if (lhs < rhs) { \ - DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_GE(lhs, rhs) \ + do { \ + if ((lhs) < (rhs)) { \ + GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) // Check if the value on the left is less than or equal to the value on the right -#define GE_CHECK_LE(lhs, rhs) \ - do { \ - if (lhs > rhs) { \ - DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_LE(lhs, rhs) \ + do { \ + if ((lhs) > (rhs)) { \ + GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) #define GE_DELETE_NEW_SINGLE(var) \ do { \ - if (var != nullptr) { \ - delete var; \ - var = nullptr; \ + if ((var) != nullptr) { \ + delete (var); \ + (var) = nullptr; \ } \ - } while (0) + } while (false) #define GE_DELETE_NEW_ARRAY(var) \ do { \ - if (var != nullptr) { \ - delete[] var; \ - var = nullptr; \ + if ((var) != nullptr) { \ + delete[](var); \ + (var) = nullptr; \ } \ - } while (0) + } while (false) #define GE_FREE_RT_LOG(addr) \ do { \ - if (addr != nullptr) { \ - rtError_t error = rtFree(addr); \ + if ((addr) != nullptr) { \ + const rtError_t error = rtFree(addr); \ if (error != RT_ERROR_NONE) { \ GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ } \ - addr = nullptr; \ + (addr) = nullptr; \ } \ - } while (0) + } while (false) +namespace ge { /** * @ingroup domi_common * @brief version of om.proto file */ -static constexpr int32_t OM_PROTO_VERSION = 2; - -/** - * Finding an Integer Ceiling Value Without Precision Loss - */ -#define CEIL(N, n) (((N) + (n)-1) / (n)) +constexpr int32_t OM_PROTO_VERSION = 2; -namespace ge { -using google::protobuf::Message; +/// +/// @ingroup domi_common +/// @brief Converts Vector of a number to a string. +/// @param [in] v Vector of a number +/// @return string +/// +template <typename T> +GE_FUNC_VISIBILITY std::string ToString(const std::vector<T> &v) { + bool first = true; + std::stringstream ss; + ss << "["; + for (const T &x : v) { + if (first) { + first = false; + ss << x; + } else { + ss << ", " << x; + } + } + ss << "]"; + return ss.str(); +} /// /// @ingroup domi_common -/// @brief Maximum file path length +/// @brief Converts RepeatedField to String.
+/// @param [in] rpd_field RepeatedField +/// @return string /// -const int32_t DOMI_MAX_PATH_LEN = 256; +template <typename T> +GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) { + std::stringstream ss; + ss << "["; + for (const T x : rpd_field) { + ss << x; + ss << ", "; + } + // Delete the two extra characters at the end of the line. + std::string str = ss.str().substr(0U, ss.str().length() - 2U); + str += "]"; + return str; +} + +/// +/// @ingroup ge_ir_utils +/// @brief RepeatedPtrField->String +/// @param [in] const rpd_field RepeatedPtrField +/// @return String +/// +template <typename T> +GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedPtrField<T> &rpd_ptr_field) { + std::stringstream ss; + ss << "["; + for (const T &x : rpd_ptr_field) { + ss << x; + ss << ", "; + } + std::string str_ret = ss.str().substr(0U, ss.str().length() - 2U); + str_ret += "]"; + return str_ret; +} /// /// @ingroup domi_common @@ -241,7 +295,8 @@ const int32_t DOMI_MAX_PATH_LEN = 256; /// @return true success /// @return false fail /// -GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int size, Message *proto); +GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *const data, const int32_t size, + google::protobuf::Message *const proto); /// /// @ingroup domi_proto @@ -251,17 +306,15 @@ /// @return true success /// @return false fail /// -GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); - -GE_FUNC_VISIBILITY bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message); +GE_FUNC_VISIBILITY bool ReadProtoFromText(const char_t *const file, google::protobuf::Message *const message); /// /// @ingroup: domi_common /// @brief: get length of file /// @param [in] input_file: path of file -/// @return long: File length. If the file length fails to be obtained, the value -1 is returned. +/// @return int64_t: File length. If the file length fails to be obtained, the value -1 is returned. /// -GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file); +GE_FUNC_VISIBILITY extern int64_t GetFileLength(const std::string &input_file); /// /// @ingroup domi_common @@ -272,9 +325,7 @@ GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file); /// @return false fail /// @return true success /// -GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length); - -GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector<char> &buffer); +GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char_t *const file_name, char_t **const buffer, int32_t &length); /// /// @ingroup domi_common @@ -283,7 +334,7 @@ GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vect /// @return 0 success /// @return -1 fail /// -GE_FUNC_VISIBILITY extern int CreateDirectory(const std::string &directory_path); +GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); /// /// @ingroup domi_common @@ -294,46 +345,6 @@ GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); /// /// @ingroup domi_common -/// @brief onverts Vector of a number to a string.
-/// @param [in] v Vector of a number -/// @return string -/// -template -GE_FUNC_VISIBILITY std::string ToString(std::vector &v) { - std::stringstream ss; - ss << "["; - for (T x : v) { - ss << x; - ss << ", "; - } - std::string strRet = - ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line. - strRet += "]"; - return strRet; -} - -/// -/// @ingroup domi_common -/// @brief Converts RepeatedField to String. -/// @param [in] rpd_field RepeatedField -/// @return string -/// -template -GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField &rpd_field) { - std::stringstream ss; - ss << "["; - for (T x : rpd_field) { - ss << x; - ss << ", "; - } - std::string strRet = - ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line. - strRet += "]"; - return strRet; -} - -/// -/// @ingroup domi_common /// @brief Obtains the absolute time (timestamp) of the current system. /// @return Timestamp, in microseconds (US) /// @@ -355,7 +366,7 @@ GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap(); /// @param [in] b /// @return false: true: The result is within the normal int64 range. /// -GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); +GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(const int64_t a, const int64_t b); /// /// @ingroup domi_common @@ -363,7 +374,7 @@ GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); /// @param [in] path of input file /// @param [out] Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned /// -GE_FUNC_VISIBILITY std::string RealPath(const char *path); +GE_FUNC_VISIBILITY std::string RealPath(const char_t *path); /// /// @ingroup domi_common @@ -387,28 +398,12 @@ GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const /// /// @ingroup domi_common /// @brief Check whether the file path meets the whitelist verification requirements. -/// @param [in] filePath file path -/// @param [out] result -/// -GE_FUNC_VISIBILITY bool ValidateStr(const std::string &filePath, const std::string &mode); - -/// -/// @ingroup domi_common -/// @brief Check whether the file is normal file. -/// @param [in] file_path file path +/// @param [in] str file path /// @param [out] result /// -GE_FUNC_VISIBILITY bool IsValidFile(const char *file_path); +GE_FUNC_VISIBILITY bool ValidateStr(const std::string &file_path, const std::string &mode); -/// -/// @ingroup domi_common -/// @brief Check path invalid -/// @param [in] path, path to be checked -/// @param [in] length, length of path -/// @return 0 success -/// @return -1 fail -/// -GE_FUNC_VISIBILITY Status CheckPath(const char *path, size_t length); +GE_FUNC_VISIBILITY Status ConvertToInt32(const std::string &str, int32_t &val); } // namespace ge -#endif // INC_FRAMEWORK_COMMON_UTIL_H_ +#endif // AIR_INC_FRAMEWORK_COMMON_UTIL_H_ diff --git a/inc/framework/engine/dnnengine.h b/inc/framework/engine/dnnengine.h index 8a0f3b65..f9a7b65b 100644 --- a/inc/framework/engine/dnnengine.h +++ b/inc/framework/engine/dnnengine.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
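Before the engine header, a short usage sketch for the ToString helpers introduced in util.h above (illustrative; assumes GELOGI from framework/common/debug/ge_log.h):

#include <cstdint>
#include <vector>
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"

void LogShape() {
  const std::vector<int64_t> dims{1, 3, 224, 224};
  // Prints "shape: [1, 3, 224, 224]"; the rewritten vector overload emits
  // separators explicitly instead of trimming two characters off the tail.
  GELOGI("shape: %s", ge::ToString(dims).c_str());
}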
@@ -21,16 +21,17 @@ #include #include -#include "common/ge_inner_error_codes.h" -#include "common/ge_types.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/ge_types.h" #include "graph/types.h" namespace ge { -enum PriorityEnum { +enum class PriorityEnum { COST_0 = 0, - COST_1, - COST_2, - COST_3, + COST_1 = 1, + COST_2 = 2, + COST_3 = 3, + COST_4 = 4, COST_9 = 9, COST_10 = 10, }; @@ -38,19 +39,37 @@ struct DNNEngineAttribute { std::string engine_name; std::vector<std::string> mem_type; - uint32_t compute_cost; + PriorityEnum compute_cost; enum RuntimeType runtime_type; // HOST, DEVICE // If engine input format must be specific, set this attribute, else set FORMAT_RESERVED Format engine_input_format; Format engine_output_format; + bool atomic_engine_flag; }; class GE_FUNC_VISIBILITY DNNEngine { public: + DNNEngine() = default; + explicit DNNEngine(const DNNEngineAttribute &attrs) { + engine_attribute_ = attrs; + } virtual ~DNNEngine() = default; - virtual Status Initialize(const std::map<std::string, std::string> &options) = 0; - virtual Status Finalize() = 0; - virtual void GetAttributes(DNNEngineAttribute &attr) const = 0; + Status Initialize(const std::map<std::string, std::string> &options) const { + (void)options; + return SUCCESS; + } + Status Finalize() const { + return SUCCESS; + } + void GetAttributes(DNNEngineAttribute &attr) const { + attr = engine_attribute_; + } + bool IsAtomic() const { + return engine_attribute_.atomic_engine_flag; + } + + protected: + DNNEngineAttribute engine_attribute_; }; } // namespace ge diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index fcca561c..3c921345 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
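With DNNEngine de-virtualized, a backend no longer subclasses and overrides Initialize/Finalize; it describes itself through a DNNEngineAttribute captured at construction. A speculative sketch (the field values and the ge::DEVICE / ge::FORMAT_RESERVED enumerators are assumptions for illustration, not taken from this patch):

#include "framework/engine/dnnengine.h"

ge::DNNEngine MakeExampleEngine() {
  ge::DNNEngineAttribute attr;
  attr.engine_name = "ExampleEngine";
  attr.compute_cost = ge::PriorityEnum::COST_1;    // scoped enum replaces the raw uint32_t cost
  attr.runtime_type = ge::DEVICE;                  // assumed RuntimeType enumerator
  attr.engine_input_format = ge::FORMAT_RESERVED;  // no mandated input format
  attr.engine_output_format = ge::FORMAT_RESERVED;
  attr.atomic_engine_flag = false;
  return ge::DNNEngine(attr);  // attributes are captured once at construction
}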
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index fcca561c..3c921345 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,42 +22,58 @@
 #include
 #include "common/dynamic_aipp.h"
-#include "common/ge_inner_error_codes.h"
-#include "common/ge_types.h"
-#include "common/types.h"
+#include "framework/common/ge_inner_error_codes.h"
+#include "framework/common/ge_types.h"
+#include "framework/common/types.h"
 #include "graph/tensor.h"
 #include "graph/ge_tensor.h"
-#include "runtime/base.h"
 
 namespace ge {
 class SingleOp;
 class DynamicSingleOp;
+class GeRootModel;
 
 struct RunModelData {
   uint32_t index;  // Data index
   uint32_t modelId;
-  std::vector<DataBuffer> blobs;       // All input/output data buffer
-  uint32_t timestamp;                  // Data creation time
-  uint32_t timeout;                    // Processing timeout
-  uint64_t request_id = 0;             // Request ID
-  uint64_t dynamic_batch_size = 0;     // Dynamic batch size scene, set dynamic size, not supported by default:0
-  uint64_t dynamic_image_height = 0;   // Dynamic image size scene, set image height, not supported by default:0
-  uint64_t dynamic_image_width = 0;    // Dynamic image size scene, set image width, not supported by default:0
-  std::vector<uint64_t> dynamic_dims;  // Dynamic dims scene, set dynamic dims, not supported by default:empty
+  std::vector<DataBuffer> blobs;        // All input/output data buffer
+  uint32_t timestamp;                   // Data creation time
+  uint32_t timeout;                     // Processing timeout
+  uint64_t request_id = 0UL;            // Request ID
+  uint64_t dynamic_batch_size = 0UL;    // Dynamic batch size scene, set dynamic size, not supported by default:0
+  uint64_t dynamic_image_height = 0UL;  // Dynamic image size scene, set image height, not supported by default:0
+  uint64_t dynamic_image_width = 0UL;   // Dynamic image size scene, set image width, not supported by default:0
+  std::vector<uint64_t> dynamic_dims;   // Dynamic dims scene, set dynamic dims, not supported by default:empty
 };
 
 class GE_FUNC_VISIBILITY GeExecutor {
  public:
   GeExecutor();
   ~GeExecutor() = default;
-  ge::Status Initialize();
-  ge::Status Finalize();
-  ge::Status UnloadModel(uint32_t modelId);
+  Status Initialize();
+  Status Finalize();
+
+  ///
+  /// @ingroup ge
+  /// @brief Initialize global execute environment.
+  /// @param [in] options: environment variables.
+  /// @return init result
+  ///
+  static Status Initialize(const std::map<std::string, std::string> &options);
+
+  ///
+  /// @ingroup ge
+  /// @brief Finalize global execute environment.
+  /// @return execute result
+  ///
+  static Status FinalizeEx();
+
+  Status UnloadModel(const uint32_t model_id);
 
   // Get input and output descriptor
-  ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
-                              std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false);
+  Status GetModelDescInfo(const uint32_t model_id, std::vector<TensorDesc> &input_desc,
+                          std::vector<TensorDesc> &output_desc, const bool new_model_desc = false);
 
   ///
   /// @ingroup ge
@@ -68,7 +84,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario
   /// @return execute result
   ///
-  ge::Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size);
+  Status SetDynamicBatchSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length,
+                             const uint64_t batch_size);
 
   ///
   /// @ingroup ge
@@ -80,8 +97,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] image_height: image height entered by user in dynamic multi-resolution scenario
   /// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario
   /// @return execute result
   ///
-  ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
-                                 uint64_t image_width);
+  Status SetDynamicImageSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length,
+                             const uint64_t image_height, const uint64_t image_width);
 
   ///
   /// @ingroup ge
@@ -93,8 +110,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] dynamic_dims: array of dynamic dimensions
   /// @return execute result
   ///
-  ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
-                            const std::vector<uint64_t> &dynamic_dims);
+  Status SetDynamicDims(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length,
+                        const std::vector<uint64_t> &dynamic_dims);
 
   ///
   /// @ingroup ge
@@ -104,8 +121,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] cur_dynamic_dims: current dynamic dims
   /// @return execute result
   ///
-  ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
-                               std::vector<uint64_t> &cur_dynamic_dims);
+  Status GetCurDynamicDims(const uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
+                           std::vector<uint64_t> &cur_dynamic_dims);
 
   ///
   /// @ingroup ge
@@ -115,8 +132,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] dynamic_type
   /// @return execute result
   ///
-  ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
-                                 int32_t &dynamic_type);
+  Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
+                             int32_t &dynamic_type);
 
   ///
   /// @ingroup ge
@@ -125,7 +142,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] batch_info
   /// @return execute result
   ///
-  ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
+  Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
 
   ///
   /// @ingroup ge
@@ -134,9 +151,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] user_designate_shape_order
   /// @return execute result
   ///
-  ge::Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
+  Status GetUserDesignateShapeOrder(const uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
 
-  ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);
+  Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);
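The dynamic-shape entry points above share one pattern: query what the loaded model supports, then write the chosen value through the model's dynamic input address. A hedged usage sketch (buffer handling and the batch size 8 are illustrative only):

#include <cstdint>
#include <vector>
#include "framework/executor/ge_executor.h"

// Hedged sketch of the dynamic-batch flow implied by the declarations above.
ge::Status RunWithDynamicBatch(ge::GeExecutor &executor, const uint32_t model_id,
                               void *const dynamic_input_addr, const uint64_t length) {
  std::vector<std::vector<int64_t>> batch_info;
  int32_t dynamic_type = 0;
  const ge::Status ret = executor.GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
  if (ret != ge::SUCCESS) {
    return ret;
  }
  // 8 must be one of the batch sizes reported in batch_info; illustrative here.
  return executor.SetDynamicBatchSize(model_id, dynamic_input_addr, length, 8UL);
}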
 
   ///
   /// @ingroup ge
@@ -148,22 +165,22 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] aippParms: kAippDynamicPara by user in
dynamic aipp /// @return execute result /// - ge::Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, - const std::vector &aippBatchPara, - const kAippDynamicPara &aippParms); + Status SetDynamicAippData(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, + const std::vector &aipp_batch_para, + const kAippDynamicPara &aipp_parms); - ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + Status GetAIPPInfo(const uint32_t model_id, const uint32_t index, AippConfigInfo &aipp_info); - ge::Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, - std::string &attr_value); + Status GetOpAttr(const uint32_t model_id, const std::string &op_name, const std::string &attr_name, + std::string &attr_value); - ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + Status GetModelAttr(const uint32_t model_id, std::vector &dynamic_output_shape_info); - ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + Status GetAippType(const uint32_t model_id, const uint32_t index, InputAippType &type, size_t &aipp_index); - ge::Status CommandHandle(const ge::Command &command); + Status CommandHandle(const Command &command) const; - ge::Status SetDump(const DumpConfig &dump_config); + Status SetDump(const DumpConfig &dump_config); /// /// @ingroup ge @@ -173,7 +190,7 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @return SUCCESS /// @return FAILED /// - ge::Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size); + Status GetMaxUsedMemory(const uint32_t model_id, uint32_t &max_size); /// /// @ingroup ge @@ -182,7 +199,7 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] ModelData &model_data: Offline model memory data /// @return SUCCESS handle successfully / others handle failed /// - ge::Status LoadDataFromFile(const std::string &path, ge::ModelData &model_data); + Status LoadDataFromFile(const std::string &path, ModelData &model_data); /// /// @ingroup ge @@ -195,8 +212,8 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] uint32_t &model_id: Corresponding identification after model loading /// @return SUCCESS handle successfully / others handle failed /// - ge::Status LoadModelFromData(uint32_t &model_id, const ge::ModelData &model_data, void *dev_ptr, size_t mem_size, - void *weight_ptr, size_t weight_size); + Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *const dev_ptr, const size_t mem_size, + void *const weight_ptr, const size_t weight_size); /// /// @ingroup ge @@ -207,9 +224,20 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [in] output_queue_ids: input queue ids create from user. /// @return: 0 for success / others for fail /// - ge::Status LoadModelWithQ(uint32_t &model_id, const ge::ModelData &model_data, - const std::vector &input_queue_ids, - const std::vector &output_queue_ids); + Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector &input_queue_ids, + const std::vector &output_queue_ids); + + /// + /// @ingroup ge + /// @brief Load task list from ModelData with queue. + /// @param [out] model_id: model id allocate from manager. + /// @param [in] root_model: Instance of GeRootModel. + /// @param [in] input_queue_ids: input queue ids create from user. + /// @param [in] output_queue_ids: input queue ids create from user. 
+ /// @return: 0 for success / others for fail + /// + Status LoadModelWithQ(uint32_t &model_id, const std::shared_ptr &root_model, + const std::vector &input_queue_ids, const std::vector &output_queue_ids); /// /// @ingroup ge @@ -221,8 +249,17 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] domi::OutputData *output_data: Model output data /// @return SUCCESS handle successfully / others handle failed /// - ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, - ge::RunModelData &output_data, bool async_mode = false); + Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &input_data, + RunModelData &output_data, const bool async_mode = false); + + /// + /// @ingroup ge + /// @brief Load task list from root_model without input queue or output queue. + /// @param [out] model_id: model id allocate from manager. + /// @param [in] root_model: Instance of GeRootModel. + /// @return: 0 for success / others for fail + /// + Status LoadModelWithoutQ(uint32_t &model_id, const std::shared_ptr &root_model) const; /// /// @ingroup ge @@ -236,9 +273,9 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] std::vector &output_desc: description of model output data /// @return SUCCESS handle successfully / others handle failed /// - ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, - const std::vector &input_desc, ge::RunModelData &run_output_data, - std::vector &output_desc, bool async_mode = false); + Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &run_input_data, + const std::vector &input_desc, RunModelData &run_output_data, + std::vector &output_desc, const bool async_mode = false); /// /// @ingroup ge @@ -248,7 +285,7 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] size_t &weight_size Weight memory space size /// @return SUCCESS handle successfully / others handle failed /// - ge::Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size); + Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size); /// /// @ingroup ge @@ -259,39 +296,45 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] size_t &weight_size Weight memory space size /// @return SUCCESS handle successfully / others handle failed /// - ge::Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size); + Status GetMemAndWeightSize(const void *const model_data, const size_t model_size, size_t &mem_size, + size_t &weight_size); + + static Status LoadSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream, + SingleOp **const single_op); + + static Status LoadSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream, + SingleOp **const single_op, const uint64_t model_id); - static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, - SingleOp **single_op); + static Status ExecuteAsync(SingleOp *const executor, const std::vector &inputs, + std::vector &outputs); - static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream, - SingleOp **single_op, const uint64_t model_id); + static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream, + DynamicSingleOp **const single_op); - static ge::Status ExecuteAsync(SingleOp *executor, const std::vector &inputs, - 
std::vector &outputs); + static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream, + DynamicSingleOp **const single_op, const uint64_t model_id); - static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, - DynamicSingleOp **single_op); + static Status UnloadSingleOp(const uint64_t op_id); - static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, - DynamicSingleOp **single_op, const uint64_t model_id); + static Status UnloadDynamicSingleOp(const uint64_t op_id); - static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, - const std::vector &inputs, std::vector &output_desc, - std::vector &outputs); + static Status ExecuteAsync(DynamicSingleOp *const executor, const std::vector &input_desc, + const std::vector &inputs, std::vector &output_desc, + std::vector &outputs); - static ge::Status ReleaseSingleOpResource(void *stream); + static Status ReleaseSingleOpResource(void *const stream); - static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id); - ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); - ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); - ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, - std::vector &output_dims); - ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); + Status GetBatchInfoSize(const uint32_t model_id, size_t &shape_count); + Status GetOrigInputInfo(const uint32_t model_id, const uint32_t index, OriginInputInfo &orig_input_info); + Status GetAllAippInputOutputDims(const uint32_t model_id, const uint32_t index, + std::vector &input_dims, std::vector &output_dims); + Status GetOpDescInfo(const uint32_t device_id, const uint32_t stream_id, const uint32_t task_id, + OpDescInfo &op_desc_info); private: - static bool isInit_; + static std::atomic_bool is_inited_; }; } // namespace ge diff --git a/inc/framework/ge_runtime/davinci_model.h b/inc/framework/ge_runtime/davinci_model.h deleted file mode 100644 index 91e70159..00000000 --- a/inc/framework/ge_runtime/davinci_model.h +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ -#define INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ - -#include -#include - -#include "ge_runtime/op_info.h" -#include "ge_runtime/task_info.h" - -namespace ge { -namespace model_runner { -class DavinciModel { - public: - DavinciModel(const std::vector> &task_info_list, - const std::vector> &data_info_list, - const std::vector> &output_info_list, - const std::vector> &constant_info_list, - const std::vector &variable_info_list, - const std::vector &wait_active_stream_list, - const std::vector &force_copy_stream_list, uint64_t mem_size = 0, uint64_t weight_size = 0, - uint64_t var_size = 0, uintptr_t logic_mem_base = 0, uintptr_t logic_weight_base = 0, - uintptr_t logic_var_base = 0, uint32_t stream_num = 0, uint32_t batch_num = 0, uint32_t event_num = 0, - int32_t priority = 0) - : task_info_list_(task_info_list), - data_info_list_(data_info_list), - output_info_list_(output_info_list), - constant_info_list_(constant_info_list), - variable_info_list_(variable_info_list), - wait_active_stream_list_(wait_active_stream_list), - force_copy_stream_list_(force_copy_stream_list), - mem_size_(mem_size), - weight_size_(weight_size), - var_size_(var_size), - logic_mem_base_(logic_mem_base), - logic_weight_base_(logic_weight_base), - logic_var_base_(logic_var_base), - stream_num_(stream_num), - batch_num_(batch_num), - event_num_(event_num), - priority_(priority) {} - ~DavinciModel() {} - - uint64_t GetMemSize() const { return mem_size_; } - uint64_t GetWeightSize() const { return weight_size_; } - uint64_t GetVarSize() const { return var_size_; } - - uintptr_t GetLogicMemBase() const { return logic_mem_base_; } - uintptr_t GetLogicWeightBase() const { return logic_weight_base_; } - uintptr_t GetLogicVarBase() const { return logic_var_base_; } - - uint32_t GetStreamNum() const { return stream_num_; } - uint32_t GetBatchNum() const { return batch_num_; } - uint32_t GetEventNum() const { return event_num_; } - - const std::vector &GetWaitActiveStreams() const { return wait_active_stream_list_; } - const std::vector &GetForceCopyStreams() const { return force_copy_stream_list_; } - - int32_t GetPriority() const { return priority_; } - - const std::vector> &GetTaskInfoList() const { return task_info_list_; } - const std::vector> &GetDataInfoList() const { return data_info_list_; } - const std::vector> &GetOutputInfoList() const { return output_info_list_; } - const std::vector> &GetConstantInfoList() const { return output_info_list_; } - const std::vector &GetVariableInfoList() const { return variable_info_list_; } - - private: - std::vector> task_info_list_; - std::vector> data_info_list_; - std::vector> output_info_list_; - std::vector> constant_info_list_; - std::vector variable_info_list_; - - std::vector wait_active_stream_list_; - std::vector force_copy_stream_list_; - - uint64_t mem_size_; - uint64_t weight_size_; - uint64_t var_size_; - - uintptr_t logic_mem_base_; - uintptr_t logic_weight_base_; - uintptr_t logic_var_base_; - - uint32_t stream_num_; - uint32_t batch_num_; - uint32_t event_num_; - - int32_t priority_; - - // Disable to copy constructor and assignment operator - DavinciModel &operator=(const DavinciModel &) = delete; - DavinciModel(const DavinciModel &) = delete; -}; -} // namespace model_runner -} // namespace ge - -#endif // INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ diff --git a/inc/framework/ge_runtime/model_runner.h b/inc/framework/ge_runtime/model_runner.h deleted file mode 100644 index e495dfdf..00000000 --- 
a/inc/framework/ge_runtime/model_runner.h +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ -#define INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ - -#include -#include -#include - -#include "common/ge_inner_error_codes.h" -#include "common/ge_types.h" -#include "ge_runtime/davinci_model.h" - -namespace ge { -namespace model_runner { -class RuntimeModel; -using RuntimeInfo = std::tuple; -class ModelRunner { - public: - static ModelRunner &Instance(); - - bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id, - std::shared_ptr davinci_model, std::shared_ptr listener); - - bool DistributeTask(uint32_t model_id); - - bool LoadModelComplete(uint32_t model_id); - - const std::vector &GetTaskIdList(uint32_t model_id) const; - - const std::vector &GetStreamIdList(uint32_t model_id) const; - - const std::map> &GetRuntimeInfoMap(uint32_t model_id) const; - - void *GetModelHandle(uint32_t model_id) const; - - bool UnloadModel(uint32_t model_id); - - bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data); - - bool GetInputOutputDescInfo(uint32_t model_id, bool zero_copy, std::vector *input_desc, - std::vector *output_desc, std::vector *input_format, - std::vector *output_format); - - private: - ModelRunner() = default; - ~ModelRunner() = default; - - std::unordered_map> runtime_models_; -}; -} // namespace model_runner -} // namespace ge - -#endif // INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ diff --git a/inc/framework/ge_runtime/op_info.h b/inc/framework/ge_runtime/op_info.h deleted file mode 100644 index 22c16ed6..00000000 --- a/inc/framework/ge_runtime/op_info.h +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
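For reference, the ModelRunner class removed above implied a load / distribute / run lifecycle. An illustrative-only reconstruction assembled from the deleted declarations (the listener and data types are assumptions, since this extract strips template arguments):

#include <memory>
#include "ge_runtime/model_runner.h"  // pre-diff header path; removed by this change

// Illustrative-only reconstruction of the removed lifecycle.
bool RunLegacyModel(uint32_t device_id, uint64_t session_id, uint32_t model_id,
                    const std::shared_ptr<ge::model_runner::DavinciModel> &davinci_model,
                    const std::shared_ptr<ge::ModelListener> &listener,
                    const ge::InputData &input_data, ge::OutputData &output_data) {
  auto &runner = ge::model_runner::ModelRunner::Instance();
  if (!runner.LoadDavinciModel(device_id, session_id, model_id, davinci_model, listener)) {
    return false;
  }
  if (!runner.DistributeTask(model_id) || !runner.LoadModelComplete(model_id)) {
    return false;
  }
  const bool run_ok = runner.RunModel(model_id, input_data, &output_data);
  (void)runner.UnloadModel(model_id);
  return run_ok;
}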
- */ - -#ifndef INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ -#define INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ - -#include -#include -#include - -namespace ge { -namespace model_runner { -struct TensorInfo { - int64_t GetShapeSize() const { - int64_t res = 1; - if (dims.empty()) { - return 0; - } - for (auto dim : dims) { - res *= dim; - } - return res; - } - - int64_t GetDim(uint32_t index) { - if (index >= dims.size()) { - return 0; - } - return dims[index]; - } - - std::vector dims; - uint32_t datatype; - uint32_t format; - uint32_t real_dim_cnt; - uint32_t size; - bool is_output; -}; - -struct OpInfo { - uint32_t index; - std::string name; - std::string type; - bool var_is_broadcast; - std::vector input_addrs; - std::vector output_addrs; - std::vector input_tensors; - std::vector output_tensors; - std::vector weight_tensors; - std::vector src_name; - std::vector src_index; - std::string weight_data; -}; - -using TensorInfoPtr = std::shared_ptr; -using OpInfoPtr = std::shared_ptr; -} // namespace model_runner -} // namespace ge -#endif // INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h deleted file mode 100644 index f59c6454..00000000 --- a/inc/framework/ge_runtime/task_info.h +++ /dev/null @@ -1,405 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
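The deleted TensorInfo::GetShapeSize above multiplies dims with no overflow guard. A hedged variant that reuses the CheckInt64MulOverflow helper retained in framework/common/util.h (the -1 overflow sentinel is an assumption):

#include <cstdint>
#include <vector>
#include "framework/common/util.h"  // ge::CheckInt64MulOverflow

// Hedged sketch: overflow-checked version of the removed shape-size product.
int64_t SafeShapeSize(const std::vector<int64_t> &dims) {
  if (dims.empty()) {
    return 0;  // same convention as the deleted code
  }
  int64_t res = 1;
  for (const int64_t dim : dims) {
    if (!ge::CheckInt64MulOverflow(res, dim)) {
      return -1;  // overflow sentinel, assumed
    }
    res *= dim;
  }
  return res;
}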
- */ - -#ifndef INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ -#define INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ - -#include -#include -#include -#include -#include - -#include "cce/taskdown_api.h" - -namespace ge { -namespace model_runner { -enum TaskInfoType { - CCE = 0, - TBE, - AICPU, - LABEL_SET, - LABEL_SWITCH, - LABEL_GOTO, - EVENT_RECORD, - EVENT_WAIT, - FUSION_START, - FUSION_END, - HCCL, - PROFILER_TRACE, - MEMCPY_ASYNC, - STREAM_SWITCH, - STREAM_ACTIVE, - // Insert new task type here - REVSERVED = 23 -}; - -class TaskInfo { - public: - virtual ~TaskInfo() {} - uint32_t stream_id() const { return stream_id_; } - TaskInfoType type() const { return type_; } - std::string op_name() const { return op_name_; } - bool dump_flag() const { return dump_flag_; } - - protected: - TaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, bool dump_flag) - : op_name_(op_name), stream_id_(stream_id), type_(type), dump_flag_(dump_flag) {} - - private: - std::string op_name_; - uint32_t stream_id_; - TaskInfoType type_; - bool dump_flag_; -}; - -class CceTaskInfo : public TaskInfo { - public: - CceTaskInfo(const std::string &op_name, uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, - uint32_t block_dim, const std::vector &args, uint32_t args_size, - const std::vector &sm_desc, const std::vector &flow_table, - const std::vector &args_offset, bool is_flowtable) - : TaskInfo(op_name, stream_id, TaskInfoType::CCE, false), - ctx_(ctx), - stub_func_(stub_func), - block_dim_(block_dim), - args_(args), - args_size_(args_size), - sm_desc_(sm_desc), - flow_table_(flow_table), - args_offset_(args_offset), - is_flowtable_(is_flowtable) {} - ~CceTaskInfo() override {} - - cce::ccOpContext cc_context() const { return ctx_; } - std::string stub_func() const { return stub_func_; } - uint32_t block_dim() const { return block_dim_; } - const std::vector &args() const { return args_; } - uint32_t args_size() const { return args_size_; } - const std::vector &sm_desc() const { return sm_desc_; } - const std::vector &flow_table() const { return flow_table_; } - const std::vector &args_offset() const { return args_offset_; } - bool is_flowtable() const { return is_flowtable_; } - - private: - cce::ccOpContext ctx_; - std::string stub_func_; - uint32_t block_dim_; - std::vector args_; - uint32_t args_size_; - std::vector sm_desc_; - std::vector flow_table_; - std::vector args_offset_; - bool is_flowtable_; -}; - -class TbeTaskInfo : public TaskInfo { - public: - TbeTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, - const std::vector &args, uint32_t args_size, const std::vector &sm_desc, void *binary, - uint32_t binary_size, const std::vector &meta_data, const std::vector &input_data_addrs, - const std::vector &output_data_addrs, const std::vector &workspace_addrs, bool dump_flag) - : TaskInfo(op_name, stream_id, TaskInfoType::TBE, dump_flag), - stub_func_(stub_func), - block_dim_(block_dim), - args_(args), - args_size_(args_size), - sm_desc_(sm_desc), - binary_(binary), - binary_size_(binary_size), - meta_data_(meta_data), - input_data_addrs_(input_data_addrs), - output_data_addrs_(output_data_addrs), - workspace_addrs_(workspace_addrs) {} - ~TbeTaskInfo() override {} - - const std::string &stub_func() const { return stub_func_; } - uint32_t block_dim() const { return block_dim_; } - const std::vector &args() const { return args_; } - uint32_t args_size() const { return args_size_; } - const std::vector &sm_desc() const { 
return sm_desc_; } - void *binary() const { return binary_; } - uint32_t binary_size() const { return binary_size_; } - const std::vector &meta_data() const { return meta_data_; } - const std::vector &input_data_addrs() const { return input_data_addrs_; } - const std::vector &output_data_addrs() const { return output_data_addrs_; } - const std::vector &workspace_addrs() const { return workspace_addrs_; } - - void SetBinary(void *binary, uint32_t binary_size) { - binary_ = binary; - binary_size_ = binary_size; - } - - private: - std::string stub_func_; - uint32_t block_dim_; - std::vector args_; - uint32_t args_size_; - std::vector sm_desc_; - void *binary_; - uint32_t binary_size_; - std::vector meta_data_; - std::vector input_data_addrs_; - std::vector output_data_addrs_; - std::vector workspace_addrs_; -}; - -class AicpuTaskInfo : public TaskInfo { - public: - AicpuTaskInfo(const std::string &op_name, uint32_t stream_id, const string &so_name, const std::string &kernel_name, - const std::string &node_def, const std::string &ext_info, const std::vector &input_data_addrs, - const std::vector &output_data_addrs, bool dump_flag) - : TaskInfo(op_name, stream_id, TaskInfoType::AICPU, dump_flag), - so_name_(so_name), - kernel_name_(kernel_name), - node_def_(node_def), - ext_info_(ext_info), - input_data_addrs_(input_data_addrs), - output_data_addrs_(output_data_addrs) {} - ~AicpuTaskInfo() override {} - - const std::string &so_name() const { return so_name_; } - const std::string &kernel_name() const { return kernel_name_; } - const std::string &node_def() const { return node_def_; } - const std::vector &input_data_addrs() const { return input_data_addrs_; } - const std::vector &output_data_addrs() const { return output_data_addrs_; } - const std::string &ext_info() const { return ext_info_; } - - private: - std::string so_name_; - std::string kernel_name_; - std::string node_def_; - std::string ext_info_; - std::vector input_data_addrs_; - std::vector output_data_addrs_; -}; - -class LabelSetTaskInfo : public TaskInfo { - public: - LabelSetTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) - : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SET, false), label_id_(label_id) {} - ~LabelSetTaskInfo() override {} - uint32_t label_id() const { return label_id_; } - - private: - uint32_t label_id_; -}; - -class LabelGotoTaskInfo : public TaskInfo { - public: - LabelGotoTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) - : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_GOTO, false), label_id_(label_id) {} - ~LabelGotoTaskInfo() override {} - uint32_t label_id() const { return label_id_; } - - private: - uint32_t label_id_; -}; - -class LabelSwitchTaskInfo : public TaskInfo { - public: - LabelSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_size, - const std::vector &label_list, void *cond) - : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SWITCH, false), - label_size_(label_size), - label_list_(label_list), - cond_(cond) {} - ~LabelSwitchTaskInfo() override {} - uint32_t label_size() const { return label_size_; } - const std::vector &label_list() const { return label_list_; } - void *cond() const { return cond_; } - - private: - uint32_t label_size_; - std::vector label_list_; - void *cond_; -}; - -class EventTaskInfo : public TaskInfo { - public: - uint32_t event_id() const { return event_id_; } - - protected: - EventTaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, uint32_t event_id) - : 
TaskInfo(op_name, stream_id, type, false), event_id_(event_id) {} - ~EventTaskInfo() override {} - - uint32_t event_id_; -}; - -class EventRecordTaskInfo : public EventTaskInfo { - public: - EventRecordTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_RECORD, event_id) {} - ~EventRecordTaskInfo() override {} -}; - -class EventWaitTaskInfo : public EventTaskInfo { - public: - EventWaitTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_WAIT, event_id) {} - ~EventWaitTaskInfo() override {} -}; - -class FusionStartTaskInfo : public TaskInfo { - public: - explicit FusionStartTaskInfo(const std::string &op_name, uint32_t stream_id) - : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_START, false) {} - ~FusionStartTaskInfo() override {} -}; - -class FusionEndTaskInfo : public TaskInfo { - public: - explicit FusionEndTaskInfo(const std::string &op_name, uint32_t stream_id) - : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_END, false) {} - ~FusionEndTaskInfo() override {} -}; - -class HcclTaskInfo : public TaskInfo { - public: - HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, - void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num, - const std::vector &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, - int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) - : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), - hccl_type_(hccl_type), - input_data_addr_(input_data_addr), - output_data_addr_(output_data_addr), - workspace_size_(workspace_size), - hccl_stream_num_(hccl_stream_num), - private_def_(private_def), - ops_kernel_store_(ops_kernel_store), - count_(count), - root_id_(root_id), - op_type_(op_type), - data_type_(data_type), - group_(group) {} - ~HcclTaskInfo() override {} - - const std::string &hccl_type() const { return hccl_type_; } - void *input_data_addr() const { return input_data_addr_; } - void *output_data_addr() const { return output_data_addr_; } - int64_t workspace_size() const { return workspace_size_; } - int64_t hccl_stream_num() const { return hccl_stream_num_; } - const std::vector &private_def() const { return private_def_; } - void *ops_kernel_store() const { return ops_kernel_store_; } - int32_t count() const { return count_; } - int64_t root_id() const { return root_id_; } - int64_t op_type() const { return op_type_; } - int64_t data_type() const { return data_type_; } - const std::string &group() const { return group_; } - - private: - std::string hccl_type_; - void *input_data_addr_; - void *output_data_addr_; - int64_t workspace_size_; - int64_t hccl_stream_num_; - std::vector private_def_; - void *ops_kernel_store_; - int32_t count_; - int64_t root_id_; - int64_t op_type_; - int64_t data_type_; - std::string group_; -}; - -class ProfilerTraceTaskInfo : public TaskInfo { - public: - ProfilerTraceTaskInfo(const std::string &op_name, uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) - : TaskInfo(op_name, stream_id, TaskInfoType::PROFILER_TRACE, false), - log_id_(log_id), - notify_(notify), - flat_(flat) {} - ~ProfilerTraceTaskInfo() override {} - - uint64_t log_id() const { return log_id_; } - bool notify() const { return notify_; } - uint32_t flat() const { return flat_; } - - private: - uint64_t log_id_; - bool notify_; - uint32_t flat_; -}; - -class 
MemcpyAsyncTaskInfo : public TaskInfo { - public: - MemcpyAsyncTaskInfo(const std::string &op_name, uint32_t stream_id, void *dst, uint64_t dst_max, void *src, - uint64_t count, uint32_t kind, bool dump_flag) - : TaskInfo(op_name, stream_id, TaskInfoType::MEMCPY_ASYNC, dump_flag), - dst_(dst), - dst_max_(dst_max), - src_(src), - count_(count), - kind_(kind) {} - ~MemcpyAsyncTaskInfo() override {} - - void *dst() const { return dst_; } - uint64_t dst_max() const { return dst_max_; } - void *src() const { return src_; } - uint64_t count() const { return count_; } - uint32_t kind() const { return kind_; } - - private: - void *dst_; - uint64_t dst_max_; - void *src_; - uint64_t count_; - int32_t kind_; -}; - -class StreamSwitchTaskInfo : public TaskInfo { - public: - StreamSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, int64_t true_stream_id, void *input_addr, - void *value_addr, int64_t cond, int64_t data_type) - : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_SWITCH, false), - true_stream_id_(true_stream_id), - input_addr_(input_addr), - value_addr_(value_addr), - cond_(cond), - data_type_(data_type) {} - ~StreamSwitchTaskInfo() override {} - - int64_t true_stream_id() const { return true_stream_id_; } - void *input_addr() const { return input_addr_; } - void *value_addr() const { return value_addr_; } - int64_t cond() const { return cond_; } - int64_t data_type() const { return data_type_; } - - private: - int64_t true_stream_id_; - void *input_addr_; - void *value_addr_; - int64_t cond_; - int64_t data_type_; -}; - -class StreamActiveTaskInfo : public TaskInfo { - public: - StreamActiveTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t active_stream_id) - : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_ACTIVE, false), active_stream_id_(active_stream_id) {} - ~StreamActiveTaskInfo() override {} - - uint32_t active_stream_id() const { return active_stream_id_; } - - private: - uint32_t active_stream_id_; -}; -} // namespace model_runner -} // namespace ge - -#endif // INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index 5da5a593..8213c115 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,26 +21,31 @@ #include #include #include -#include "ge/ge_ir_build.h" -#include "common/ge_inner_error_codes.h" -#include "common/ge_types.h" +#include "external/ge/ge_ir_build.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/ge_types.h" #include "graph/ge_tensor.h" #include "graph/graph.h" #include "graph/op_desc.h" #include "graph/detail/attributes_holder.h" -#include "omg/omg_inner_types.h" +#include "framework/omg/omg_inner_types.h" namespace ge { +const std::string kAttrSupportDynamicShape = "support_dynamicshape"; + class GeRootModel; class GE_FUNC_VISIBILITY GeGenerator { public: + using InOutTensorRef = std::pair &, const std::vector &>; static GeGenerator &GetInstance() { static GeGenerator Instance; return Instance; } GeGenerator() = default; - ~GeGenerator() { (void)Finalize(); } + ~GeGenerator() { + (void)Finalize(); + } GeGenerator(const GeGenerator &) = delete; @@ -54,7 +59,7 @@ class GE_FUNC_VISIBILITY GeGenerator { Status GenerateOfflineModel(const Graph &graph, const std::string &file_name_prefix, const std::vector &inputs = std::vector()); - Status GenerateOnlineModel(const Graph &graph, const vector &inputs, ge::ModelBufferData &model); + Status GenerateOnlineModel(const Graph &graph, const std::vector &inputs, ge::ModelBufferData &model); Status GenerateInfershapeGraph(const Graph &graph); @@ -81,10 +86,16 @@ class GE_FUNC_VISIBILITY GeGenerator { /// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 /// @param [out] model_buff: model buff of op. /// @return SUCCESS or FAILED - Status BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - OpEngineType engine_type, ModelBufferData &model_buff); - Status BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - OpEngineType engine_type, int32_t compile_flag, ModelBufferData &model_buff); + Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs, OpEngineType engine_type, + ModelBufferData &model_buff); + Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs, OpEngineType engine_type, int32_t compile_flag, + ModelBufferData &model_buff); + Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs, OpEngineType engine_type, int32_t compile_flag, + ModelBufferData &model_buff, GraphStage graph_stage, ComputeGraphPtr &compute_graph); + /// /// @ingroup ge /// @brief: Build single Op into model buff. @@ -94,22 +105,33 @@ class GE_FUNC_VISIBILITY GeGenerator { /// @param [in] graph_name: graph name. /// @param [out] graph: graph of single op. 
/// @return SUCCESS or FAILED - Status BuildSingleOpGraph(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - std::string graph_name, Graph &graph); + Status BuildSingleOpGraph(const OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, + Graph &graph, std::vector> &inputs_name_type) const; + Status BuildOriginalGraphInfo(OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs, const std::string &model_file_name, + bool is_offline, int32_t compile_flag, GraphStage graph_stage, Graph &graph, + ComputeGraphPtr &compute_graph, bool &fuzz_compile_flag, + std::vector> &inputs_name_type); private: - Status GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, + Status GenerateModel(const Graph &graph, const std::string &file_name_prefix, const std::vector &inputs, ge::ModelBufferData &model, bool is_offline = true); - Status BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, - bool is_offline = true, int32_t compile_flag = 0); + Status BuildSingleOp(OpDescPtr &op_desc, const std::vector &inputs, const std::vector &outputs, + const std::string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, + ComputeGraphPtr &comp_graph, bool is_offline = true, int32_t compile_flag = 0, + GraphStage graph_stage = GraphStage::GRAPH_STAGE_RESERVED); bool CheckNoAicore(const ComputeGraphPtr &graph); - void RemoveConst(const vector &inputs, vector &outputs); - Status CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs); - Status InferFormatForSingleOp(OpDescPtr &op_desc, Graph &graph); + void RemoveConst(const std::vector &inputs, std::vector &outputs) const; + Status CheckForSingleOp(const OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs) const; + Status InferFormatForSingleOp(const OpDescPtr &op_desc, const Graph &graph) const; using GeRootModelPtr = std::shared_ptr; Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); + Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector &inputs, + const std::vector &outputs, + const std::vector> &inputs_name_type, + std::vector &generalized_build_attrs) const; class Impl; diff --git a/inc/framework/generator/generator_api.h b/inc/framework/generator/generator_api.h deleted file mode 100644 index 56b83a20..00000000 --- a/inc/framework/generator/generator_api.h +++ /dev/null @@ -1,187 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
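Putting the new compile_flag overload above to work is straightforward. A hedged call sketch (the tensor element type GeTensor is stripped from this extract and therefore assumed, and ENGINE_SYS is only an illustrative engine choice):

#include <vector>
#include "framework/generator/ge_generator.h"

// Hedged sketch of driving the updated BuildSingleOpModel overload.
ge::Status BuildOneOp(ge::OpDescPtr &op_desc,
                      const std::vector<ge::GeTensor> &inputs,
                      const std::vector<ge::GeTensor> &outputs,
                      ge::ModelBufferData &buff) {
  // compile_flag: accurate build is 0, fuzz build is 1 (per the doc comment above).
  return ge::GeGenerator::GetInstance().BuildSingleOpModel(op_desc, inputs, outputs,
                                                           ge::ENGINE_SYS, 0, buff);
}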
- */ - -#ifndef INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_ -#define INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_ - -#if defined(_MSC_VER) -#ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY _declspec(dllexport) -#else -#define GE_FUNC_VISIBILITY -#endif -#else -#ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) -#else -#define GE_FUNC_VISIBILITY -#endif -#endif - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -typedef uint32_t Status_t; - -typedef void *OpAttr_t; -typedef void *OpTensor_t; - -/// -/// @ingroup ge -/// @brief Generate offline model for the op. -/// @param [in] op_type: type name of the op. -/// @param [in] in_tensor: input description array (created by OpTensorCreate). -/// @param [in] in_num: number of in_tensor. -/// @param [in] out_tensor: output description array (created by OpTensorCreate). -/// @param [in] out_num: number of out_tensor. -/// @param [in] attr: the attributes of the op (created by OpAttrCreate). -/// @param [in] om_file: file name for the om to save. -/// @return 0 for success / others for fail -/// -GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, - const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, - const char *om_file); - -/// -/// @ingroup ge -/// @brief Create Tensor Description. -/// @param [in] format: tensor format of the data. -/// @param [in] datatype: tensor type of the data. -/// @param [in] shape: tensor shape array. -/// @param [in] num: number of shape. -/// @return OpTensor_t for success / nullptr for failure -/// -GE_FUNC_VISIBILITY extern OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, int num); - -/// -/// @ingroup ge -/// @brief Destroy Tensor Description. -/// @param [in] OpTensor_t tensor: created by OpTensorCreate. -/// @param [out] none -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t OpTensorDestroy(OpTensor_t tensor); - -/// -/// @ingroup ge -/// @brief Create an attribute holder. -/// @param [in] none -/// @param [out] none -/// @return OpAttr_t for success / nullptr for failure. -/// -GE_FUNC_VISIBILITY extern OpAttr_t OpAttrCreate(); - -/// -/// @ingroup ge -/// @brief Destroy Attribute holder. -/// @param [in] OpAttr_t attr: created by OpAttrCreate. -/// @param [out] none -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t OpAttrDestroy(OpAttr_t attr); - -/// -/// @ingroup ge -/// @brief Set a boolean attribute to the attribute holder. -/// @param [in] attr: attribute holder (created by OpAttrCreate). -/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). -/// @param [in] value: attributed value. -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value); - -/// -/// @ingroup ge -/// @brief Set an integer attribute to the attribute holder. -/// @param [in] attr: attribute holder (created by OpAttrCreate). -/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). -/// @param [in] value: attribute value. -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value); - -/// -/// @ingroup ge -/// @brief Set a float attribute to the attribute holder. -/// @param [in] attr: attribute holder (created by OpAttrCreate). -/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). 
-/// @param [in] value: attribute value. -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value); - -/// -/// @ingroup ge -/// @brief Set a string attribute to the attribute holder. -/// @param [in] attr: attribute holder (created by OpAttrCreate). -/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). -/// @param [in] value: attribute value (can`t be nullptr, end with '\0'). -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value); - -/// -/// @ingroup ge -/// @brief Set a boolean array attribute to the attribute holder. -/// @param [in] attr: attribute holder (created by OpAttrCreate). -/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). -/// @param [in] value: attribute value array. -/// @param [in] num: number of value array. -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *value, int num); - -/// -/// @ingroup ge -/// @brief Set an integer array attribute to the attribute holder. -/// @param [in] attr: attribute holder (created by OpAttrCreate). -/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). -/// @param [in] value: attribute value array. -/// @param [in] num: number of value array. -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *value, int num); - -/// -/// @ingroup ge -/// @brief Set a float array attribute to the attribute holder. -/// @param [in] attr: attribute holder (created by OpAttrCreate). -/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). -/// @param [in] value: attribute value array. -/// @param [in] num: number of value array. -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *value, int num); - -/// -/// @ingroup ge -/// @brief Set a string array attribute to the attribute holder. -/// @param [in] attr: attribute holder (created by OpAttrCreate). -/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). -/// @param [in] value: attribute value array (each value can`t be nullptr, end with '\0'). -/// @param [in] num: number of value array. -/// @return 0 for success / others for failure. -/// -GE_FUNC_VISIBILITY extern Status_t SetAttrStringList(OpAttr_t attr, const char *name, const char **value, int num); - -#ifdef __cplusplus -} -#endif - -#endif // INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_ diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index a316fd59..30ade3b7 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
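The file deleted above was the C-style single-op path; the C++ GeGenerator API shown earlier replaces it. An illustrative-only reconstruction of how it used to be driven, assembled from the deleted declarations (format/datatype codes, the attribute name, and file names are made up):

#include <cstdint>
#include "framework/generator/generator_api.h"  // pre-diff header path; removed by this change

int BuildConvOffline() {
  const int64_t shape[] = {1, 3, 224, 224};
  OpTensor_t input = OpTensorCreate(0, 0, shape, 4);  // format/datatype codes illustrative
  if (input == nullptr) {
    return -1;
  }
  OpAttr_t attr = OpAttrCreate();
  (void)SetAttrBool(attr, "with_bias", false);  // attribute name illustrative
  // Note: the removed identifier really is spelled "OpTaskGernerator".
  const Status_t ret = OpTaskGernerator("Conv2D", &input, 1, nullptr, 0, attr, "conv2d.om");
  (void)OpAttrDestroy(attr);
  (void)OpTensorDestroy(input);
  return (ret == 0U) ? 0 : -1;
}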
@@ -17,11 +17,7 @@ #ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ #define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ -#include -#include - -#include "ge/ge_api_error_codes.h" -#include "graph//types.h" +#include "external/ge/ge_api_error_codes.h" #include "runtime/mem.h" namespace ge { diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index 173cc64e..be615426 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ #include -#include "common/ge_inner_error_codes.h" +#include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" namespace ge { @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { MemoryAssigner &operator=(const MemoryAssigner &) = delete; - Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); + Status AssignMemory(std::map &mem_offset, size_t &zero_copy_mem_size); private: ge::ComputeGraphPtr compute_graph_; diff --git a/inc/framework/omg/ge_init.h b/inc/framework/omg/ge_init.h index 42fd8979..a2fe6fc9 100644 --- a/inc/framework/omg/ge_init.h +++ b/inc/framework/omg/ge_init.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #define INC_FRAMEWORK_OMG_GE_INIT_H_ #include #include -#include "common/ge_inner_error_codes.h" +#include "framework/common/ge_inner_error_codes.h" namespace ge { class GE_FUNC_VISIBILITY GEInit { diff --git a/inc/framework/omg/model_tool.h b/inc/framework/omg/model_tool.h index 8c425823..24554e65 100644 --- a/inc/framework/omg/model_tool.h +++ b/inc/framework/omg/model_tool.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index a0cdb449..0d8b59d1 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
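MemoryAssigner::AssignMemory above also loses its is_loop_graph flag. A hedged call sketch; this extract strips the map's template arguments, so the memory-type-to-offset instantiation below is an assumption:

#include <cstddef>
#include <map>
#include "framework/memory/memory_assigner.h"

// Hedged sketch of the slimmed-down AssignMemory signature.
ge::Status AssignAllMemory(ge::MemoryAssigner &assigner) {
  std::map<uint64_t, size_t> mem_offset;  // key/value types assumed
  size_t zero_copy_mem_size = 0U;
  return assigner.AssignMemory(mem_offset, zero_copy_mem_size);
}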
@@ -17,10 +17,12 @@ #ifndef INC_FRAMEWORK_OMG_OMG_H_ #define INC_FRAMEWORK_OMG_OMG_H_ -#include #include #include #include + +#include +#include "external/ge/ge_api_types.h" #include "framework/omg/omg_inner_types.h" #include "framework/omg/parser/parser_inner_ctx.h" #include "proto/ge_ir.pb.h" @@ -31,20 +33,14 @@ #include "graph/model.h" #include "runtime/kernel.h" -using domi::Status; -using std::pair; -using std::string; -using std::unordered_map; -using std::vector; - namespace ge { /** * @ingroup domi_omg * @brief init omg context * @return void */ -GE_FUNC_VISIBILITY Status InitDomiOmgContext(const string &input_shape, const string &input_format, - const string &net_format, bool is_dynamic_input); +GE_FUNC_VISIBILITY domi::Status InitDomiOmgContext(const std::string &input_shape, const std::string &input_format, + const std::string &net_format, bool is_dynamic_input); /** * @ingroup domi_omg @@ -61,10 +57,10 @@ GE_FUNC_VISIBILITY Status InitDomiOmgContext(const string &input_shape, const st * @param [in] atc_params multiply atc params * @return Status result code */ -GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map &atc_params, - const char *model_file, const char *weights_file, domi::FrameworkType type, - const char *op_conf = nullptr, const char *target = nullptr, - RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = false); +GE_FUNC_VISIBILITY domi::Status ParseGraph(ge::Graph &graph, const std::map &atc_params, + const char *model_file, const char *weights_file, domi::FrameworkType type, + const char *op_conf = nullptr, const char *target = nullptr, + RunMode run_mode = RunMode::GEN_OM_MODEL, bool is_dynamic_input = false); /** * @ingroup domi_omg @@ -74,9 +70,9 @@ GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map &fileList, string &caffe_parser_path); +GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector &file_list, + std::string &caffe_parser_path); -GE_FUNC_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); +GE_FUNC_VISIBILITY domi::Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); -GE_FUNC_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, - const std::string &output_format); +GE_FUNC_VISIBILITY domi::Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, + const std::string &output); -GE_FUNC_VISIBILITY Status GetOutputLeaf(ge::NodePtr node, - std::vector> &output_nodes_info); +GE_FUNC_VISIBILITY domi::Status GetOutputLeaf(ge::NodePtr node, + std::vector> &output_nodes_info); -GE_FUNC_VISIBILITY void GetOutputNodesNameAndIndex(std::vector> &output_nodes_info, - std::vector &output_nodes_name); +GE_FUNC_VISIBILITY void CreateOutputNodesInfo(std::vector> &output_nodes_info, + std::vector &output_nodes_name); GE_FUNC_VISIBILITY void UpdateOmgCtxWithParserCtx(); diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 0b799bf2..1addd326 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,12 +31,7 @@ using domi::DOMI_TENSOR_ND; using domi::DOMI_TENSOR_RESERVED; using domi::domiTensorFormat_t; -using domi::FRAMEWORK_RESERVED; -using domi::FrameworkType; -using std::map; -using std::string; using std::unordered_map; -using std::vector; namespace ge { /** @@ -51,34 +46,13 @@ enum RunMode { DISPLAY_OM_INFO = 6 // display model info }; -/// -/// @ingroup domi_omg -/// @brief high-precision mode -/// -enum HighPrecisionMode { - // the FP16 high-precision function is disabled in common mode - HIGH_PRECISION_DEFAULT = 0, - - // high-precision mode, enabling FP16 high-precision mode (Convolution/FullConnect/AvgPooling are involved) - HIGH_PRECISION_FP16 = 1 -}; - -/// -/// @ingroup domi_omg -/// @brief description buffer data -/// -struct OMGBufferData { - void *data; - uint32_t length; -}; - struct OmgContext { - OmgContext() { format = DOMI_TENSOR_ND; } - domiTensorFormat_t format; + OmgContext() : format(domi::DOMI_TENSOR_ND) {} + domi::domiTensorFormat_t format; // format of the input specified by the command line - std::unordered_map input_nodes_format_map; - std::vector output_formats; + std::unordered_map input_nodes_format_map; + std::vector output_formats; // user-designate input dims std::vector>> user_input_dims; @@ -96,18 +70,18 @@ struct OmgContext { // default out nodes (this is used for determing the orders) std::vector> default_out_nodes; // save the output node of the network, value = topName, - // topName indicates the output name of the operator. - std::vector user_out_nodes_top_vec; + // tensorName indicates the output name of the operator. + std::vector user_out_tensors; // net out nodes (where user_out_nodes or leaf nodes) std::vector net_out_nodes; - // net out nodes top names(only caffe has top) - std::vector out_top_names; - // net data nodes top names(only caffe has top) - std::vector data_top_names; + // net out nodes tensor names(caffe or onnx) + std::vector out_tensor_names; + // net data nodes tensor names(caffe or onnx) + std::vector data_tensor_names; // preferential format used by the entire network - domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; + domi::domiTensorFormat_t net_format = domi::DOMI_TENSOR_RESERVED; domi::FrameworkType type = domi::FRAMEWORK_RESERVED; - RunMode run_mode = ONLY_PRE_CHECK; + RunMode run_mode = RunMode::ONLY_PRE_CHECK; bool train_flag = false; std::string output_type; @@ -118,14 +92,13 @@ struct OmgContext { std::string dynamic_image_size; std::string dynamic_dims; std::string dynamic_node_type; - std::vector> user_real_input_dims; - std::vector cur_dynamic_dims; bool need_multi_batch = false; std::vector data_nodes; std::vector getnext_nosink_nodes; bool fuzz_compile_flag = false; std::string atc_cmdline; bool user_attr_index_valid = false; + bool is_online_model = false; }; } // namespace ge diff --git a/inc/framework/omg/omg_types.h b/inc/framework/omg/omg_types.h index 771a53a4..8ec2a49b 100644 --- a/inc/framework/omg/omg_types.h +++ b/inc/framework/omg/omg_types.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
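With RunMode now spelled in its qualified form and OmgContext reworked around tensor names, a hedged initialization sketch (field values are illustrative):

#include "framework/omg/omg_inner_types.h"

// Hedged sketch: populating the reworked context.
ge::OmgContext MakeOmgContext() {
  ge::OmgContext ctx;  // constructor already sets format to domi::DOMI_TENSOR_ND
  ctx.run_mode = ge::RunMode::GEN_OM_MODEL;  // qualified enumerator, as used in this diff
  ctx.is_online_model = false;               // flag added by this diff
  ctx.train_flag = false;
  return ctx;
}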
diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 67f9338d..8c0130c8 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,8 +25,6 @@ #include "graph/ge_tensor.h" #include "graph/graph.h" #include "graph/op_desc.h" -#include "graph/operator.h" -#include "graph/range_vistor.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" @@ -36,7 +34,7 @@ using Status = domi::Status; namespace domi { using GetGraphCallback = std::function( - const google::protobuf::Message *root_proto, const std::string &graph)>; + const google::protobuf::Message *root_proto, const std::string &graph)>; using GetGraphCallbackV2 = std::function; @@ -109,7 +107,11 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } + virtual Status ToJson(const char *model_file, const char *json_file) { + (void)model_file; + (void)json_file; + return SUCCESS; + } /* * @ingroup domi_omg @@ -129,7 +131,11 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { return UNSUPPORTED; } + virtual Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { + (void)serialized_proto; + (void)graph; + return UNSUPPORTED; + } /** * @ingroup domi_omg @@ -142,8 +148,22 @@ class GE_FUNC_VISIBILITY ModelParser { */ virtual Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, ge::ComputeGraphPtr &graph) { + (void)serialized_proto; + (void)callback; + (void)graph; return UNSUPPORTED; } + + virtual bool HasError() { + return false; + } + + virtual Status Save(const std::string &file) { + (void)file; + return SUCCESS; + } + + virtual void Clear() {} }; } // namespace domi diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h index 70bec218..f17b6fee 100644 --- a/inc/framework/omg/parser/op_parser.h +++ b/inc/framework/omg/parser/op_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
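A hedged caller-side sketch of the new ModelParser hooks added in model_parser.h above (HasError, Save, Clear); the file name is hypothetical, and the base-class defaults make every call a safe no-op.

static void PersistParserDiagnostics(domi::ModelParser &parser) {
  if (parser.HasError()) {                        // false unless a subclass overrides it
    (void)parser.Save("parser_diagnostics.bin");  // hypothetical path; default impl just returns SUCCESS
  }
  parser.Clear();                                 // default impl is a no-op
}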
@@ -19,10 +19,8 @@ #include #include "framework/omg/parser/parser_types.h" -#include "omg/omg_inner_types.h" +#include "framework/omg/omg_inner_types.h" #include "proto/om.pb.h" -#include "graph/ge_tensor.h" -#include "graph/op_desc.h" #include "graph/utils/op_desc_utils.h" using google::protobuf::Message; @@ -50,7 +48,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; + virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::OpDescPtr &op_desc) = 0; /** * @ingroup domi_omg @@ -60,7 +58,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; + virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::Operator &op_dest) = 0; /** * @ingroup domi_omg @@ -70,7 +68,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; + virtual domi::Status ParseWeights(const google::protobuf::Message *op_src, ge::NodePtr &node) = 0; /** * @ingroup domi_omg @@ -80,7 +78,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { + virtual domi::Status GetFormat(const google::protobuf::Message *op_src, domi::domiTensorFormat_t &format) { (void)op_src; // Indicates that the op does not provide a value for format format = domi::DOMI_TENSOR_RESERVED; diff --git a/inc/framework/omg/parser/parser_api.h b/inc/framework/omg/parser/parser_api.h index 6c223665..6840da2b 100644 --- a/inc/framework/omg/parser/parser_api.h +++ b/inc/framework/omg/parser/parser_api.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,15 +17,14 @@ #ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ #define INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ -#include #include #include -#include "ge/ge_api_error_codes.h" +#include "external/ge/ge_api_error_codes.h" namespace ge { // Initialize parser -GE_FUNC_VISIBILITY Status ParserInitialize(const std::map& options); +GE_FUNC_VISIBILITY Status ParserInitialize(const std::map &options); // Finalize parser, release all resources GE_FUNC_VISIBILITY Status ParserFinalize(); } // namespace ge -#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ +#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h index 9d6590c0..cf6499e9 100644 --- a/inc/framework/omg/parser/parser_factory.h +++ b/inc/framework/omg/parser/parser_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
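A minimal lifecycle sketch for the parser_api.h entry points above; the options map's elided template arguments are assumed to be <std::string, std::string>, and an empty map is assumed to be acceptable.

#include <map>
#include <string>
#include "framework/omg/parser/parser_api.h"

static ge::Status RunParserSession() {
  const std::map<std::string, std::string> options;  // parser options, left empty here
  const ge::Status ret = ge::ParserInitialize(options);
  if (ret != ge::SUCCESS) {
    return ret;
  }
  // ... invoke model/weights parsers here ...
  return ge::ParserFinalize();  // release all parser resources
}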
@@ -23,14 +23,13 @@ #include #include "framework/omg/omg_inner_types.h" #include "framework/omg/parser/parser_types.h" - -using Status = domi::Status; +#include "external/register/register.h" namespace domi { class WeightsParser; class ModelParser; -typedef std::shared_ptr (*MODEL_PARSER_CREATOR_FUN)(void); +using MODEL_PARSER_CREATOR_FUN = std::shared_ptr (*)(void); // Create modelparser for different frameworks class GE_FUNC_VISIBILITY ModelParserFactory { @@ -63,7 +62,7 @@ class GE_FUNC_VISIBILITY ModelParserFactory { class GE_FUNC_VISIBILITY ModelParserRegisterar { public: - ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun) { + ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN const fun) noexcept { ModelParserFactory::Instance()->RegisterCreator(type, fun); } ~ModelParserRegisterar() {} @@ -82,7 +81,7 @@ class GE_FUNC_VISIBILITY ModelParserRegisterar { } \ ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser) -typedef std::shared_ptr (*WEIGHTS_PARSER_CREATOR_FUN)(void); +using WEIGHTS_PARSER_CREATOR_FUN = std::shared_ptr (*)(void); // Create weightsparser for different frameworks class GE_FUNC_VISIBILITY WeightsParserFactory { @@ -115,7 +114,7 @@ class GE_FUNC_VISIBILITY WeightsParserFactory { class GE_FUNC_VISIBILITY WeightsParserRegisterar { public: - WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) { + WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN const fun) noexcept { WeightsParserFactory::Instance()->RegisterCreator(type, fun); } ~WeightsParserRegisterar() {} @@ -133,6 +132,12 @@ class GE_FUNC_VISIBILITY WeightsParserRegisterar { return std::shared_ptr(ptr); \ } \ WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser) -}; // namespace domi + +class GE_FUNC_VISIBILITY OpRegTbeParserFactory { + public: + static OpRegTbeParserFactory *Instance(); + void Finalize(const domi::OpRegistrationData ®_data); +}; +} // namespace domi #endif // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h index b23da53f..5cf0d00b 100644 --- a/inc/framework/omg/parser/parser_inner_ctx.h +++ b/inc/framework/omg/parser/parser_inner_ctx.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,6 @@ #include #include #include -#include #include #include "external/register/register_fmk_types.h" #include "external/register/register_types.h" @@ -30,8 +29,8 @@ namespace ge { struct ParserContext { // format of the input specified by the command line - std::unordered_map input_nodes_format_map; - std::vector output_formats; + std::unordered_map input_nodes_format_map; + std::vector output_formats; // user-designate input dims std::vector>> user_input_dims; std::map> input_dims; @@ -46,20 +45,20 @@ struct ParserContext { // operator std::map> out_nodes_map; // save the output node of the network, value = topName, - // topName indicates the output name of the operator. - std::vector user_out_nodes_top_vec; + // tensorName indicates the output name of the operator. 
+ std::vector user_out_tensors; // net out nodes (where user_out_nodes or leaf nodes) std::vector net_out_nodes; - // net data nodes top names(only caffe has top) - std::vector data_top_names; - // net out nodes top names(only caffe has top) - std::vector out_top_names; + // net out nodes tensor names(caffe or onnx) + std::vector out_tensor_names; + // net data nodes tensor names(caffe or onnx) + std::vector data_tensor_names; // Whether to use dynamic batch size or dynamic image size bool is_dynamic_input = false; bool train_flag = false; domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; domi::FrameworkType type = domi::FRAMEWORK_RESERVED; - RunMode run_mode = GEN_OM_MODEL; + RunMode run_mode = RunMode::GEN_OM_MODEL; // save caffe custom proto path, used by caffe parse std::string custom_proto_path; // save caffe proto path, used by caffe parse diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index f3b7f00a..c1e94a4c 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ #ifndef PARSER_COMMON_TYPES_H_ #define PARSER_COMMON_TYPES_H_ -#include +#include #include #include "register/register_types.h" @@ -133,6 +133,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_IF; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSIF; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IF; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CASE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSCASE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_WHILE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *WHILE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSWHILE; diff --git a/inc/framework/omg/parser/weights_parser.h b/inc/framework/omg/parser/weights_parser.h index e4436044..52c360af 100644 --- a/inc/framework/omg/parser/weights_parser.h +++ b/inc/framework/omg/parser/weights_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
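For orientation, a sketch of roughly what the model-parser registration macro in parser_factory.h above expands to for a hypothetical ONNX parser. MyOnnxModelParser is illustrative, and the real macro additionally null-checks the allocation before wrapping it.

std::shared_ptr<domi::ModelParser> Creator_ONNX_Model_Parser(void) {
  return std::make_shared<MyOnnxModelParser>();  // hypothetical ModelParser subclass
}
// Static registrar object: its constructor runs at load time and calls
// ModelParserFactory::Instance()->RegisterCreator(domi::ONNX, Creator_ONNX_Model_Parser).
domi::ModelParserRegisterar g_ONNX_Model_Parser_Creator(domi::ONNX, Creator_ONNX_Model_Parser);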
@@ -17,13 +17,12 @@ #ifndef INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ #define INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ +#include "external/register/register_error_codes.h" #include "graph/graph.h" #include "graph/attr_value.h" #include "graph/compute_graph.h" #include "graph/ge_tensor.h" #include "graph/op_desc.h" -#include "graph/operator.h" -#include "graph/range_vistor.h" #include "graph/utils/attr_utils.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" @@ -68,6 +67,17 @@ class GE_FUNC_VISIBILITY WeightsParser { * @author */ virtual Status ParseFromMemory(const char *input, uint32_t lengt, ge::ComputeGraphPtr &graph) = 0; + + virtual bool HasError() { + return false; + } + + virtual Status Save(const std::string &file) { + (void)file; + return SUCCESS; + } + + virtual void Clear() {} }; } // namespace domi diff --git a/inc/framework/omg/version.h b/inc/framework/omg/version.h index 4facba0d..e355d284 100644 --- a/inc/framework/omg/version.h +++ b/inc/framework/omg/version.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,12 +18,9 @@ #define INC_FRAMEWORK_OMG_VERSION_H_ #include -#include -#include -#include -#include "common/debug/log.h" -#include "common/string_util.h" +#include "framework/common/debug/log.h" +#include "framework/common/string_util.h" #include "framework/common/debug/ge_log.h" namespace ge { @@ -33,8 +30,8 @@ class GE_FUNC_VISIBILITY PlatformVersionManager { ~PlatformVersionManager() = delete; static Status GetPlatformVersion(std::string &ver) { ver = "1.11.z"; - std::vector version_splits = StringUtils::Split(ver, '.'); - GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); + const std::vector version_splits = StringUtils::Split(ver, '.'); + GE_IF_BOOL_EXEC(version_splits.size() < 3U, GELOGW("Read platform version error!"); return FAILED;); GELOGI("Read current platform version: %s.", ver.c_str()); return SUCCESS; diff --git a/inc/framework/pne/pne_model.h b/inc/framework/pne/pne_model.h new file mode 100644 index 00000000..1721d09c --- /dev/null +++ b/inc/framework/pne/pne_model.h @@ -0,0 +1,128 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_PNE_MODEL_H_ +#define INC_FRAMEWORK_PNE_MODEL_H_ + +#include +#include +#include + +#include "graph/compute_graph.h" +#include "framework/common/debug/log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/ge_types.h" +#include "framework/engine/dnnengine.h" +#include "external/ge/ge_ir_build.h" + +namespace ge { +const std::string PNE_ID_NPU = "NPU"; +const std::string PNE_ID_CPU = "HOST_CPU"; + +struct ModelRelation; +class PneModel { + public: + PneModel() = default; + explicit PneModel(const ComputeGraphPtr &root_graph) : root_graph_(root_graph){}; + virtual ~PneModel() = default; + PneModel(const PneModel &other) = delete; + PneModel &operator=(const PneModel &other) = delete; + + public: + inline Status AddSubModel(const shared_ptr &submodel, std::string type = "") { + if (submodel == nullptr) { + GELOGE(INTERNAL_ERROR, "submodel is nullptr, type = %s", type.c_str()); + return INTERNAL_ERROR; + } + submodel->SetModelType(type); + if (!submodels_.emplace(submodel->GetModelName(), submodel).second) { + GELOGE(INTERNAL_ERROR, "submodel already exist, name = %s, type = %s", submodel->GetModelName().c_str(), + type.c_str()); + return INTERNAL_ERROR; + } + return SUCCESS; + } + + inline const std::shared_ptr GetSubmodel(const std::string &name) const { + const auto &it = submodels_.find(name); + if (it == submodels_.end()) { + return nullptr; + } + return it->second; + } + + inline const std::map> &GetSubmodels() const { + return submodels_; + } + + inline void SetModelType(const std::string &type) { + model_type_ = type; + } + + inline const std::string &GetModelType() const { + return model_type_; + } + + inline void SetModelName(const std::string &model_name) { + model_name_ = model_name; + } + + inline const std::string &GetModelName() const { + return model_name_; + } + + inline void SetRootGraph(const ComputeGraphPtr graph) { + root_graph_ = graph; + } + + inline const ComputeGraphPtr &GetRootGraph() const { + return root_graph_; + } + + inline void SetModelRelation(std::shared_ptr model_relation) { + model_relation_ = std::move(model_relation); + } + + inline const std::shared_ptr GetModelRelation() const { + return model_relation_; + } + + public: + virtual Status SerializeModel(ModelBufferData &model_buff) = 0; + + virtual Status UnSerializeModel(const ModelBufferData &model_buff) = 0; + + virtual void SetModelId(const uint32_t model_id) { + model_id_ = model_id; + } + + virtual uint32_t GetModelId() const { + return model_id_; + } + + private: + std::map> submodels_; + std::shared_ptr model_relation_; + ComputeGraphPtr root_graph_ = nullptr; + std::string model_name_; + std::string model_type_; + uint32_t model_id_ = INVALID_MODEL_ID; +}; + +using PneModelPtr = std::shared_ptr; +} // namespace ge + +#endif // INC_FRAMEWORK_PNE_MODEL_H_ diff --git a/inc/framework/pne/process_node_engine.h b/inc/framework/pne/process_node_engine.h new file mode 100644 index 00000000..bb18b553 --- /dev/null +++ b/inc/framework/pne/process_node_engine.h @@ -0,0 +1,67 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
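A hedged sketch exercising the PneModel submodel container introduced in pne_model.h above. SerializeModel/UnSerializeModel are pure virtual, so a do-nothing subclass is assumed purely for illustration, and the elided shared_ptr template argument in AddSubModel is assumed to be PneModel.

class DemoPneModel : public ge::PneModel {
 public:
  ge::Status SerializeModel(ge::ModelBufferData &model_buff) override {
    (void)model_buff;  // no-op in this sketch
    return ge::SUCCESS;
  }
  ge::Status UnSerializeModel(const ge::ModelBufferData &model_buff) override {
    (void)model_buff;
    return ge::SUCCESS;
  }
};

static void BuildModelTree() {
  const auto root = std::make_shared<DemoPneModel>();
  const auto sub = std::make_shared<DemoPneModel>();
  sub->SetModelName("npu_part");  // AddSubModel keys submodels by model name
  if (root->AddSubModel(sub, ge::PNE_ID_NPU) == ge::SUCCESS) {
    const auto found = root->GetSubmodel("npu_part");  // nullptr when absent
    (void)found;
  }
}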
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_PROCESS_NODE_ENGINE_H_ +#define INC_FRAMEWORK_PROCESS_NODE_ENGINE_H_ + +#include +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/ge_types.h" +#include "graph/manager/graph_manager_utils.h" +#include "framework/pne/pne_model.h" + +namespace ge { +class ProcessNodeEngineImpl { + public: + virtual Status OptimizeGraph(const std::vector &inputs, ComputeGraphPtr &compute_graph) = 0; + + virtual Status BuildGraph(ComputeGraphPtr &compute_graph, PneModelPtr &model) = 0; +}; + +using ProcessNodeEngineImplPtr = std::shared_ptr; + +class ProcessNodeEngine { + public: + ProcessNodeEngine() = default; + virtual ~ProcessNodeEngine() = default; + ProcessNodeEngine(const ProcessNodeEngine &other) = delete; + ProcessNodeEngine &operator=(const ProcessNodeEngine &other) = delete; + + public: + virtual Status Initialize(const std::map &options) = 0; + + virtual Status Finalize() = 0; + + virtual Status OptimizeGraph(const std::vector &inputs, ComputeGraphPtr &compute_graph) = 0; + + virtual Status BuildGraph(ComputeGraphPtr &compute_graph, PneModelPtr &model) = 0; + + virtual const std::string &GetEngineName(const ge::NodePtr &node_ptr = nullptr) const = 0; + + virtual void SetImpl(ProcessNodeEngineImplPtr impl) = 0; + + protected: + std::string engine_id_; + ProcessNodeEngineImplPtr impl_ = nullptr; +}; + +using ProcessNodeEnginePtr = std::shared_ptr; +} // namespace ge + +#endif // INC_FRAMEWORK_PROCESS_NODE_ENGINE_H_ diff --git a/inc/framework/runtime/gert_api.h b/inc/framework/runtime/gert_api.h new file mode 100644 index 00000000..007993e8 --- /dev/null +++ b/inc/framework/runtime/gert_api.h @@ -0,0 +1,27 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_ +#define AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_ +#include "model_v2_executor.h" +#include "common/ge_types.h" + +namespace gert { +std::unique_ptr LoadExecutorFromFile(const char *file_path, ge::graphStatus &error_code); +std::unique_ptr LoadExecutorFromModelData(const ge::ModelData &model_data, + ge::graphStatus &error_code); +} // namespace gert +#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_ diff --git a/inc/framework/runtime/model_desc.h b/inc/framework/runtime/model_desc.h new file mode 100644 index 00000000..46c21636 --- /dev/null +++ b/inc/framework/runtime/model_desc.h @@ -0,0 +1,94 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. 
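A hedged end-to-end sketch of the gert_api.h loaders above, combined with the ModelV2Executor interface declared later in this patch. The unique_ptr element type, stripped with the other template arguments in this excerpt, is assumed to be gert::ModelV2Executor; stream and tensor preparation is left to the caller.

static ge::graphStatus RunOnce(rtStream_t stream, gert::Tensor **inputs, size_t input_num,
                               gert::Tensor **outputs, size_t output_num) {
  ge::graphStatus error_code = ge::GRAPH_SUCCESS;
  const auto executor = gert::LoadExecutorFromFile("model.om", error_code);  // illustrative path
  if (executor == nullptr) {
    return error_code;  // populated by the loader on failure
  }
  if (executor->Load() != ge::GRAPH_SUCCESS) {
    return ge::GRAPH_FAILED;
  }
  const gert::ModelExecuteArg arg{stream};  // the only member so far is the stream
  const ge::graphStatus ret = executor->Execute(arg, inputs, input_num, outputs, output_num);
  (void)executor->UnLoad();
  return ret;
}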
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_ +#define AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_ +#include "common/ge_types.h" +#include "exe_graph/runtime/shape.h" +#include "exe_graph/runtime/continuous_vector.h" +#include "exe_graph/runtime/storage_format.h" +#include "exe_graph/runtime/storage_shape.h" + +namespace gert { +class ShapeRange { + public: + const Shape &GetMin() const; + const Shape &GetMax() const; + Shape &MutableMin(); + Shape &MutableMax(); + + private: + Shape min_; + Shape max_; +}; + +class ModelIoDesc { + public: + const char *GetName() const; + int32_t GetDataType() const; + ge::Format GetStorageFormat() const; + ge::Format GetOriginFormat() const; + int64_t GetSize() const; + const Shape &GetStorageShape() const; + const Shape &GetOriginShape() const; + const ShapeRange &GetOriginShapeRange() const; + const ShapeRange &GetStorageShapeRange() const; + + void SetName(const char *name); + void SetDataType(int32_t data_type); + void SetStorageFormat(ge::Format format); + void SetOriginFormat(ge::Format format); + Shape &MutableStorageShape(); + Shape &MutableOriginShape(); + ShapeRange &MutableOriginShapeRange(); + ShapeRange &MutableStorageShapeRange(); + + private: + const char *name_; + int32_t data_type_; + StorageFormat format_; + StorageShape shape_; + ShapeRange storage_shape_range_; + ShapeRange origin_shape_range_; +}; + +class ModelDesc { + public: + static size_t CalcSize(size_t input_num, size_t output_num); + const ModelIoDesc *GetInputDesc(size_t index) const; + const ModelIoDesc *GetAllInputsDesc(size_t &input_num) const; + + const ModelIoDesc *GetOutputDesc(size_t index) const; + const ModelIoDesc *GetAllOutputsDesc(size_t &output_num) const; + + ModelIoDesc *MutableInputDesc(size_t index); + ModelIoDesc *MutableOutputDesc(size_t index); + ModelIoDesc *AllMutableIoDesc(size_t &input_num, size_t &output_num); + void SetInputNum(size_t input_num); + void SetOutputNum(size_t output_num); + + ge::graphStatus GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) const; + ge::graphStatus GetUserDesignateShapeOrder(std::vector &user_designate_shape_order) const; + ge::graphStatus GetModelAttrs(std::vector &attrs) const; + + private: + size_t input_num_; + size_t output_num_; + ContinuousVector model_io_descs_; +}; +} // namespace gert + +#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_ \ No newline at end of file diff --git a/inc/framework/runtime/model_v2_executor.h b/inc/framework/runtime/model_v2_executor.h new file mode 100644 index 00000000..277a23d0 --- /dev/null +++ b/inc/framework/runtime/model_v2_executor.h @@ -0,0 +1,142 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
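An illustrative read-only walk over the ModelDesc/ModelIoDesc accessors declared in model_desc.h above.

static void DumpInputs(const gert::ModelDesc &model_desc) {
  size_t input_num = 0U;
  (void)model_desc.GetAllInputsDesc(input_num);  // side effect: fills in the input count
  for (size_t i = 0U; i < input_num; ++i) {
    const gert::ModelIoDesc *io = model_desc.GetInputDesc(i);
    if (io == nullptr) {
      continue;  // defensive; not expected for a valid index
    }
    const char *name = io->GetName();
    const gert::Shape &storage_shape = io->GetStorageShape();
    (void)name;
    (void)storage_shape;
  }
}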
 * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_ +#define AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_ +#include +#include "graph/compute_graph.h" +#include "graph/ge_error_codes.h" +#include "model_desc.h" +#include "runtime/stream.h" +#include "exe_graph/runtime/tensor.h" + +namespace gert { +enum SubExeGraphType { kInitExeGraph, kMainExeGraph, kDeInitExeGraph, kSubExeGraphTypeEnd }; +static constexpr char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = {(char *)"Init", (char *)"Main", (char *)"DeInit"}; +inline const char *GetSubExeGraphTypeStr(SubExeGraphType type) { + return kSubExeGraphTypeStrs[type]; +} + +class ResourceGuard { + public: + void *ResetExecutionData(std::unique_ptr execution_data); + void ResetAnyValue(std::unique_ptr any_values, size_t count); + void PushNode(void *node); + void PushWatcher(void *watcher); + void *ResetNodesArray(std::unique_ptr nodes_array); + void *ResetStartNodesArray(std::unique_ptr start_nodes_array); + void *ResetNodesIndgreeArray(std::unique_ptr nodes_indgree_array); + void *ResetNodesWaitIndgreeArray(std::unique_ptr nodes_indgree_array); + void *ResetInputsArray(std::unique_ptr inputs_array); + void *ResetOutputsArray(std::unique_ptr outputs_array); + void *ResetWatchersArray(std::unique_ptr watchers_array); + void *ResetReadyQueue(void *ready_queue); + void *ResetBuffer(std::unique_ptr buffer); + void *ResetComputeNodeInfo(std::unique_ptr compute_node_info); + void *ResetKernelExtendInfo(std::unique_ptr kernel_extend_info); + void *ResetModelDesc(std::unique_ptr model_desc); + + ~ResourceGuard(); + + private: + std::unique_ptr execution_data_holder_; + size_t any_values_num_; + std::unique_ptr any_values_guard_; + + std::vector> nodes_guarder_; + std::vector> watchers_guarder_; + std::unique_ptr continuous_buffer_guarder_; + std::unique_ptr buffer_guarder_; + std::unique_ptr compute_node_info_guarder_; + std::unique_ptr kernel_extend_info_guarder_; + std::unique_ptr model_desc_guarder_; + + std::unique_ptr nodes_array_guarder_; + std::unique_ptr start_nodes_array_guarder_; + std::unique_ptr nodes_indgree_array_guarder_; + std::unique_ptr nodes_wait_indgree_array_guarder_; + std::unique_ptr inputs_array_guarder_; + std::unique_ptr outputs_array_guarder_; + std::unique_ptr watchers_array_guarder_; + std::unique_ptr ready_queue_guarder_{nullptr, nullptr}; +}; + +struct ModelExecuteArg { + rtStream_t stream; +}; +static_assert(std::is_standard_layout::value, "The class ModelExecuteArg must be a POD"); + +class ExeGraphExecutor { + public: + // TODO: release the AnyValue resources when the executor is unloaded + ge::graphStatus Load() { + return ge::GRAPH_SUCCESS; + } + ge::graphStatus UnLoad() { + return ge::GRAPH_SUCCESS; + } + + /** + * Sets the inputs/outputs for graph execution. Note that the caller must guarantee that inputs/outputs are fully refreshed.
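+ * An illustrative call order (buffer preparation elided): first SpecifyInputs(input_ptrs, 0U, input_num), + * then SpecifyOutputs(output_ptrs, output_num), and only then Execute().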
+ */ + ge::graphStatus SpecifyInputs(void **inputs, size_t start, size_t num); + ge::graphStatus SpecifyOutputs(void **outputs, size_t num); + ge::graphStatus Execute(); + + const void *GetExecutionData() const { + return execution_data_; + } + + ResourceGuard &GetResourceGuard(); + void *SetExecutionData(std::unique_ptr execution_data); + + private: + friend class ModelV2ExecutorTestHelper; + + void *execution_data_; + ResourceGuard resource_guard_; +}; +class ModelV2Executor { + public: + static std::unique_ptr Create(const ge::ComputeGraphPtr &root_graph); + + ge::graphStatus Load(); + ge::graphStatus Execute(const ModelExecuteArg &arg, Tensor **inputs, size_t input_num, Tensor **outputs, + size_t output_num); + ge::graphStatus ExecuteSync(Tensor **inputs, size_t input_num, Tensor **outputs, size_t output_num); + ge::graphStatus UnLoad(); + + const ModelDesc &GetModelDesc() const; + void SetModelDesc(ModelDesc *model_desc); + ModelV2Executor(const ModelV2Executor &) = delete; + ModelV2Executor(ModelV2Executor &&) = delete; + ModelV2Executor &operator=(const ModelV2Executor &) = delete; + ModelV2Executor &operator=(ModelV2Executor &&) = delete; + + private: + friend class ModelV2ExecutorBuilder; + friend class ModelV2ExecutorTestHelper; + ModelV2Executor() = default; + + private: + std::array graphs_; + ResourceGuard resource_guard_; + ModelDesc *model_desc_ = nullptr; + rtStream_t default_stream_ = nullptr; +}; +} // namespace gert + +#endif // AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_ diff --git a/metadef b/metadef index a725349b..e4d1efc4 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit a725349b65aef2940555af2ddb7b9461fbe0d5fd +Subproject commit e4d1efc47349f13af1bcdb53ba408118779fc27e diff --git a/scripts/format_source_code.sh b/scripts/format_source_code.sh new file mode 100755 index 00000000..1fd0b4f6 --- /dev/null +++ b/scripts/format_source_code.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# Copyright 2019-2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +set -e + +CLANG_FORMAT=$(which clang-format) || (echo "Please install 'clang-format' tool first"; exit 1) + +version=$("${CLANG_FORMAT}" --version | sed -n "s/.*\ \([0-9]*\)\.[0-9]*\.[0-9]*.*/\1/p") +if [[ "${version}" -lt "8" ]]; then + echo "clang-format's version must be at least 8.0.0" + exit 1 +fi + +CURRENT_PATH=$(pwd) +SCRIPTS_PATH=$(dirname "$0") + +echo "CURRENT_PATH=${CURRENT_PATH}" +echo "SCRIPTS_PATH=${SCRIPTS_PATH}" + +# print usage message +function usage() +{ + echo "Format the specified source files to conform to the code style." + echo "Usage:" + echo "bash $0 [-a] [-c] [-l] [-h]" + echo "e.g.
$0 -c" + echo "" + echo "Options:" + echo " -a format of all files" + echo " -c format of the files changed compared to last commit, default case" + echo " -l format of the files changed in last commit" + echo " -h Print usage" +} + +# check and set options +function checkopts() +{ + # init variable + mode="changed" # default format changed files + + # Process the options + while getopts 'aclh' opt + do + case "${opt}" in + a) + mode="all" + ;; + c) + mode="changed" + ;; + l) + mode="lastcommit" + ;; + h) + usage + exit 0 + ;; + *) + echo "Unknown option ${opt}!" + usage + exit 1 + esac + done +} + +# init variable +# check options +checkopts "$@" + +# switch to project root path, which contains clang-format config file '.clang-format' +cd "${SCRIPTS_PATH}/.." || exit 1 + +FMT_FILE_LIST='__format_files_list__' + +if [[ "X${mode}" == "Xall" ]]; then + find src -type f -name "*" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true + find inc -type f -name "*" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true +elif [[ "X${mode}" == "Xchanged" ]]; then + # --diff-filter=ACMRTUXB will ignore deleted files in commit + git diff --diff-filter=ACMRTUXB --name-only | grep "^inc\|^src" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true +else # "X${mode}" == "Xlastcommit" + git diff --diff-filter=ACMRTUXB --name-only HEAD~ HEAD | grep "^inc\|^src" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true +fi + +while read line; do + if [ -f "${line}" ]; then + ${CLANG_FORMAT} -i "${line}" + fi +done < "${FMT_FILE_LIST}" + +rm "${FMT_FILE_LIST}" +cd "${CURRENT_PATH}" || exit 1 + +echo "Specified cpp source files have been format successfully." diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h index 703225e8..37a2e412 100644 --- a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #ifndef AICPU_OP_TYPE_LIST_H_ #define AICPU_OP_TYPE_LIST_H_ +extern "C" { enum OpKernelType { TF_KERNEL, CPU_KERNEL @@ -29,19 +30,13 @@ enum ReturnCode { }; #pragma pack(push, 1) -//One byte alignment +// One byte alignment struct SysOpInfo { uint64_t opLen; uint64_t opType; OpKernelType kernelsType; }; -struct OpParamInfo { - uint64_t num; - uint64_t dtypeList; - uint64_t formatList; -}; - struct SysOpCheckInfo { uint64_t opListNum; uint64_t offSetLen; @@ -57,4 +52,6 @@ struct SysOpCheckResp { uint64_t opParamInfoList; }; #pragma pack(pop) +} + #endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h index 72e21f6f..2dc0b1bb 100644 --- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h +++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h @@ -14,13 +14,15 @@ * limitations under the License. 
 */ -#ifndef AICPU_TASK_STRUCT_H_ -#define AICPU_TASK_STRUCT_H_ +#ifndef AICPU_TASK_STRUCT_H +#define AICPU_TASK_STRUCT_H #include namespace aicpu { +using char_t = char; + #pragma pack(push, 1) struct AicpuParamHead { @@ -29,9 +31,76 @@ uint32_t extInfoLength; // extInfo struct Length uint64_t extInfoAddr; // extInfo address }; + +enum class AicpuConfigMsgType { + AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */ + AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */ + AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */ +}; + +enum class AicpuErrMsgType { + ERR_MSG_TYPE_NULL = 0, + ERR_MSG_TYPE_AICORE = 1, + ERR_MSG_TYPE_AICPU = 2, +}; + +enum class AicpuExtInfoMsgType { + EXT_MODEL_ID_MSG_TYPE = 0, +}; + +struct AicpuConfigMsg { + uint8_t msgType; + uint8_t reserved1; + uint16_t bufLen; + uint32_t offset; + uint64_t bufAddr; + uint32_t tsId; + uint32_t reserved2; +}; + +struct AicpuModelIdInfo { + uint32_t modelId; + uint32_t extendModelId; + uint32_t extendInfo[13]; +}; + +// 64 bytes +struct AicpuExtendInfo { + uint8_t msgType; + uint8_t version; + uint8_t reserved[2]; + union { + AicpuModelIdInfo modelIdMap; + }; +}; + +struct AicoreErrMsgInfo { + uint8_t errType; + uint8_t version; + uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ + uint32_t errorCode; + uint32_t modelId; + uint32_t taskId; + uint32_t streamId; + uint64_t transactionId; + uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */ +}; + +struct AicpuErrMsgInfo { + uint8_t errType; + uint8_t version; + uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ + uint32_t errorCode; + uint32_t modelId; + uint32_t streamId; + uint64_t transactionId; + char_t opName[64]; /* op name str */ + char_t errDesc[128]; /* err msg desc info */ + uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */ +}; #pragma pack(pop) } // namespace aicpu -#endif // AICPU_TASK_STRUCT_H_ +#endif // AICPU_TASK_STRUCT_H diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 5733d68f..ec92a036 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -21,7 +21,7 @@ namespace aicpu { namespace FWKAdapter { - +using char_t = char; // API RETURN CODE enum FWKAdptAPIRetCode { FWK_ADPT_SUCCESS = 0, // success @@ -63,6 +63,8 @@ enum FWKTaskExtInfoType { FWK_ADPT_EXT_BITMAP, FWK_ADPT_EXT_TOPIC_TYPE, FWK_ADPT_EXT_ASYNCWAIT, + FWK_ADPT_EXT_UNKNOWN_SHAPE_INPUT_INDEX, + FWK_ADPT_EXT_UNKNOWN_SHAPE_OUTPUT_INDEX, FWK_ADPT_EXT_INVALID }; @@ -113,7 +115,7 @@ struct StrFWKKernel { typedef StrFWKKernel FWKOperateParam; // Extent info ShapeAndType -const uint32_t kMaxShapeDims = 8; +const uint32_t kMaxShapeDims = 8U; #pragma pack(push, 1) struct ShapeAndType { int32_t type; @@ -122,13 +124,13 @@ #pragma pack(pop) // Extend info structure for extInfoAddr -const uint32_t kExtInfoHeadSize = 8; +const uint32_t kExtInfoHeadSize = 8U; #pragma pack(push, 1) struct ExtInfo { int32_t infoType; // extend type uint32_t infoLen; // length for infoMsg - char infoMsg[0]; // extend value + char_t infoMsg[0]; // extend value }; #pragma pack(pop) @@ -143,9 +145,9 @@ struct ResultSummary { #pragma pack(push, 1) struct AsyncWait { - uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait - uint32_t waitId; // wait id, GE refresh - uint32_t timeOut; // reserved + uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait + 
uint32_t waitId; // wait id, GE refresh + uint32_t timeOut; // reserved uint64_t reserved; }; #pragma pack(pop) diff --git a/third_party/fwkacllib/inc/cce/taskdown_common.hpp b/third_party/fwkacllib/inc/cce/taskdown_common.hpp index 3ecea523..7954162e 100644 --- a/third_party/fwkacllib/inc/cce/taskdown_common.hpp +++ b/third_party/fwkacllib/inc/cce/taskdown_common.hpp @@ -27,15 +27,16 @@ namespace cce { #define CC_FUSION_OP_MAX 32 typedef enum tagccKernelType { - CCE_AI_CORE = 0, /* cce aicore */ - CCE_AI_CPU = 1, /* cce aicpu */ - TE = 2, /* te operator*/ - CUSTOMIZED = 3, /* customized operator */ - TE_AI_CORE = 4, /* te aicore operator*/ - TE_AI_CPU = 5, /* te aicpu operator */ - AI_CPU = 6, /* aicpu */ - CUST_AI_CPU = 7, /* custom aicpu*/ - INVALID = 8, /* unknown kernel type */ + CCE_AI_CORE = 0, /* cce aicore */ + CCE_AI_CPU = 1, /* cce aicpu */ + TE = 2, /* te operator*/ + CUSTOMIZED = 3, /* customized operator */ + TE_AI_CORE = 4, /* te aicore operator*/ + TE_AI_CPU = 5, /* te aicpu operator */ + AI_CPU = 6, /* aicpu */ + CUST_AI_CPU = 7, /* custom aicpu*/ + HOST_CPU = 8, /* host cpu */ + INVALID = 10000 /* unknown kernel type */ } ccKernelType; typedef struct tagOpContext { diff --git a/third_party/fwkacllib/inc/common/type_def.h b/third_party/fwkacllib/inc/common/type_def.h new file mode 100644 index 00000000..1bbaf32d --- /dev/null +++ b/third_party/fwkacllib/inc/common/type_def.h @@ -0,0 +1,52 @@ +/** +* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +* +* Description: interface. +* Create: 2021-12-21 +*/ +#ifndef AICPU_TYPE_DEF_H +#define AICPU_TYPE_DEF_H + +#include +#include +#ifndef char_t +typedef char char_t; +#endif + +#ifndef float32_t +typedef float float32_t; +#endif + +#ifndef float64_t +typedef double float64_t; +#endif + +inline uint64_t PtrToValue(const void *ptr) +{ + return static_cast(reinterpret_cast(ptr)); +} + +inline void *ValueToPtr(const uint64_t value) +{ + return reinterpret_cast(static_cast(value)); +} + +template +inline TO *PtrToPtr(TI *ptr) +{ + return reinterpret_cast(ptr); +} + +template +inline T *PtrAdd(T * const ptr, const size_t maxIdx, const size_t idx) +{ + if ((ptr != nullptr) && (idx < maxIdx)) { + return reinterpret_cast(ptr + idx); + } + return nullptr; +} +#endif // AICPU_TYPE_DEF_H diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h old mode 100755 new mode 100644 index 9f216a56..1c8f8e44 --- a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h +++ b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h @@ -39,6 +39,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invali static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout +static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error @@ -52,6 +53,12 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no str static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no 
notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full +static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init +static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec overflow +static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common overflow static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error @@ -91,6 +98,14 @@ static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal +static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode +static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die +static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id +static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set +static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow +static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error @@ -99,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc di #ifdef __cplusplus } #endif - #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index e57563b3..5b2f34be 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -86,6 +86,47 @@ enum OriginalGraphShapeType { SHAPE_RESERVED /**< reserved */ }; +enum HcclEventType { + HCCL_EVENT_SEND_COMPLETION = 0, + HCCL_EVENT_RECV_REQUEST, + HCCL_EVENT_RECV_COMPLETION, + HCCL_EVENT_CONGESTION_RELIEF, + HCCL_EVENT_RESERVED /**< reserved */ +}; + +const u32 TAG_MAX_LEN = 127; // max length of a tag name +using TagAttr = struct TagAttrDef { + char name[TAG_MAX_LEN + 1]; // tag identifier string + // whether data for this tag is fetched through the active-receive interface: 0 = no, 1 = yes (reserved, not supported yet) + // with activeRecv = 0, the receiver is notified when data arrives or a send completes + uint32_t activeRecv; + uint32_t sendCredit; // number of in-flight sends allowed for this tag + uint32_t eventId; +}; + +using HcclEventMsg = struct HcclEventMsgDef { + HcclComm comm; + u32 peerRank; + u32 tag; + // 0:HCCL_SEND_COMPLETION; 1:HCCL_RECV_COMPLETION; 2:HCCL_RECV_REQUEST; 3:HCCL_CONGESTION_RELIEF + u32 hcclEventType; + union { + struct { + u32 reserver; + } sendCompletionItem; + struct { + u32 reserver; + } recvRequestItem; + struct { + u32 reserver; + } recvCompletionItem; + struct CongestionReliefItem { + u32 reserver; + } congestionReliefItem; + } desc; +}; + + /** * @brief stream handle.
 */ @@ -124,29 +165,54 @@ struct HcomRemoteAccessAddrInfo { }; struct HcomAllToAllVParams { - void *sendbuf; - void *sendcounts; - void *sdispls; - HcclDataType sendtype; - void *recvbuf; - void *recvcounts; - void *rdispls; - HcclDataType recvtype; - const char *group; + void *sendbuf; // device mem + void *sendcounts; // device mem; Type: uint_64 + void *sdispls; // device mem; Type: uint_64 + HcclDataType sendtype; + void *recvbuf; // device mem + void *recvcounts; // device mem; Type: uint_64 + void *rdispls; // device mem; Type: uint_64 + HcclDataType recvtype; + const char *group; // not used now }; struct HcomGatherAllToAllVParams { - void *addrInfo; - void *addrInfoCountPerRank; - void *recvbuf; - void *recvcounts; - void *rdispls; - void *gatheredbuf; - s32 addrLength; - HcclDataType recvtype; - const char *group; + void *addrInfo; // device mem; contains host VA[uint_64]: [addr, length, addr, length, addr, length, ...] + void *addrInfoCountPerRank; // device mem; length: ranksize; contains addrInfoCounts for every rank + void *recvbuf; // device mem + void *recvcounts; // device mem; Type: uint_64 + void *rdispls; // device mem; Type: uint_64 + void *gatheredbuf; // device mem + s32 addrLength; + HcclDataType recvtype; + const char *group; // not used now }; +typedef enum workMode { +HCCL_MODE_NORMAL = 0, // no ANY_SOURCE/ANY_TAG probe; only exact probe is supported +HCCL_MODE_ANY = 1 // supports ANY_SOURCE + ANY_TAG probe +} WorkMode; + +typedef struct tagCommAttr { + WorkMode mode; // probe mode of this communicator + uint32_t deviceId = 0; +} CommAttr; + +typedef void* HcclMessage; +typedef void* HcclRequest; + +typedef struct { + int srcRank; // source rank of the received/probed message/envelope (MPI-style semantics, readable by the caller) + int tag; // tag of the received/probed message/envelope (MPI-style semantics, readable by the caller) + int error; // error code of the receive/probe: 0 = no error, others follow the MPI convention (readable by the caller) + int cancelled; // whether the operation was cancelled (implementation-specific) + int count; // payload size of the received/probed message (implementation-specific) +} HcclStatus; + +#define HCCL_REQUEST_NULL NULL + +#define HCCL_TAG_ANY (1 << 30) + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/hccl/hccl_types.h b/third_party/fwkacllib/inc/hccl/hccl_types.h deleted file mode 100644 index 50a64795..00000000 --- a/third_party/fwkacllib/inc/hccl/hccl_types.h +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -/** - * @file hccl_types.h - * @brief HCCL data type definition - * - */ - -#ifndef HCCL_TYPES_H_ -#define HCCL_TYPES_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/** - * @brief HCCL functions return value definition - */ -typedef enum { - HCCL_SUCCESS = 0, /**< success */ - HCCL_E_PARA = 1, /**< parameter error */ - HCCL_E_PTR = 2, /**< empty pointer */ - HCCL_E_MEMORY = 3, /**< memory error */ - HCCL_E_INTERNAL = 4, /**< internal error */ - HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ - HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ - HCCL_E_UNAVAIL = 7, /**< resource unavailable */ - HCCL_E_SYSCALL = 8, /**< call system interface error */ - HCCL_E_TIMEOUT = 9, /**< timeout */ - HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ - HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ - HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ - HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ - HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ - HCCL_E_RUNTIME = 15, /**< call runtime api fail */ - HCCL_E_DRV = 16, /**< call driver api fail */ - HCCL_E_PROFILING = 17, /**< call profiling api fail */ - HCCL_E_CCE = 18, /**< call cce api fail */ - HCCL_E_NETWORK = 19, /**< call network api fail */ - HCCL_E_RESERVED /**< reserved */ -} HcclResult; - -/** - * @brief handle to HCCL communicator - */ -typedef void *HcclComm; - -/** - * @brief HCCL Reduction opperation - */ -typedef enum { - HCCL_REDUCE_SUM = 0, /**< sum */ - HCCL_REDUCE_PROD = 1, /**< prod */ - HCCL_REDUCE_MAX = 2, /**< max */ - HCCL_REDUCE_MIN = 3, /**< min */ - HCCL_REDUCE_RESERVED /**< reserved */ -} HcclReduceOp; - -/** - * @brief HCCL data type - */ -typedef enum { - HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ - HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ - HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ - HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ - HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ - HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ - HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ - HCCL_DATA_TYPE_RESERVED /**< reserved */ -} HcclDataType; - -const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length - -/** - * @brief HCCL root info - */ -typedef struct HcclRootInfoDef { - char internal[HCCL_ROOT_INFO_BYTES]; -} HcclRootInfo; - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // HCCL_TYPES_H_ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index 955764d6..cdc8d840 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -126,58 +126,6 @@ extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, co * @return HcclResult */ extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); - -/** - * @brief Initialize hcom executor. - * - * @param void - * @return HcclResult - */ -HcclResult HcomExecInitialize(); - -/** - * @brief Finalize hcom executor. - * - * @param void - * @return HcclResult - */ -HcclResult HcomExecFinalize(); - -/** - * @brief Put collective communication operation into hcom executor. - * - * @param opInfo information about collective communication operation. - * @param callback callback after collective communication operation. - * @return HcclResult - */ -HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); - -/** - * @brief Put remote access operation into hcom executor. - * - * @param remoteAccessType operation type (read or write). 
- * @param addrInfos address information about collective communication operation. - * @param callback callback after collective communication operation. - * @return HcclResult - */ -HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, - const std::vector& addrInfos, - std::function callback); - -HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function callback); - -HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params, - std::function callback); - -/** - * @brief Register memories and init resources for remote access. - * - * @param addrList memory addresses for remote access. - * @param count number of remote memory addresses. - * @return HcclResult - */ -extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index 38a689ee..f7eb7435 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -1,18 +1,12 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* +* @file mmpa_api.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ #ifndef _MMPA_API_H_ #define _MMPA_API_H_ @@ -40,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -56,6 +49,7 @@ #include #include #include +#include #include #include diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index 993f36ba..fa72aed2 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -1,18 +1,12 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* +* @file mmpa_linux.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ #ifndef MMPA_LINUX_MMPA_LINUX_H #define MMPA_LINUX_MMPA_LINUX_H @@ -58,31 +52,31 @@ typedef long MM_LONG; typedef VOID *(*userProcFunc)(VOID *pulArg); typedef struct { - userProcFunc procFunc; // Callback function pointer - VOID *pulArg; // Callback function parameters + userProcFunc procFunc; // Callback function pointer + VOID *pulArg; // Callback function parameters } mmUserBlock_t; typedef struct { - const char *dli_fname; - void *dli_fbase; - const char *dli_sname; - void *dli_saddr; - size_t dli_size; /* ELF only */ - int dli_bind; /* ELF only */ - int dli_type; + const CHAR *dli_fname; + VOID *dli_fbase; + const CHAR *dli_sname; + VOID *dli_saddr; + size_t dli_size; /* ELF only */ + INT32 dli_bind; /* ELF only */ + INT32 dli_type; } mmDlInfo; typedef struct { - int wSecond; // Seconds. [0-60] (1 leap second) - int wMinute; // Minutes. [0-59] - int wHour; // Hours. [0-23] - int wDay; // Day. [1-31] - int wMonth; // Month. [1-12] - int wYear; // Year - int wDayOfWeek; // Day of week. [0-6] - int tm_yday; // Days in year.[0-365] - int tm_isdst; // DST. [-1/0/1] - long int wMilliseconds; // milliseconds + INT32 wSecond; // Seconds. [0-60] (1 leap second) + INT32 wMinute; // Minutes. [0-59] + INT32 wHour; // Hours. [0-23] + INT32 wDay; // Day. [1-31] + INT32 wMonth; // Month. [1-12] + INT32 wYear; // Year + INT32 wDayOfWeek; // Day of week. [0-6] + INT32 tm_yday; // Days in year.[0-365] + INT32 tm_isdst; // DST. [-1/0/1] + LONG wMilliseconds; // milliseconds } mmSystemTime_t; typedef sem_t mmSem_t; @@ -98,72 +92,72 @@ typedef ssize_t mmSsize_t; typedef size_t mmSize; // size typedef struct { - UINT32 createFlag; - INT32 oaFlag; + UINT32 createFlag; + INT32 oaFlag; } mmCreateFlag; typedef struct { - VOID *sendBuf; - INT32 sendLen; + VOID *sendBuf; + INT32 sendLen; } mmIovSegment; typedef struct in_addr mmInAddr; typedef struct { - VOID *inbuf; - INT32 inbufLen; - VOID *outbuf; - INT32 outbufLen; - mmOverLap *oa; + VOID *inbuf; + INT32 inbufLen; + VOID *outbuf; + INT32 outbufLen; + mmOverLap *oa; } mmIoctlBuf; typedef int mmAtomicType; typedef int mmAtomicType64; typedef enum { - pollTypeRead = 1, // pipe read - pollTypeRecv, // socket recv - pollTypeIoctl, // ioctl + pollTypeRead = 1, // pipe read + pollTypeRecv, // socket recv + pollTypeIoctl, // ioctl } mmPollType; typedef struct { - mmPollHandle handle; // The file descriptor or handle of poll is required - mmPollType pollType; // Operation type requiring poll + mmPollHandle handle; // The file descriptor or handle of poll is required + mmPollType pollType; // Operation type requiring poll // read or recv or ioctl - INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL - mmComPletionKey completionKey; // The default value is blank, which is used in windows + INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL + mmComPletionKey completionKey; // The default value is blank, which is used in windows // The data used to receive the difference between which handle is readable } mmPollfd; typedef struct { - VOID *priv; // User defined private content - mmPollHandle bufHandle; // Value of handle corresponding to buf - mmPollType bufType; // Data types polled to - VOID *buf; // Data used in poll - UINT32 bufLen; // Data length used in poll - UINT32 bufRes; // Actual return length + VOID *priv; // User defined private content + mmPollHandle bufHandle; // Value of handle corresponding to buf + mmPollType bufType; // Data types polled to + VOID *buf; // Data used in poll + UINT32 bufLen; // Data length used in poll + UINT32 
bufRes; // Actual return length } mmPollData, *pmmPollData; typedef VOID (*mmPollBack)(pmmPollData); typedef struct { - INT32 tz_minuteswest; // How many minutes is it different from Greenwich - INT32 tz_dsttime; // type of DST correction + INT32 tz_minuteswest; // How many minutes is it different from Greenwich + INT32 tz_dsttime; // type of DST correction } mmTimezone; typedef struct { - LONG tv_sec; - LONG tv_usec; + LONG tv_sec; + LONG tv_usec; } mmTimeval; typedef struct { - MM_LONG tv_sec; - MM_LONG tv_nsec; + MM_LONG tv_sec; + MM_LONG tv_nsec; } mmTimespec; typedef struct { - ULONGLONG totalSize; - ULONGLONG freeSize; - ULONGLONG availSize; + ULONGLONG totalSize; + ULONGLONG freeSize; + ULONGLONG availSize; } mmDiskSize; #define mmTLS __thread @@ -174,40 +168,40 @@ typedef mode_t mmMode_t; typedef struct option mmStructOption; typedef struct { - char addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0 + CHAR addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0 } mmMacInfo; typedef struct { - char **argv; - INT32 argvCount; - char **envp; - INT32 envpCount; + CHAR **argv; + INT32 argvCount; + CHAR **envp; + INT32 envpCount; } mmArgvEnv; typedef struct { - char arch[MMPA_CPUDESC_DEFAULT_SIZE]; - char manufacturer[MMPA_CPUDESC_DEFAULT_SIZE]; // vendor - char version[MMPA_CPUDESC_DEFAULT_SIZE]; // modelname - INT32 frequency; // cpu frequency - INT32 maxFrequency; // max speed - INT32 ncores; // cpu cores - INT32 nthreads; // cpu thread count - INT32 ncounts; // logical cpu nums + CHAR arch[MMPA_CPUDESC_DEFAULT_SIZE]; + CHAR manufacturer[MMPA_CPUDESC_DEFAULT_SIZE]; // vendor + CHAR version[MMPA_CPUDESC_DEFAULT_SIZE]; // modelname + INT32 frequency; // cpu frequency + INT32 maxFrequency; // max speed + INT32 ncores; // cpu cores + INT32 nthreads; // cpu thread count + INT32 ncounts; // logical cpu nums } mmCpuDesc; typedef mode_t MODE; typedef struct { - INT32 detachFlag; // Determine whether to set separation property 0, not to separate 1 - INT32 priorityFlag; // Determine whether to set priority 0 and not set 1 - INT32 priority; // Priority value range to be set 1-99 - INT32 policyFlag; // Set scheduling policy or not 0 do not set 1 setting - INT32 policy; // Scheduling policy value value - // MMPA_THREAD_SCHED_RR - // MMPA_THREAD_SCHED_OTHER - // MMPA_THREAD_SCHED_FIFO - INT32 stackFlag; // Set stack size or not: 0 does not set 1 setting - UINT32 stackSize; // The stack size unit bytes to be set cannot be less than MMPA_THREAD_STACK_MIN + INT32 detachFlag; // Determine whether to set separation property 0, not to separate 1 + INT32 priorityFlag; // Determine whether to set priority 0 and not set 1 + INT32 priority; // Priority value range to be set 1-99 + INT32 policyFlag; // Set scheduling policy or not 0 do not set 1 setting + INT32 policy; // Scheduling policy value value + // MMPA_THREAD_SCHED_RR + // MMPA_THREAD_SCHED_OTHER + // MMPA_THREAD_SCHED_FIFO + INT32 stackFlag; // Set stack size or not: 0 does not set 1 setting + UINT32 stackSize; // The stack size unit bytes to be set cannot be less than MMPA_THREAD_STACK_MIN } mmThreadAttr; #ifdef __ANDROID__ @@ -326,8 +320,8 @@ MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond); MMPA_FUNC_VISIBILITY INT32 mmGetPid(); MMPA_FUNC_VISIBILITY INT32 mmGetTid(); MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle); -MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime); -MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t 
*sysTimePtr); +MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTimePtr); MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value); MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem); @@ -393,7 +387,7 @@ MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmProcess fd, mmIovSegment *iov, INT32 i MMPA_FUNC_VISIBILITY VOID mmMb(); MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr); -MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); +MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 accessFlag, mmCreateFlag fileFlag); MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId); @@ -407,13 +401,13 @@ MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicTy MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); // The following 3 interfaces are to be deleted -MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); -MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], INT32 waitMode); MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); -MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); +MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipeHandle[], UINT32 pipeCount); // Poll related interface MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); @@ -467,10 +461,10 @@ MMPA_FUNC_VISIBILITY INT32 mmGetOptOpt(); MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); -MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); +MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, CHAR *const *argv, const CHAR *opts); MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, - char *const *argv, - const char *opts, + CHAR *const *argv, + const CHAR *opts, const mmStructOption *longOpts, INT32 *longIndex); @@ -496,7 +490,7 @@ MMPA_FUNC_VISIBILITY INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 o MMPA_FUNC_VISIBILITY CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr); MMPA_FUNC_VISIBILITY CHAR *mmDirName(CHAR *path); MMPA_FUNC_VISIBILITY CHAR *mmBaseName(CHAR *path); -MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); +MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const CHAR *path, mmDiskSize *diskSize); /* * Function: set the thread name created by mmcreatetask @@ -540,7 +534,7 @@ MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); MMPA_FUNC_VISIBILITY INT32 
mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, - const char *stdoutRedirectFile, + const CHAR *stdoutRedirectFile, mmProcess *id); MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, @@ -550,6 +544,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); + +MMPA_FUNC_VISIBILITY mmSize mmGetPageSize(); +MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize); +MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr); #define MMPA_DLL_API #ifdef __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h index 9df5b9ce..9c6f6499 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h @@ -79,6 +79,9 @@ typedef long LONG; #define MMPA_THREAD_SCHED_OTHER SCHED_OTHER #define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN +#define MMPA_PATH_SEPARATOR_STR "/" +#define MMPA_PATH_SEPARATOR_CHAR '/' + #define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER #define MMPA_MAX_NI 19 diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h index 58ebb1a0..9f8a72cd 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h @@ -63,6 +63,9 @@ extern "C" { #define MMPA_MIN_NI (-20) #define MMPA_MAX_FILE 128 +#define MMPA_PATH_SEPARATOR_STR "\\" +#define MMPA_PATH_SEPARATOR_CHAR '\\' + #define MMPA_MAX_THREAD_PIO 99 #define MMPA_MIDDLE_THREAD_PIO 66 #define MMPA_LOW_THREAD_PIO 33 diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 49e97a5d..e03131f2 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -1,18 +1,12 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* +* @file mmpa_win.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ #ifndef MMPA_WIN_MMPA_WIN_H #define MMPA_WIN_MMPA_WIN_H @@ -368,8 +362,8 @@ MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond); MMPA_FUNC_VISIBILITY INT32 mmGetPid(VOID); MMPA_FUNC_VISIBILITY INT32 mmGetTid(VOID); MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle); -MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime); -MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTimePtr); +MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTimePtr); MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value); MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem); MMPA_FUNC_VISIBILITY INT32 mmSemPost(mmSem_t *sem); @@ -431,7 +425,7 @@ MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmSockHandle fd, mmIovSegment *iov, INT3 MMPA_FUNC_VISIBILITY VOID mmMb(); MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr); -MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); +MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 accessFlag, mmCreateFlag fileFlag); MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId); @@ -444,13 +438,13 @@ MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueInc64(mmAtomicType64 *ptr, mmAtomicTy MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicType64 value); MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); -MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); -MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], INT32 waitMode); MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); -MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); +MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipeHandle[], UINT32 pipeCount); MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); @@ -557,6 +551,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); + +MMPA_FUNC_VISIBILITY mmSize mmGetPageSize(); +MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize); +MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr); #ifdef __cplusplus #if __cplusplus } diff --git 
a/third_party/fwkacllib/inc/ops/OWNERS b/third_party/fwkacllib/inc/ops/OWNERS new file mode 100755 index 00000000..f95df23c --- /dev/null +++ b/third_party/fwkacllib/inc/ops/OWNERS @@ -0,0 +1,65 @@ +approvers: +- gegenhua +- qiaohairong +reviewers: +- chuqingxi +- wang-jintang +- luanma_bl +- chen-kang30 +- li-xulong +- Allan_Yu +- minshen +- pan-jixing +- yl_wang +- lijie176 +- mabing726 +- miao-fangzheng +- huang-qiang002 +- su-yueming +- chenpeng-hw +- wang_jianle +- luanma_bl +- LDLD0524 +- wywismygod2020 +- lipeiyang3699 +- koala-zhang +- zhu-jingjing +- zhaozhihui5 +- simbaliuxx +- lyxyz +- zhou-qilong +- block0219 +- hanfuwei +- xchu42 +- sheng-nan +- yangjing88 +- alexlak +- xig514 +- jellylj +- brightlyking +- liuzhenyuhw +- djh602 +- wangjiangben_hw +- li1jie +- clinglai +- liujun2014 +- soupkey +- wu-shengji +- cimeng +- ccl_ligang +- xiaozhedeng +- granpad7 +- tc1qaz +- Ronnie_zheng +- xiexianhu +- zhouyujoe +- zhaoping12 +- tanshengshun +- fanqirui +- xu-binglin +- yangyang016 +- zhangzhongzt +- gegenhua +- qiaohairong +options: + no_parent_owners: true \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index bed984bd..86805f72 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator AippData. +*@par Restrictions: +*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. */ REG_OP(AippData) .INPUT(data, TensorType::ALL()) diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index 1ac83783..78ef3446 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,8 +39,10 @@ #include "image_ops.h" #include "internal_ops.h" #include "linalg_ops.h" +#include "list_ops.h" #include "logging_ops.h" #include "lookup_ops.h" +#include "map_ops.h" #include "math_ops.h" #include "matrix_calculation_ops.h" #include "nn_batch_norm_ops.h" @@ -76,4 +78,7 @@ #include "transformation_ops.h" #include "condtake_ops.h" #include "warp_perspective_ops.h" +#include "vector_search.h" +#include "deep_md.h" +#include "encoding_ops.h" #endif // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index e1f64421..9eaa779d 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ namespace ge { * @li values:A `Tensor`. Must have the same type as `sorted_x`. 
\n
 *@par Attributes:
-*@li out_type:An optional `DType` from: `int32, int64`.
+*out_type:An optional `DType` from: `int32, int64`. Defaults to `int32`.
\n
 *@par Outputs:
@@ -498,13 +498,34 @@ REG_OP(Constant)
   .OP_END_FACTORY_REG(Constant)

 /**
+*@brief Creates a file constant tensor. This operator is used to process very large weights that are stored in a file. \n
+
+*@par Attributes:
+*file_path: A string, used to record the file path. \n
+*file_id: A string, used to record the file id. \n
+*shape: The data shape. \n
+*dtype: The data type. \n
+
+*@par Outputs:
+*y: The FileConstant tensor. \n
+*/
+REG_OP(FileConstant)
+  .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \
+      DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+  .ATTR(file_path, String, "")
+  .ATTR(file_id, String, "")
+  .REQUIRED_ATTR(shape, ListInt)
+  .REQUIRED_ATTR(dtype, Type)
+  .OP_END_FACTORY_REG(FileConstant)
+
+/**
 *@brief Returns a copy of the input tensor. \n

 *@par Inputs:
 *x: A tensor. \n

 *@par Outputs:
-*y: A tensor. \n
+*y: A copy of input tensor. \n

 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Snapshot.
@@ -626,7 +647,7 @@ REG_OP(StopGradient)
 *x: A tensor. \n

 *@par Outputs:
-*y: A tensor. \n
+*y: A tensor with the same shape and contents as input. \n

 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Identity.
@@ -666,7 +687,7 @@ REG_OP(IdentityN)
 *@li axis: The dimension index at which to expand. \n

 *@par Outputs:
-*y: A tensor. \n
+*y: A tensor with the same data as input, with an additional dimension inserted at the index specified by axis. \n

 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ExpandDims.
@@ -684,7 +705,9 @@ REG_OP(ExpandDims)
 *@par Inputs:
 *@li x: Original tensor.
-*@li axis: List of ints. \n
+
+*@par Attributes:
+*@li axes: List of ints indicating the dimensions to be inserted. \n

 *@par Outputs:
 *y: Reshape tensor with same data as input. \n
@@ -700,6 +723,53 @@ REG_OP(Unsqueeze)
   .OP_END_FACTORY_REG(Unsqueeze)

 /**
+*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. \n
+
+*@par Inputs:
+*@li x: Original tensor.
+
+*@par Attributes:
+*@li axis: List of ints indicating the dimensions to be inserted. \n
+
+*@par Outputs:
+*y: Reshape tensor with same data as input. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Onnx operator Unsqueeze.
+
+*@par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use Unsqueeze instead.
+*/
+
+REG_OP(UnsqueezeV2)
+  .INPUT(x, TensorType::ALL())
+  .OUTPUT(y, TensorType::ALL())
+  .ATTR(axis, ListInt, {})
+  .OP_END_FACTORY_REG(UnsqueezeV2)
+
+
+/**
+*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape
+is changed, but the data is not changed. \n
+
+*@par Inputs:
+*x: A tensor.
+*axes: A list of int64, which indicates the dimensions to be inserted. \n
+
+*@par Outputs:
+*y: Reshape tensor with same data as input. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Onnx operator Unsqueeze in V13. \n
+*/
+
+REG_OP(UnsqueezeV3)
+  .INPUT(x, TensorType::ALL())
+  .INPUT(axes, ListInt)
+  .OUTPUT(y, TensorType::ALL())
+  .OP_END_FACTORY_REG(UnsqueezeV3)
+
+/**
 *@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n

 *@par Inputs:
@@ -713,7 +783,7 @@ REG_OP(Unsqueeze)

 *@par Outputs:
 *y: A tensor.
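[Editor's note] To make the new FileConstant registration above easier to review, here is a minimal usage sketch. It assumes the usual REG_OP-generated setters (set_attr_<name>); the file path and id are made-up placeholders, and this snippet is not part of the patch.

#include "array_ops.h"  // generated ge::op::FileConstant

// Build a constant whose payload is streamed from a file instead of being
// embedded in the model. Both string attributes below are hypothetical.
ge::op::FileConstant MakeFileWeight() {
  return ge::op::FileConstant("fc1_weight")
      .set_attr_file_path("/data/weights/fc1_weight.bin")  // hypothetical path
      .set_attr_file_id("fc1_weight")                      // hypothetical id
      .set_attr_shape({1024, 1024})                        // required: data shape
      .set_attr_dtype(ge::DT_FLOAT);                       // required: data type
}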
\n
-*@par Attention:
+*@attention Constraints:
 *This operator cannot be directly called by the acllopExecute API. \n

 *@par Third-party framework compatibility
@@ -752,13 +822,59 @@ REG_OP(Squeeze)
   .OP_END_FACTORY_REG(Squeeze)

 /**
-*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n
+*@brief Removes dimensions of size 1 from the shape of a tensor. \n

 *@par Inputs:
 *x: A tensor. \n

+*@par Attributes:
+*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. If specified, only squeezes the dimensions listed. It is an error to squeeze a dimension that is not 1. \n
+
 *@par Outputs:
-*y: A tensor. The rank of input tensor. \n
+*y: A tensor. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Squeeze.
+
+*@par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use Squeeze instead.
+*/
+REG_OP(SqueezeV2)
+  .INPUT(x, TensorType::ALL())
+  .OUTPUT(y, TensorType::ALL())
+  .ATTR(axis, ListInt, {})
+  .OP_END_FACTORY_REG(SqueezeV2)
+
+/**
+*@brief Removes dimensions of size 1 from the shape of a tensor according to axes. \n
+
+*@par Inputs:
+*x: A tensor.
+*axes: An optional list of int64. If not specified, squeezes all dimensions of
+size 1. If specified, only squeezes the dimensions listed. It is an error to
+squeeze a dimension that is not 1. \n
+
+*@par Outputs:
+*y: Reshape tensor with same data as input. \n
+
+*@par Third-party framework compatibility
+*Compatible with the onnx operator Squeeze in V13. \n
+*/
+
+REG_OP(SqueezeV3)
+  .INPUT(x, TensorType::ALL())
+  .OPTIONAL_INPUT(axes, ListInt)
+  .OUTPUT(y, TensorType::ALL())
+  .OP_END_FACTORY_REG(SqueezeV3)
+
+/**
+*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n
+
+*@par Inputs:
+*x: A Tensor of type float32, float16, int8, int16, uint16, uint8, int32, int64, uint32, uint64, bool, double. \n
+
+*@par Outputs:
+*y: A tensor. The rank of input tensor. Type is int32. \n

 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Rank.
@@ -848,7 +964,6 @@ REG_OP(PlaceHolder)
 *x: A tensor. \n

 *@par Attributes:
-*@li dtype: data type of tensor.
 *@li shape: tensor shape. \n

 *@par Outputs:
@@ -867,13 +982,13 @@ REG_OP(PlaceholderWithDefault)
 *@brief Reads and returns the value of the input variable tensor. \n

 *@par Inputs:
-*x: A tensor. \n
+*x: A tensor of a numeric type. \n

 *@par Attributes:
 *dtype: An optional int32 or int64. The output data type. Defaults to int32. \n

 *@par Outputs:
-*y: A tensor. \n
+*y: A tensor of a numeric type. \n

 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ReadVariableOp.
@@ -946,6 +1061,25 @@ REG_OP(Shape)
   .OP_END_FACTORY_REG(Shape)

 /**
+*@brief Gathers selected dimensions from the shapes of the input tensors and assembles them into a new shape.\n
+
+*@par Inputs:
+*x: A list of input tensors. It's a dynamic input. \n
+
+*@par Attributes:
+*axes: The dimensions to select from the input shapes. \n
+
+*@par Outputs:
+*shape: The shape assembled from the selected dimensions.
\n
+*/
+REG_OP(GatherShapes)
+  .DYNAMIC_INPUT(x, TensorType::ALL())
+  .OUTPUT(shape, TensorType({DT_INT32, DT_INT64}))
+  .REQUIRED_ATTR(axes, ListListInt)
+  .ATTR(dtype, Int, DT_INT32)
+  .OP_END_FACTORY_REG(GatherShapes)
+
+/**
 *@brief Returns shape of tensors. \n

 *@par Inputs:
@@ -1026,27 +1160,27 @@ REG_OP(MirrorPadGrad)
   .OP_END_FACTORY_REG(MirrorPadGrad)

 /**
-*@brief Returns locations of nonzero / true values in a tensor. \n
+* @brief Returns locations of nonzero / true values in a tensor. \n

-*@par Inputs:
-*Including:
-*x: A Tensor. Must be one of the following types:
-DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16,
-DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL. \n
+* @par Inputs:
+* Including:
+* @li x: A Tensor. Must be one of the following types:
+  DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_QINT8,
+  DT_QUINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32, DT_QINT32,
+  DT_INT64, DT_UINT64, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128 \n

-*@par Outputs:
-*y: A Tensor of type DT_INT64. \n
+* @par Outputs:
+* @li y: A Tensor of type DT_INT64. \n

-*@attention Constraints:
-*Where runs on the Ascend AI CPU, which delivers poor performance.\n
+* @attention Constraints:
+* Where runs on the Ascend AI CPU, which delivers poor performance.\n

-*@par Third-party framework compatibility
-*Compatible with the TensorFlow operator Where.
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator Where.
 */
 REG_OP(Where)
-  .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
-      DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+  .INPUT(x, TensorType({BasicType(), DT_BOOL}))
   .OUTPUT(y, TensorType({DT_INT64}))
   .OP_END_FACTORY_REG(Where)
@@ -1074,6 +1208,39 @@ REG_OP(Copy)
   .OP_END_FACTORY_REG(Copy);

 /**
+*@brief Copies the src tensor to the dst tensor according to the given size/stride/storage_offset parameters. \n
+
+*@par Inputs:
+*Eight inputs, including:
+*dst: A tensor. Must be one of the following types:
+* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool
+*dst_size: A tensor with type int32
+*dst_stride: A tensor with type int32
+*dst_storage_offset: A tensor with type int32
+*src: A tensor. Must be one of the following types:
+* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool
+*src_size: A tensor with type int32
+*src_stride: A tensor with type int32
+*src_storage_offset: the storage_offset of the src tensor . \n
+
+*@par Outputs:
+*dst: A ref tensor. Must be one of the following types:
+* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool . \n
+*/
+
+REG_OP(ViewCopy)
+  .INPUT(dst, TensorType::BasicType())
+  .INPUT(dst_size, TensorType::IndexNumberType())
+  .INPUT(dst_stride, TensorType::IndexNumberType())
+  .INPUT(dst_storage_offset, TensorType::IndexNumberType())
+  .INPUT(src, TensorType::BasicType())
+  .INPUT(src_size, TensorType::IndexNumberType())
+  .INPUT(src_stride, TensorType::IndexNumberType())
+  .INPUT(src_storage_offset, TensorType::IndexNumberType())
+  .OUTPUT(dst, TensorType::BasicType())
+  .OP_END_FACTORY_REG(ViewCopy)
+
+/**
 *@brief Generates fingerprint values. \n

 *@par Inputs:
@@ -1134,10 +1301,10 @@
 This is an M-length vector.
 This is an R-length vector

 *@par Attributes:
-*@li normalize: boolean (if true, edit distances are normalized by length of truth).
+*normalize: boolean (if true, edit distances are normalized by length of truth).
\n

 *@par Outputs:
-*@li output: A dense float tensor with rank R - 1. \n
+*output: A dense float tensor with rank R - 1. \n

 *@par Third-party framework compatibility
 * Compatible with TensorFlow EditDistance operator.
@@ -1153,6 +1320,302 @@ REG_OP(EditDistance)
   .OUTPUT(output, TensorType({DT_FLOAT}))
   .OP_END_FACTORY_REG(EditDistance)

+/**
+* @brief Sorts the input tensor without returning the indices.
+
+* @par Inputs:
+* x: An ND tensor of type float16.
+
+* @par Attributes:
+* @li axis: An optional int. The dimension to sort along. This value defaults to -1.
+* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False.
+
+* @par Outputs:
+* y: An ND tensor of type float16.
+
+* @attention Constraints:
+* @li Axis should select the last dim.
+* @li This TBE operator is recommended when the data to be sorted has fewer than 150K elements;
+  descending sorts perform better than ascending ones.
+* @li The upper limit of the data volume on Ascend910 is 2000K.
+*/
+REG_OP(SortV2)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .ATTR(axis, Int, -1)
+  .ATTR(descending, Bool, false)
+  .OP_END_FACTORY_REG(SortV2)
+
+/**
+* @brief Expands the input tensor to a compatible shape. \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor. Must be one of the following types:
+* float16, float32, int32, int8, uint8, bool. \n
+* @li shape: A Tensor to specify the shape that the input tensor expanded to. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x", with the shape specified by the "shape" input. \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator Expand.
+*/
+
+REG_OP(Expand)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32,DT_INT64, DT_INT8, DT_UINT8, DT_BOOL}))
+  .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32,DT_INT64, DT_INT8, DT_UINT8, DT_BOOL}))
+  .OP_END_FACTORY_REG(Expand)
+
+/**
+*@brief Returns a tensor containing the indices of all non-zero elements of input. \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
+
+*@par Attributes:
+* transpose: the output tensor will be transposed if true. \n
+
+*@par Outputs:
+* y: A Tensor. Has the same type as "x" . \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator NonZero.
+*/
+
+REG_OP(NonZero)
+  .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
+      DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+  .OUTPUT(y, TensorType({DT_INT64, DT_INT32}))
+  .ATTR(transpose, Bool, false)
+  .ATTR(dtype, Type, DT_INT64)
+  .OP_END_FACTORY_REG(NonZero)
+
+/**
+*@brief Returns the non-zero elements of the input together with their indices. \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
+
+*@par Attributes:
+* transpose: the output tensor will be transposed if true. \n
+
+*@par Outputs:
+* value: A Tensor. Has the same type as "x" . \n
+* index: A Tensor of type INT32, the indices of the non-zero elements in the input. \n
+* count: A Scalar of type INT32, the number of non-zero elements in the input. \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator NonZeroWithValue.
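[Editor's note] A sketch of wiring the NonZero op registered above into a graph, assuming the standard set_input_<name>/set_attr_<name> setters and the ge::Graph builder; not part of the patch.

#include "array_ops.h"
#include "graph/graph.h"

ge::Graph BuildNonZeroGraph() {
  auto x = ge::op::Data("x").set_attr_index(0);
  auto nz = ge::op::NonZero("non_zero")
                .set_input_x(x)
                .set_attr_transpose(false)      // keep the default index layout
                .set_attr_dtype(ge::DT_INT64);  // index dtype, per the ATTR above
  ge::Graph graph("non_zero_graph");
  graph.SetInputs(std::vector<ge::Operator>{x})
      .SetOutputs(std::vector<ge::Operator>{nz});
  return graph;
}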
+*/
+
+REG_OP(NonZeroWithValue)
+  .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
+      DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+  .OUTPUT(value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
+      DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+  .OUTPUT(index, TensorType({DT_INT32}))
+  .OUTPUT(count, TensorType({DT_INT32}))
+  .ATTR(transpose, Bool, false)
+  .ATTR(dtype, Type, DT_INT32)
+  .OP_END_FACTORY_REG(NonZeroWithValue)
+
+
+
+/**
+*@brief Returns tensors with updated shapes from the outputs of NonZeroWithValue. \n
+
+*@par Inputs:
+*value: A Tensor. The output of NonZeroWithValue. \n
+*index: A Tensor. The output of NonZeroWithValue. \n
+*count: A Tensor of type INT32, the number of non-zero elements in the input. \n
+
+*@par Outputs:
+* out_value: A Tensor. Has the same type as "value" . \n
+* out_index: A Tensor. Has the same type as "index". \n
+*/
+REG_OP(NonZeroWithValueShape)
+  .INPUT(value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16,
+      DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+  .INPUT(index, TensorType({DT_INT32}))
+  .INPUT(count, TensorType({DT_INT32}))
+  .OUTPUT(out_value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16,
+      DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+  .OUTPUT(out_index, TensorType({DT_INT32}))
+  .OP_END_FACTORY_REG(NonZeroWithValueShape)
+
+
+/**
+* @brief Expands the input tensor to a compatible shape. \n
+
+* @par Inputs:
+* One input, including:
+* x: A Tensor. Must be one of the following types:
+* float16, float32, int32, int8, uint8, bool. \n
+
+* @par Attributes:
+* shape: A required listInt to specify the shape that the input tensor expanded to. \n
+
+
+* @par Outputs:
+* y: A Tensor. Has the same type as "x", with the shape specified by the "shape" attribute. \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator Expand.
+*/
+
+REG_OP(ExpandD)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
+  .REQUIRED_ATTR(shape, ListInt)
+  .OP_END_FACTORY_REG(ExpandD)
+
+/**
+*@brief Gets the shape information recorded in the tensor descriptors of the inputs. \n
+
+*@par Inputs:
+*x: A Tensor. \n
+
+*@par Outputs:
+*y: A 1D tensor. The data type must be int32. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GetShape)
+  .DYNAMIC_INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
+      DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+  .OUTPUT(y, TensorType({DT_INT32}))
+  .OP_END_FACTORY_REG(GetShape)
+
+/**
+*@brief Updates the tensor_desc of the output. \n
+
+* @par Attributes:
+* @li shape: A listInt that contains the shape to update. \n
+
+*@par Outputs:
+* y: A tensor with the updated tensor_desc; the type is int.\n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(UpdateTensorDesc)
+  .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8,
+      DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8,
+      DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE}))
+  .REQUIRED_ATTR(shape, ListInt)
+  .OP_END_FACTORY_REG(UpdateTensorDesc)
+
+/**
+*@brief Queue data for other operators. \n
+*@par Attributes:
+*index: Index of the input tensor. The data type must be int32 or int64.
+Assume that the net has three data nodes: one should be set 0, another should
+be set 1, and the last should be set 2. \n
+*queue_name: The queue name.
+*output_types: The types of the output data.
+*output_shapes: The shapes of the output data.
+*@par Outputs:
+*y: A DT_UINT8 tensor. \n
+*/
+REG_OP(QueueData)
+  .OUTPUT(y, TensorType({DT_UINT8}))
+  .ATTR(index, Int, 0)
+  .ATTR(queue_name, String, "")
+  .ATTR(output_types, ListType, {})
+  .ATTR(output_shapes, ListListInt, {{}, {}})
+  .OP_END_FACTORY_REG(QueueData)
+
+/**
+* @brief Ensures that the tensor's shape matches the expected shape. \n
+* @par Inputs:
+* input: A Tensor that needs to be checked against the desired shape.
+* Must be one of the following types:
+* int8, uint8, int16, uint16, int32, int64, float16, float,
+* double, complex64, complex128 \n
+* @par Attributes:
+* shape: required, the desired tensor shape. type: list int \n
+* @par Outputs:
+* output: A tensor that has the same type and contents as the input.
+* Must be one of the following types:
+* int8, uint8, int16, uint16, int32, int64, float16, float,
+* double, complex64, complex128 \n
+*/
+REG_OP(EnsureShape)
+  .INPUT(input, TensorType({DT_INT8,DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT16, \
+      DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+  .OUTPUT(output, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT16, \
+      DT_FLOAT,DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+  .REQUIRED_ATTR(shape, ListInt)
+  .OP_END_FACTORY_REG(EnsureShape)
+
+/**
+* @brief Finds the first unique element from every consecutive group of equivalent elements.
+
+* @par Inputs:
+* x: An ND tensor.
+
+* @par Attributes:
+* @li return_idx: An optional bool. Whether to also return the indices. The default value is False.
+* @li return_counts: An optional bool. Whether to also return the counts for each element. The default is False.
+* @li axis: An optional int. The axis along which to apply unique. The default is 1000, which means None.
+
+* @par Outputs:
+* @li y: The unique consecutive elements of "x".
+* @li idx: The index in "y" of each value of "x".
+* @li count: The counts of each value of "y".
+
+* @attention Constraints:
+* UniqueConsecutive runs on the Ascend AI CPU, which delivers poor performance.
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator UniqueConsecutive.
+*/
+
+REG_OP(UniqueConsecutive)
+  .INPUT(x, TensorType::BasicType())
+  .OUTPUT(y, TensorType::BasicType())
+  .OUTPUT(idx, TensorType::IndexNumberType())
+  .OUTPUT(count, TensorType::IndexNumberType())
+  .ATTR(return_idx, Bool, false)
+  .ATTR(return_counts, Bool, false)
+  .ATTR(axis, Int, 1000)
+  .OP_END_FACTORY_REG(UniqueConsecutive)
+
+/**
+* @brief Decodes a variant Tensor into a RaggedTensor. \n
+*
+* @par Input:
+* @li encoded_ragged: A Tensor of type variant. A variant Tensor containing encoded RaggedTensors. \n
+*
+* @par Outputs:
+* @li output_nested_splits: A list of output_ragged_rank Tensor objects with type int32 or int64.
+* @li output_dense_values: A Tensor, which must be one of the following types:
+* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool. \n
+*
+* @par Attributes:
+* @li input_ragged_rank: An int that is >= -1. The ragged rank of each encoded RaggedTensor component in the input.
+* If set to -1, this is inferred as output_n - rank(encoded_ragged).
+* @li output_ragged_rank: An int that is >= 0. The expected ragged rank of the output RaggedTensor.
+* The following must hold: output_n = rank(encoded_ragged) + input_n.
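[Editor's note] EnsureShape, registered above, is a pass-through with a runtime shape assertion. A minimal sketch under the same setter-name assumptions (the shape values are arbitrary examples); not part of the patch.

#include "array_ops.h"

// The result carries the same data as "x", but execution fails if the runtime
// shape of "x" differs from the required "shape" attribute.
ge::op::EnsureShape MakeShapeCheck(ge::op::Data &x) {
  return ge::op::EnsureShape("ensure_shape")
      .set_input_input(x)          // the input port is named "input"
      .set_attr_shape({32, 128});  // expected shape (example values)
}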
+* @li Tvalues: The data type of output_dense_values.
+* @li Tsplits: The data type of output_nested_splits. An optional DType of "int32, int64". Defaults to `int64`. \n
+*
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow RaggedTensorFromVariant operator.
+*/
+REG_OP(RaggedTensorFromVariant)
+  .INPUT(encoded_ragged, TensorType({DT_VARIANT}))
+  .DYNAMIC_OUTPUT(output_nested_splits, TensorType({DT_INT32, DT_INT64}))
+  .OUTPUT(output_dense_values, TensorType::BasicType())
+  .REQUIRED_ATTR(input_ragged_rank, Int)
+  .REQUIRED_ATTR(output_ragged_rank, Int)
+  .REQUIRED_ATTR(Tvalues, Type)
+  .ATTR(Tsplits, Type, DT_INT64)
+  .OP_END_FACTORY_REG(RaggedTensorFromVariant)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h
index d9883253..f05135d1 100644
--- a/third_party/fwkacllib/inc/ops/audio_ops.h
+++ b/third_party/fwkacllib/inc/ops/audio_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h
new file mode 100644
index 00000000..70eb3272
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file avg_pool_1d_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+*@brief Generates an auxiliary matrix for 1-D average pooling. \n
+
+*@par Inputs:
+* @li x: A tensor. Must be one of the following types: uint8, int8, int16, int32,
+  int64, float16, float, double. The format must be NHWC/NCHW.
+
+*@par Attributes:
+*@li ksize: Kernel size. Input type is int.
+*@li strides: Input type is int.
+*@li pads: Input type is listInt.
+*@li ceil_mode: Bool, default value is false.
+*@li count_include_pad: Bool, default value is false. \n
+
+*@par Outputs:
+*y: A tensor with the same type as "x" . \n
+*@par Third-party framework compatibility
+
+*It is a custom operator. It has no corresponding operator in TensorFlow.
+*/ +REG_OP(AvgPool1DAvgMatrix) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, + DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, + DT_INT32, DT_INT64, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, Int) + .REQUIRED_ATTR(strides, Int) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, false) + .OP_END_FACTORY_REG(AvgPool1DAvgMatrix) +} +#endif \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index 8a1c5a7b..ca4fe1db 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,10 +64,10 @@ the same types as "x_tensors" . It's a dynamic output. \n REG_OP(Batch) .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \ DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) - .OUTPUT(y_index, TensorType({ DT_INT64 })) - .OUTPUT(y_id, TensorType({ DT_INT64 })) .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL})) + .OUTPUT(y_index, TensorType({ DT_INT64 })) + .OUTPUT(y_id, TensorType({ DT_INT64 })) .REQUIRED_ATTR(num_batch_threads, Int) .REQUIRED_ATTR(max_batch_size, Int) .ATTR(max_enqueued_batches, Int, 10) @@ -107,11 +107,13 @@ across multiple sessions . \n REG_OP(Unbatch) .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ - DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .INPUT(index, TensorType({DT_INT64})) .INPUT(id, TensorType({DT_INT64})) .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ - DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .REQUIRED_ATTR(timeout_micros, Int) .ATTR(container, String, "") .ATTR(shared_name, String, "") @@ -146,13 +148,16 @@ across multiple sessions . 
\n

 REG_OP(UnbatchGrad)
   .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-    DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+    DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+    DT_COMPLEX64, DT_COMPLEX128}))
   .INPUT(index, TensorType({DT_INT64}))
   .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-    DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+    DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+    DT_COMPLEX64, DT_COMPLEX128}))
   .INPUT(id, TensorType({DT_INT64}))
   .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-    DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+    DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+    DT_COMPLEX64, DT_COMPLEX128}))
   .ATTR(container, String, "")
   .ATTR(shared_name, String, "")
   .OP_END_FACTORY_REG(UnbatchGrad)
diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h
index 5c83e161..dac78118 100644
--- a/third_party/fwkacllib/inc/ops/bitwise_ops.h
+++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,6 +26,35 @@
 namespace ge {

 /**
+*@brief Element-wise computes the bitwise left-shift of x and y. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x: A Tensor. Must be one of the following types: int8, int16, int32,
+int64, uint8, uint16, uint32, uint64.
+* @li y: A Tensor. Has the same type as "x". \n
+
+*@par Outputs:
+* z: A Tensor. Has the same type as "x". \n
+
+*@attention Constraints:
+*LeftShift runs on the Ascend AI CPU, which delivers poor performance. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator LeftShift.
+*/
+
+REG_OP(LeftShift)
+  .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
+      DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
+  .INPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
+      DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
+  .OUTPUT(z, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
+      DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
+  .OP_END_FACTORY_REG(LeftShift)
+
+/**
 *@brief Element-wise computes the bitwise right-shift of x and y . \n

 *@par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h
index 550e8b7d..08e54824 100644
--- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h
+++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h
index e20607bf..890c52ae 100644
--- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h
+++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
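[Editor's note] The new LeftShift registration mirrors the existing RightShift. A usage sketch assuming the generated setters; semantics follow TensorFlow's LeftShift (element-wise shifts, with "y" holding the shift counts); not part of the patch.

#include "array_ops.h"    // ge::op::Data
#include "bitwise_ops.h"  // ge::op::LeftShift

ge::op::LeftShift MakeLeftShift() {
  auto values = ge::op::Data("values").set_attr_index(0);  // e.g. an int32 tensor
  auto counts = ge::op::Data("counts").set_attr_index(1);  // same type as "values"
  return ge::op::LeftShift("left_shift")
      .set_input_x(values)
      .set_input_y(counts);
}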
diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h
new file mode 100644
index 00000000..6e41e569
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/cluster.h
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file cluster.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_
+
+#include "graph/operator_reg.h"
+#include "graph/operator.h"
+
+namespace ge {
+/**
+* @brief Performs k-means clustering on a data matrix. \n
+
+* @par Inputs:
+* Three required inputs and one optional input, including:
+* @li x: A 2D tensor of data type float32.
+* @li y: A 2D tensor of data type float32.
+* @li sum_square_x: An optional 2D tensor of data type float32.
+* @li sum_square_y: A 2D tensor of data type float32. \n
+
+* @par Attributes:
+* use_actual_distance: Indicates whether to calculate the complete distance. \n
+
+* @par Outputs:
+* @li segment_sum: A tensor of data type float32.
+* @li segment_count: A tensor of data type float32.
+* @li kmean_total_sum: A tensor of data type float32.
+*/
+REG_OP(KMeansCentroids)
+  .INPUT(x, TensorType({DT_FLOAT}))
+  .INPUT(y, TensorType({DT_FLOAT}))
+  .INPUT(sum_square_y, TensorType({DT_FLOAT}))
+  .OPTIONAL_INPUT(sum_square_x, TensorType({DT_FLOAT}))
+  .OUTPUT(segment_sum, TensorType({DT_FLOAT}))
+  .OUTPUT(segment_count, TensorType({DT_FLOAT}))
+  .OUTPUT(kmean_total_sum, TensorType({DT_FLOAT}))
+  .ATTR(use_actual_distance, Bool, false)
+  .OP_END_FACTORY_REG(KMeansCentroids)
+} // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_
diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h
index 5e91eb07..029cffbf 100644
--- a/third_party/fwkacllib/inc/ops/condtake_ops.h
+++ b/third_party/fwkacllib/inc/ops/condtake_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h
index 7196b14f..cd993599 100644
--- a/third_party/fwkacllib/inc/ops/control_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -96,7 +96,7 @@ REG_OP(RefMerge)
 * Otherwise, the data is forwarded to "output_false" . \n

 *@par Inputs:
- *@li data: The tensor to be forwarded. \ n
+ *@li data: The tensor to be forwarded.
 *    Must be one of the following types: float16, float32, float64,
 *    int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
 *@li pred: A boolean scalar.
The output port that will receive data . \n
@@ -387,12 +387,12 @@ REG_OP(ControlTrigger)
 *@par Inputs:
 * Three inputs, including:
-*@li x: One dimensional tensore of type int32, specifying queried shape, max size is 8.
-*@li data_seq: One dimensional tensore of type int32, specifying the mapped table is queried.
-*@li level_index: One dimensional tensore of type int32, specifying secondary index. \n
+*@li x: One dimensional tensor of type int32, specifying the queried shape; max size is 128.
+*@li data_seq: One dimensional tensor of type int32, specifying the mapped table to be queried.
+*@li level_index: One dimensional tensor of type int32, specifying the secondary index. \n

 *@par Outputs:
-*@li y: A Tensor with shape [batch, 8], of type int32, specifying index of shape in the map.
+*@li y: A Tensor with shape [8], of type int32, specifying the index of the shape in the map.
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
diff --git a/third_party/fwkacllib/inc/ops/correlation.h b/third_party/fwkacllib/inc/ops/correlation.h
new file mode 100644
index 00000000..caebba50
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/correlation.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file correlation.h
+ * \brief
+ */
+#ifndef GE_OP_CORRELATION_OPS_H
+#define GE_OP_CORRELATION_OPS_H
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+*@brief Computes a 2D Correlation given 4D "x" and "filter" tensors.
+*
+*@par Inputs:
+* @li filter: A 4D tensor of filters.
+* @li x: A 4D tensor of input images; its batch number must equal the batch
+* number of "filter", and its channel count must equal the channel count of "filter".
+*
+*@par Attributes:
+* @li groups: Sets the correlation mode; must be 1 or the channel count.
+*
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".

+*@par Third-party framework compatibility
+* Compatible with the Caffe Correlation custom operator.
+*/
+REG_OP(Correlation)
+  .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
+  .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
+  .ATTR(groups, Int, 1)
+  .OP_END_FACTORY_REG(Correlation)
+} // namespace ge
+
+#endif  // GE_OP_NN_CALCULATION_OPS_H
diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h
index 2c75fd09..17e4eabf 100644
--- a/third_party/fwkacllib/inc/ops/ctc_ops.h
+++ b/third_party/fwkacllib/inc/ops/ctc_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -74,7 +74,7 @@ REG_OP(CTCLoss)
 *@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n

 *@par Attributes:
-*@li merge_repeated: If True, merge repeated classes in output. \n
+* merge_repeated: If True, merge repeated classes in output.
\n
 *@par Outputs:
 *@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j,
@@ -137,6 +139,87 @@ REG_OP(CTCBeamSearchDecoder)
   .OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE}))
   .OP_END_FACTORY_REG(CTCBeamSearchDecoder)

+/**
+*@brief The Connectionist Temporal Classification loss.
+
+*@par Inputs:
+*@li log_probs: Tensor of size (T, N, C), where T = input length, N = batch size,
+  and C = number of classes (including blank).
+  It represents the logarithmized probabilities of the outputs.
+*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length.
+  It represents the target sequences.
+*@li input_lengths: Tuple or tensor of size (N). It represents the lengths of the inputs.
+*@li target_lengths: Tuple or tensor of size (N). It represents the lengths of the targets.
+
+*@par Outputs:
+*@li neg_log_likelihood: A loss value which is differentiable with respect to each input node.
+*@li log_alpha: The probability of possible traces of input to target.
+
+*@par Attributes:
+*@li blank: Blank label. Default 0.
+*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'.
+*@li zero_infinity: Whether to zero infinite losses and the associated gradients.
+
+* @par Third-party framework compatibility:
+* Compatible with the PyTorch CTCLoss operator.
+
+*@attention Constraints:
+* The maximum label length is 1K.
+*/
+REG_OP(CTCLossV2)
+  .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE}))
+  .INPUT(targets, TensorType({DT_INT32, DT_INT64}))
+  .INPUT(input_lengths, TensorType({DT_INT32, DT_INT64}))
+  .INPUT(target_lengths, TensorType({DT_INT32, DT_INT64}))
+  .OUTPUT(neg_log_likelihood, TensorType({DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(log_alpha, TensorType({DT_FLOAT, DT_DOUBLE}))
+  .ATTR(blank, Int, 0)
+  .ATTR(reduction, String, "mean")
+  .ATTR(zero_infinity, Bool, false)
+  .OP_END_FACTORY_REG(CTCLossV2)
+
+/**
+*@brief The gradient of the Connectionist Temporal Classification loss.
+
+* @par Inputs:
+*@li grad_out: Gradient renewal coefficient. Tensor of size (N), where N = batch size.
+* @li log_probs: Tensor of size (T, N, C), where T = input length, N = batch size,
+  and C = number of classes (including blank).
+  It represents the logarithmized probabilities of the outputs.
+*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length.
+  It represents the target sequences.
+* @li input_lengths: Tuple or tensor of size (N). It represents the lengths of the inputs.
+*@li target_lengths: Tuple or tensor of size (N). It represents the lengths of the targets.
+* @li neg_log_likelihood: A loss value which is differentiable with respect to each input node.
+* @li log_alpha: The probability of possible traces of input to target.
+
+* @par Outputs:
+*@li grad: Tensor of size (T, N, C), the gradient of the Connectionist Temporal Classification loss.
+
+* @par Attributes:
+*@li blank: Blank label. Default 0.
+* @li reduction: Specifies the reduction to apply to the output. Default: 'mean'.
+* @li zero_infinity: Whether to zero infinite losses and the associated gradients.
+
+* @par Third-party framework compatibility:
+* Compatible with the PyTorch CTCLoss operator.
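[Editor's note] A sketch of CTCLossV2 as documented above: log_probs is (T, N, C), targets is (N, S), and both length vectors have size (N). Setter names are assumed from the REG_OP pattern; not part of the patch.

#include "array_ops.h"  // ge::op::Data
#include "ctc_ops.h"    // ge::op::CTCLossV2

ge::op::CTCLossV2 MakeCtcLoss() {
  auto log_probs = ge::op::Data("log_probs").set_attr_index(0);
  auto targets = ge::op::Data("targets").set_attr_index(1);
  auto input_lens = ge::op::Data("input_lengths").set_attr_index(2);
  auto target_lens = ge::op::Data("target_lengths").set_attr_index(3);
  return ge::op::CTCLossV2("ctc_loss")
      .set_input_log_probs(log_probs)
      .set_input_targets(targets)
      .set_input_input_lengths(input_lens)
      .set_input_target_lengths(target_lens)
      .set_attr_blank(0)              // id of the blank label
      .set_attr_reduction("mean")     // reduction to apply; default "mean"
      .set_attr_zero_infinity(true);  // zero out infinite losses and grads
}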
+
+/**
+*@brief The Connectionist Temporal Classification loss grad.
+
+* @par Inputs:
+*@li grad_out: Gradient renewal coefficient. Tensor of size (N), where N = batch size.
+* @li log_probs: Tensor of size (T, N, C), where T = input length, N = batch size,
+ and C = number of classes (including blank).
+ It represents the logarithmized probabilities of the outputs.
+*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length.
+ It represents the target sequences.
+* @li input_lengths: Tuple or tensor of size (N). It represents the lengths of the inputs.
+*@li target_lengths: Tuple or tensor of size (N). It represents the lengths of the targets.
+* @li neg_log_likelihood: A loss value which is differentiable with respect to each input node.
+* @li log_alpha: The probability of possible traces of input to target.
+
+* @par Outputs:
+*@li grad: Tensor of size (T, N, C), the grad of the Connectionist Temporal Classification loss.
+
+* @par Attributes:
+*@li blank: Blank label. Default 0.
+* @li reduction: Specifies the reduction to apply to the output. Default: 'mean'.
+* @li zero_infinity: Whether to zero infinite losses and the associated gradients.
+
+* @par Third-party framework compatibility:
+* Compatible with the PyTorch CTCLoss operator.
+
+* @attention Constraints:
+* The length of the label is limited to 1K.
+*/
+REG_OP(CTCLossV2Grad)
+    .INPUT(grad_out, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(targets, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(input_lengths, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(target_lengths, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(neg_log_likelihood, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(log_alpha, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(grad, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(blank, Int, 0)
+    .ATTR(reduction, String, "mean")
+    .ATTR(zero_infinity, Bool, false)
+    .OP_END_FACTORY_REG(CTCLossV2Grad)
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h
index bb937a75..de351e43 100644
--- a/third_party/fwkacllib/inc/ops/data_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -908,7 +908,7 @@ REG_OP(TensorArray)
     .OUTPUT(handle, TensorType({DT_RESOURCE}))
     .OUTPUT(flow, TensorType({DT_FLOAT}))
     .REQUIRED_ATTR(dtype, Type)
-    .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE)
+    .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK)
     .ATTR(dynamic_size, Bool, false)
     .ATTR(clear_after_read, Bool, true)
     .ATTR(identical_element_shapes, Bool, false)
@@ -963,7 +963,7 @@ REG_OP(TensorArrayConcat)
                            DT_QUINT8, DT_QINT32}))
     .OUTPUT(lengths, TensorType({DT_INT64}))
     .REQUIRED_ATTR(dtype, Type)
-    .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_SHAPE)
+    .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK)
     .OP_END_FACTORY_REG(TensorArrayConcat)
@@ -999,7 +999,7 @@ REG_OP(TensorArrayGather)
                            DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8,
                            DT_QUINT8, DT_QINT32}))
     .REQUIRED_ATTR(dtype, Type)
-    .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE)
+    .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK)
     .OP_END_FACTORY_REG(TensorArrayGather)
@@ -1201,6 +1201,8 @@ REG_OP(TensorArraySize)
 *@brief A queue implementation that dequeues elements in a random order. \n
 *@par Attributes:
+*@li component_types: A list of fully-defined TensorType objects with
+*the same length as shapes, or None.
 *@li shapes: (Optional.) A list of fully-defined TensorShape objects with
 the same length as dtypes, or None.
 *@li capacity: An integer. The upper bound on the number of elements that may
@@ -1281,6 +1283,7 @@
 The length of this attr must be either 0 or the same as the length of
 elements are not constrained, and only one element may be dequeued at a time.
 *@li container: An optional string. Defaults to "". If non-empty, this queue
 is placed in the given container. Otherwise, a default container is used.
+*@li capacity: An integer. The upper bound on the number of elements that may be stored in this queue.
 *@li shared_name: An optional string. Defaults to "". If non-empty, this queue
 will be shared under the given name across multiple sessions. \n
@@ -1431,6 +1434,24 @@ REG_OP(OrderedMapClear)
     .OP_END_FACTORY_REG(OrderedMapClear)
 /**
+*@brief FakeQueue, supports the TensorFlow API FixedLengthRecordReader. \n
+
+*@par Inputs:
+*Including:
+* resource: A Tensor of type DT_RESOURCE.
+
+*@par Outputs:
+*handle: A Tensor of type DT_STRING ref. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator FakeQueue.
+*/
+REG_OP(FakeQueue)
+    .INPUT(resource, TensorType({DT_RESOURCE}))
+    .OUTPUT(handle, TensorType({DT_STRING}))
+    .OP_END_FACTORY_REG(FakeQueue)
+
+/**
 *@brief Returns the number of incomplete elements in the underlying container. \n
 *@par Attributes:
@@ -1508,7 +1529,7 @@ REG_OP(OrderedMapPeek)
 *@par Inputs:
 *Including:
-* @li indices: A Tensor of type DT_INT32. \n
+* indices: A Tensor of type DT_INT32. \n
 *@par Attributes:
 *@li capacity: An optional int that is >= 0. Defaults to "0".
@@ -2241,6 +2262,33 @@ REG_OP(OutfeedEnqueueOp)
     .OP_END_FACTORY_REG(OutfeedEnqueueOp)
 /**
+*@brief Enqueue a Tensor on the computation outfeed. \n
+
+*@par Inputs:
+*Inputs include:
+*x: A Tensor. Must be one of the following types: float16, float32,
+float64, int8, int16, uint16, uint8, int32, int64, uint32, uint64,
+bool, double, string. It's a dynamic input. \n
+*tensor_name: A Tensor. Must be of type string. \n
+
+*@par Attributes:
+*channel_name: name of the operator channel, defaults to "". \n
+
+*@attention Constraints:
+*The implementation of OutfeedEnqueueOpV2 on Ascend uses AICPU, with poor performance.
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow OutfeedEnqueueOpV2 operator.
+*/
+REG_OP(OutfeedEnqueueOpV2)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8,
+        DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32,
+        DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING}))
+    .INPUT(tensor_name, TensorType({DT_STRING}))
+    .ATTR(channel_name, String, "")
+    .OP_END_FACTORY_REG(OutfeedEnqueueOpV2)
+
+/**
 *@brief LruCache, create cache resource.
 *@par Inputs:
 *No input.
@@ -2258,6 +2306,7 @@ REG_OP(LruCache)
     .ATTR(shared_name, String, "LruCache")
     .ATTR(cache_size, Int, 100000)
     .ATTR(load_factor, Float, 1)
+    .REQUIRED_ATTR(dtype, Type)
     .OP_END_FACTORY_REG(LruCache)
 /**
@@ -2277,9 +2326,9 @@ REG_OP(CacheAdd)
     .INPUT(cache, TensorType({DT_RESOURCE}))
     .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
     .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-    .OUTPUT(swap_in_idx, TensorType({DT_INT64}))
+    .OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
     .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-    .OUTPUT(swap_out_idx, TensorType({DT_INT64}))
+    .OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
     .OP_END_FACTORY_REG(CacheAdd)
 /**
@@ -2295,9 +2344,210 @@ REG_OP(CacheAdd)
 REG_OP(CacheRemoteIndexToLocal)
     .INPUT(cache, TensorType({DT_RESOURCE}))
     .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-    .OUTPUT(local_idx, TensorType({DT_INT64}))
+    .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
     .OP_END_FACTORY_REG(CacheRemoteIndexToLocal)
-} // namespace ge
+/**
+*@brief CacheAllIndexToLocal, get ids in cache
+*@par Inputs:
+*cache: resource data
+*@par Outputs:
+*local_idx: ids in cache.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(CacheAllIndexToLocal)
+    .INPUT(cache, TensorType({DT_RESOURCE}))
+    .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .REQUIRED_ATTR(dtype, Type)
+    .OP_END_FACTORY_REG(CacheAllIndexToLocal)
+
+/**
+*@brief LRUCacheV2, AI Core LRUCache.
+
+*@par Inputs:
+*index_list: exchange index list
+*data: host data
+*cache: gm cache
+*tag: cache's tag
+*is_last_call: if set, this is the last call and all cache lines are written back to data
+
+*@par Outputs:
+*data: output data
+*cache: gm cache
+*tag: cache's tag
+*index_offset_list: index_offset_list
+*not_in_cache_index_list: index_list entries that are not in the cache
+*not_in_cache_number: scalar
+*@par Attributes:
+*pre_route_count: a required int
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(LRUCacheV2)
+    .INPUT(index_list, TensorType::BasicType())
+    .INPUT(data, TensorType::BasicType())
+    .INPUT(cache, TensorType::BasicType())
+    .INPUT(tag, TensorType::BasicType())
+    .INPUT(is_last_call, TensorType::BasicType())
+    .OUTPUT(data, TensorType::BasicType())
+    .OUTPUT(cache, TensorType::BasicType())
+    .OUTPUT(tag, TensorType::BasicType())
+    .OUTPUT(index_offset_list, TensorType::BasicType())
+    .OUTPUT(not_in_cache_index_list, TensorType::BasicType())
+    .OUTPUT(not_in_cache_number, TensorType::BasicType())
+    .REQUIRED_ATTR(pre_route_count, Int)
+    .OP_END_FACTORY_REG(LRUCacheV2)
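Reviewer note: as a mental model for LRUCacheV2's tag/index exchange, a host-side LRU index cache might look like the sketch below. The class and method names are illustrative only; the actual op operates on the cache/tag tensors in device memory.

```cpp
#include <cstdint>
#include <list>
#include <unordered_map>
#include <vector>

// Sketch: map incoming ids to cache slots, reporting ids that missed.
class LruIndexCache {
 public:
  explicit LruIndexCache(size_t capacity) : capacity_(capacity) {}

  // Returns true on a hit; appends missing ids to not_in_cache.
  bool Lookup(int64_t id, std::vector<int64_t>& not_in_cache) {
    auto it = slots_.find(id);
    if (it == slots_.end()) {
      not_in_cache.push_back(id);
      if (order_.size() == capacity_) {  // evict the least recently used id
        slots_.erase(order_.back());
        order_.pop_back();
      }
      order_.push_front(id);
      slots_[id] = order_.begin();
      return false;
    }
    order_.splice(order_.begin(), order_, it->second);  // mark as recently used
    return true;
  }

 private:
  size_t capacity_;
  std::list<int64_t> order_;  // most recently used at front
  std::unordered_map<int64_t, std::list<int64_t>::iterator> slots_;
};
```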
+
+/**
+*@brief DynamicGetNext, dynamic get next data
+*@par Inputs:
+*x: the iterator, all types are available
+*@par Outputs:
+*y: the data in the iterator, all types are available
+*@par Attributes:
+*output_types: types of all outputs
+*output_shapes: shapes of all outputs
+*_dynamic_graph_execute_mode: dynamic graph execution mode,
+value is one of lazy_recompile and dynamic_execute
+*_getnext_inputs_shape_range: shape ranges of outputs,
+it takes effect when _dynamic_graph_execute_mode is dynamic_execute
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicGetNext)
+    .INPUT(x, TensorType::ALL())
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
+    .ATTR(_getnext_inputs_shape_range, String, "")
+    .OP_END_FACTORY_REG(DynamicGetNext)
+
+/**
+* @brief DynamicGetNextV2, dynamic get next data
+* @par Outputs:
+* y: the data in the iterator, all types are available
+* @par Attributes:
+* output_types: types of all outputs
+* output_shapes: shapes of all outputs
+* channel_name: the data channel name
+*_dynamic_graph_execute_mode: dynamic graph execution mode,
+value is one of lazy_recompile and dynamic_execute
+*_getnext_inputs_shape_range: shape ranges of outputs,
+it takes effect when _dynamic_graph_execute_mode is dynamic_execute
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(DynamicGetNextV2)
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(channel_name, String, "")
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
+    .ATTR(_getnext_inputs_shape_range, String, "")
+    .OP_END_FACTORY_REG(DynamicGetNextV2)
+
+/**
+*@brief AdpGetNext
+*@par Outputs:
+*y: the data in the iterator, all types are available
+*@par Attributes:
+*output_types: types of all outputs
+*output_shapes: shapes of all outputs
+*queue_name: cdqm queue name
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(AdpGetNext)
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .ATTR(queue_name, String, "")
+    .OP_END_FACTORY_REG(AdpGetNext)
+
+/**
+*@brief GetNextV2
+*@par Outputs:
+*y: the data in the iterator, all types are available
+*@par Attributes:
+*output_types: types of all outputs
+*output_shapes: shapes of all outputs
+*channel_name: cdqm queue name
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GetNextV2)
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .ATTR(channel_name, String, "")
+    .OP_END_FACTORY_REG(GetNextV2)
+
+/**
+*@brief GetNextFromQueue
+*@par Inputs:
+*x: the data, only supports uint8
+*@par Outputs:
+*y: the data in the iterator, all types are available
+*@par Attributes:
+*output_types: types of all outputs
+*output_shapes: shapes of all outputs
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GetNextFromQueue)
+    .INPUT(x, TensorType({DT_UINT8}))
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .OP_END_FACTORY_REG(GetNextFromQueue)
+
+/**
+*@brief Get the batch of data in data processing. \n
+
+*@par Attributes:
+*@li output_types: A nested structure of DType objects corresponding to each
+component of an element of this dataset.
+*@li output_shapes: A nested structure of TensorShape objects corresponding
+to each component of an element of this dataset.
+*@li channel_name: A string. Default "". \n
+
+*@par Outputs:
+*y: A nested structure of Tensor objects. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow GetNext operator.
+*/
+
+REG_OP(PeekData)
+    .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64,
+                                   DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {})
+    .ATTR(channel_name, String, "")
+    .OP_END_FACTORY_REG(PeekData)
+
+/**
+* @brief OptionalGetValue
+* @par Inputs:
+* optional: A tensor of type variant
+* @par Outputs:
+* components: A list of Tensor objects of output_types
+* @par Attributes:
+* output_types: types of all outputs
+* output_shapes: shapes of all outputs
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(OptionalGetValue)
+    .INPUT(optional, TensorType({DT_VARIANT}))
+    .DYNAMIC_OUTPUT(components, TensorType::BasicType())
+    .REQUIRED_ATTR(output_types, ListType)
+    .REQUIRED_ATTR(output_shapes, ListListInt)
+    .OP_END_FACTORY_REG(OptionalGetValue)
+} // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/deep_md.h b/third_party/fwkacllib/inc/ops/deep_md.h
new file mode 100644
index 00000000..0f5e07c1
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/deep_md.h
@@ -0,0 +1,304 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file deep_md.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+
+/**
+* @brief Calculate TabulateFusion. \n
+*
+* @par Inputs:
+* Four inputs, including:
+* @li table: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li em: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Outputs:
+* descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Attributes:
+* One attribute, including:
+* @li last_layer_size: int value. \n
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(TabulateFusion)
+    .INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(last_layer_size, Int)
+    .OP_END_FACTORY_REG(TabulateFusion)
+
+/**
+* @brief Calculate ProdEnvMatA. \n
+*
+* @par Inputs:
+* @li coord: A Tensor. Must be one of the following types: float32, float64.
+* @li type: A Tensor. Must be one of the following types: int32.
+* @li natoms: A Tensor. Must be one of the following types: int32.
+* @li box: A Tensor. Must be one of the following types: float32, float64.
+* @li mesh: A Tensor. Must be one of the following types: int32.
+* @li davg: A Tensor. Must be one of the following types: float32, float64.
+* @li dstd: A Tensor. Must be one of the following types: float32, float64.
+*
+* @par Outputs:
+* @li descrpt: A Tensor. Must be one of the following types: float32, float64.
+* @li descrpt_deriv: A Tensor. Must be one of the following types: float32, float64.
+* @li rij: A Tensor. Must be one of the following types: float32, float64.
+* @li nlist: A Tensor. Must be one of the following types: int32. \n
+*
+* @par Attributes:
+* @li rcut_a: A Float.
+* @li rcut_r: A Float.
+* @li rcut_r_smth: A Float.
+* @li sel_a: A ListInt.
+* @li sel_r: A ListInt. \n
+*
+*/
+REG_OP(ProdEnvMatA)
+    .INPUT(coord, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(type, TensorType({DT_INT32}))
+    .INPUT(natoms, TensorType({DT_INT32}))
+    .INPUT(box, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(mesh, TensorType({DT_INT32}))
+    .INPUT(davg, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(dstd, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(descrpt, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(descrpt_deriv, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(rij, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(nlist, TensorType({DT_INT32}))
+    .ATTR(rcut_a, Float, 1.0)
+    .ATTR(rcut_r, Float, 1.0)
+    .ATTR(rcut_r_smth, Float, 1.0)
+    .ATTR(sel_a, ListInt, {})
+    .ATTR(sel_r, ListInt, {})
+    .OP_END_FACTORY_REG(ProdEnvMatA)
+
+/**
+* @brief Calculate ProdEnvMatACalcRij.
+* Using type, natoms, sel_a, and rcut_r as constraints, finds the central element in
+* the corresponding coord through mesh, and outputs the index of the central element
+* and the distance between the central element and each neighbor. \n
+*
+* @par Inputs:
+* @li coord: A Tensor. Must be one of the following types: float32, float64.
+* @li type: A Tensor. Must be one of the following types: int32.
+* @li natoms: A Tensor. Must be one of the following types: int32.
+* @li box: A Tensor. Must be one of the following types: float32, float64.
+* @li mesh: A Tensor. Must be one of the following types: int32.
+*
+* @par Outputs:
+* @li rij: A Tensor. Must be one of the following types: float32, float64.
+* @li nlist: A Tensor. Must be one of the following types: int32.
+* @li distance: A Tensor. Must be one of the following types: float32, float64.
+* @li rij_x: A Tensor. Must be one of the following types: float32, float64.
+* @li rij_y: A Tensor. Must be one of the following types: float32, float64.
+* @li rij_z: A Tensor. Must be one of the following types: float32, float64. \n
+*
+* @par Attributes:
+* @li rcut_a: A Float.
+* @li rcut_r: A Float.
+* @li rcut_r_smth: A Float.
+* @li sel_a: A ListInt.
+* @li sel_r: A ListInt. \n
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ProdEnvMatACalcRij)
+    .INPUT(coord, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(type, TensorType({DT_INT32}))
+    .INPUT(natoms, TensorType({DT_INT32}))
+    .INPUT(box, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(mesh, TensorType({DT_INT32}))
+    .OUTPUT(rij, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(nlist, TensorType({DT_INT32}))
+    .OUTPUT(distance, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(rij_x, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(rij_y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(rij_z, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(rcut_a, Float, 1.0)
+    .ATTR(rcut_r, Float, 1.0)
+    .ATTR(rcut_r_smth, Float, 1.0)
+    .ATTR(sel_a, ListInt, {})
+    .ATTR(sel_r, ListInt, {})
+    .OP_END_FACTORY_REG(ProdEnvMatACalcRij)
+
+/**
+* @brief Calculate ProdEnvMatACalcDescrpt. \n
+*
+* @par Inputs:
+* @li distance: A Tensor. Must be one of the following types: float32, float64.
+* @li rij_x: A Tensor. Must be one of the following types: float32, float64.
+* @li rij_y: A Tensor. Must be one of the following types: float32, float64.
+* @li rij_z: A Tensor. Must be one of the following types: float32, float64.
+* @li type: A Tensor. Must be one of the following types: int32.
+* @li natoms: A Tensor. Must be one of the following types: int32.
+* @li mesh: A Tensor. Must be one of the following types: int32.
+* @li davg: A Tensor. Must be one of the following types: float32, float64.
+* @li dstd: A Tensor. Must be one of the following types: float32, float64. \n
+*
+* @par Outputs:
+* @li descrpt: A Tensor. Must be one of the following types: float32, float64.
+* @li descrpt_deriv: A Tensor. Must be one of the following types: float32, float64. \n
+*
+* @par Attributes:
+* @li rcut_a: A Float.
+* @li rcut_r: A Float.
+* @li rcut_r_smth: A Float.
+* @li sel_a: A ListInt.
+* @li sel_r: A ListInt. \n
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ProdEnvMatACalcDescrpt)
+    .INPUT(distance, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(rij_x, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(rij_y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(rij_z, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(type, TensorType({DT_INT32}))
+    .INPUT(natoms, TensorType({DT_INT32}))
+    .INPUT(mesh, TensorType({DT_INT32}))
+    .INPUT(davg, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(dstd, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(descrpt, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(descrpt_deriv, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(rcut_a, Float, 1.0)
+    .ATTR(rcut_r, Float, 1.0)
+    .ATTR(rcut_r_smth, Float, 1.0)
+    .ATTR(sel_a, ListInt, {})
+    .ATTR(sel_r, ListInt, {})
+    .OP_END_FACTORY_REG(ProdEnvMatACalcDescrpt)
+
+/**
+* @brief Calculate ProdForceSeA. \n
+*
+* @par Inputs:
+* Four inputs, including:
+* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li nlist: A Tensor. dtype is int32.
+* @li natoms: A Tensor. dtype is int32. \n
+*
+* @par Outputs:
+* atom_force: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Attributes:
+* Two attributes, including:
+* @li n_a_sel: An int value.
+* @li n_r_sel: An int value. \n
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ProdForceSeA)
+    .INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(nlist, TensorType({DT_INT32}))
+    .INPUT(natoms, TensorType({DT_INT32}))
+    .OUTPUT(atom_force, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(n_a_sel, Int)
+    .REQUIRED_ATTR(n_r_sel, Int)
+    .OP_END_FACTORY_REG(ProdForceSeA)
+
+/**
+* @brief Calculate ProdVirialSeA. \n
+*
+* @par Inputs:
+* Five inputs, including:
+* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li rij: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li nlist: A Tensor. dtype is int32.
+* @li natoms: A Tensor. dtype is int32. \n
+*
+* @par Outputs:
+* Two outputs, including:
+* @li virial: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Attributes:
+* Two attributes, including:
+* @li n_a_sel: Int value.
+* @li n_r_sel: Int value. \n
\n +*/ +REG_OP(ProdVirialSeA) + .INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(nlist, TensorType({DT_INT32})) + .INPUT(natoms, TensorType({DT_INT32})) + .OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(n_a_sel, Int) + .REQUIRED_ATTR(n_r_sel, Int) + .OP_END_FACTORY_REG(ProdVirialSeA) + +/** +* @brief Calculate TabulateFusionGrad. \n +* +* @par Inputs: +* Five inputs, including: +* @li table: A Tensor. Must be one of the following types: float16, float32, float64. +* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64. +* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64. +* @li em: A Tensor. Must be one of the following types: float16, float32, float64. +* @li dy: A Tensor. Must be one of the following types: float16, float32, float64. +* @li descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n +* +* @par Outputs: +* @li dy_dem_x: A Tensor. Must be one of the following types: float16, float32, float64. +* @li dy_dem: A Tensor. Must be one of the following types: float16, float32, float64. \n +* +* @par Attributes: +* Two attributes, including: +* @li split_count: A Scalar. +* @li split_index: A Scalar. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(TabulateFusionGrad) + .INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(dy_dem_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(dy_dem, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(TabulateFusionGrad) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index c64bc138..29cfa4f5 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,10 +28,13 @@ namespace ge { *@par Inputs: *Dynamic inputs, including: -* @li x: A list of Tensor objects, each with same shape and type. The supported types are: +*x: A list of Tensor objects, each with same shape and type. The supported types are: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, * qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n +*@par Attributes: +*N: An required attribute of type int32, means nums of inputs. \n + *@par Outputs: *y: A Tensor. Has the same shape and type as the elements of "x". \n @@ -39,35 +42,35 @@ namespace ge { *Compatible with the TensorFlow operator AddN. 
 /**
-*@brief Calculates the reversed outputs of the function "maximum"
+*@brief Calculates the reversed outputs of the function "maximum".
 *@par Inputs:
-*Three inputs, including:
-* @li grads: A mutable Tensor. Must be one of the following types:
-* float16, float32, int32.
-* @li x1: A mutable Tensor of the same type as "grads".
-* @li x2: A mutable Tensor of the same type as "grads". \n
+* Three inputs, including:
+*@li grads: A mutable Tensor. Must be one of the following types:
+* float16, float32, int32.
+*@li x1: A mutable Tensor of the same type as "grads".
+*@li x2: A mutable Tensor of the same type as "grads". \n
 *@par Attributes:
 *@li grad_x: An optional bool. Defaults to "True".
-* If "True", "y1" will be output.
-* If "False", "y1" will not be output. \n
+* If "True", "y1" will be output.
+* If "False", "y1" will not be output. \n
 *@li grad_y: An optional bool. Defaults to "True".
-* If "True", "y2" will be output.
-* If "False", "y2" will not be output. \n
+* If "True", "y2" will be output.
+* If "False", "y2" will not be output. \n
 *@par Outputs:
-* @li y1: A mutable Tensor. Has the same type as "grads".
-* @li y2: A mutable Tensor. Has the same type as "grads". \n
+*@li y1: A mutable Tensor. Has the same type as "grads".
+*@li y2: A mutable Tensor. Has the same type as "grads". \n
-*@par Third-party framework compatibility
+*@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator MaximumGrad.
 */
 REG_OP(MaximumGrad)
@@ -81,29 +84,29 @@ REG_OP(MaximumGrad)
     .OP_END_FACTORY_REG(MaximumGrad)
 /**
-*@brief Calculates the reversed outputs of the function "minimum"
+*@brief Calculates the reversed outputs of the function "minimum".
 *@par Inputs:
-*Three inputs, including:
-* @li grads: A mutable Tensor. Must be one of the following types:
-* float16, float32, int32.
-* @li x1: A mutable Tensor of the same type as "grads".
-* @li x2: A mutable Tensor of the same type as "grads". \n
+* Three inputs, including:
+*@li grads: A mutable Tensor. Must be one of the following types:
+* float16, float32, int32.
+*@li x1: A mutable Tensor of the same type as "grads".
+*@li x2: A mutable Tensor of the same type as "grads". \n
 *@par Attributes:
 *@li grad_x: An optional bool. Defaults to "True".
-* If "True", "y1" will be output.
-* If "False", "y1" will not be output. \n
+* If "True", "y1" will be output.
+* If "False", "y1" will not be output. \n
 *@li grad_y: An optional bool. Defaults to "True".
-* If "True", "y2" will be output.
-* If "False", "y2" will not be output. \n
+* If "True", "y2" will be output.
+* If "False", "y2" will not be output. \n
 *@par Outputs:
-* @li y1: A mutable Tensor. Has the same type as "grads".
-* @li y2: A mutable Tensor. Has the same type as "grads". \n
+*@li y1: A mutable Tensor. Has the same type as "grads".
+*@li y2: A mutable Tensor. Has the same type as "grads". \n
-*@par Third-party framework compatibility
+*@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator MinimumGrad.
 */
 REG_OP(MinimumGrad)
@@ -122,26 +125,29 @@ REG_OP(MinimumGrad)
 *@par Inputs:
 *One input:
 *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8,
-   int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n
+   int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+   For float32 type, the actual calculation on the chip is based on float16. \n
 *@par Attributes:
 *dst_type: An required attribute of type int32, specifying the dst data type. \n
 *@par Outputs:
-*y:A Tensor. Has the same type as x.
+*y: A Tensor with the same shape as "x", whose data type is specified by "dst_type".
 */
 REG_OP(Cast)
     .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, DT_INT64,
                           DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64,
-                          DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
+                          DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16}))
     .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, DT_INT64,
                            DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64,
-                           DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
+                           DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16}))
     .REQUIRED_ATTR(dst_type, Int)
     .OP_END_FACTORY_REG(Cast)
 /**
 *@brief Returns the truth value of (x1 >= x2) element-wise. \n
+* When the input is int32 and (x2 - x1) > 2**31 or < -2**31,
+* AI Core accuracy is not guaranteed. \n
 *@par Inputs:
 *Two inputs, including:
@@ -163,6 +169,8 @@ REG_OP(GreaterEqual)
 /**
 *@brief Returns the truth value of (x1 < x2) element-wise. \n
+* When the input is int32 and (x2 - x1) > 2**31 or < -2**31,
+* AI Core accuracy is not guaranteed. \n
 *@par Inputs:
 *Two inputs, including:
@@ -278,7 +286,7 @@ REG_OP(Minimum)
 *@par Inputs:
 *One inputs, include:
 *x:A Tensor of type float16, float32, int32, int64, double,
-* complex64, complex128.the format can be [NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND]
+* complex64, complex128. The format can be [NCHW,NHWC,ND]
 *@par Outputs:
 *y:A Tensor with same type as "x". \n
@@ -322,8 +330,8 @@ REG_OP(Sub)
 *@brief computes the absolute value of a tensor. \n
 *@par Inputs:
-*One inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
+*One input, including: \n
+*x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64. \n
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n
@@ -332,8 +340,10 @@ REG_OP(Sub)
 *Compatible with the TensorFlow operator Abs.
 */
 REG_OP(Abs)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16,
+                          DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16,
+                           DT_INT32, DT_INT64}))
     .OP_END_FACTORY_REG(Abs)
 /**
@@ -408,7 +418,7 @@ REG_OP(SquaredDifference)
 *@par Inputs:
 *x: A Tensor of type float16, float32, double, complex64, complex128.
-* the format can be [NCHW,NC1HWC0,NHWC,ND]
+* the format can be [NCHW,NHWC,ND]
 *@par Outputs:
 *y: A Tensor of the same type as "x". \n
@@ -429,7 +439,7 @@ REG_OP(Cos)
 * Two inputs, including:
 *@li x1: A Tensor. Must be one of the following types:
 * float16, float32, int32, int8, uint8, float64, int64, uint16, int16,
-* complex64, complex128, the format can be [NCHW,NC1HWC0,NHWC,ND].
+* complex64, complex128, the format can be [NCHW,NHWC,ND].
 *@li x2: A Tensor. Has the same type and format as input "x1". \n
 *@par Outputs:
@@ -458,7 +468,7 @@ REG_OP(Div)
 *@li x1: A Tensor. Must be one of the following types:
 * float16, float32, int32, int8, uint8, double, int16, int64, complex64,
 * complex128, quint8, qint8, qint32, string, bool. the format can be
-* [NCHW, NC1HWC0, NHWC, ND]
+* [NCHW, NHWC, ND]
 *@li x2: A Tensor of the same type and format as "x1". \n
 *@par Outputs:
@@ -524,15 +534,34 @@ REG_OP(Expm1)
     .OP_END_FACTORY_REG(Expm1)
 /**
+* @brief Computes the expint(x). \n
+
+* @par Inputs:
+* One input:
+* x: A Tensor. Must be one of the following types: bfloat16, half, float32, double. \n
-*@par Inputs:\n
-*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n
+* @par Outputs:
+* y: A Tensor of the same type as "x". \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow operator Expint.
+*/
+REG_OP(Expint)
+    .INPUT(x, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(Expint)
+
+/**
+*@brief: Computes the reciprocal of "x".
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, float32,
+* int32, int64, double, complex64, complex128. \n
 *@par Outputs:
-*y: A Tensor. Has the same type as "x". \n
+*y: A Tensor. Must be one of the following types: float16, float32, int32. \n
-*@par Third-party framework compatibility
+*@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator Inv.
 */
 REG_OP(Inv)
@@ -541,18 +570,19 @@ REG_OP(Inv)
     .OP_END_FACTORY_REG(Inv)
 /**
-*@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", and "dy"
- is the corresponding input gradient. \n
+*@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x",
+* and "dy" is the corresponding input gradient.
 *@par Inputs:
 * Two inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8.
-* @li grad: A Tensor. Has the same type as "x". \n
+*@li x: A Tensor. Must be one of the following types: float16, float32,
+* int32, int8.
+*@li grad: A Tensor. Has the same type as "x". \n
 *@par Outputs:
 *y: A Tensor, Has the same type as "x". \n
-*@par Third-party framework compatibility
+*@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator InvGrad.
 */
 REG_OP(InvGrad)
@@ -563,6 +593,8 @@
 /**
 *@brief: Returns the truth value of (x <= y) element-wise. \n
+* When the input is int32 and (x2 - x1) > 2**31 or < -2**31,
+* AI Core accuracy is not guaranteed. \n
 *@par Inputs:
 * Two inputs, including:
@@ -603,16 +635,27 @@ REG_OP(Log1p)
 /**
 *@brief Returns element-wise remainder of division.
+
 *@par Inputs:
-*Two inputs, including:
-* @li x1: A Tensor. Must be one of the following types: float16, float32,
- * int32, int64, int8, uint8, double.
-* @li x2: A Tensor of the same type as "x1". \n
+* Two inputs, including:
+*@li x1: A Tensor. Must be one of the following types: float16, float32,
+* int32, int64, int8, uint8, double.
+*@li x2: A Tensor of the same type as "x1". \n
 *@par Outputs:
-*y: A Tensor. Has the same type as "x1".
-*@par Third-party framework compatibility
-*Compatible with the TensorFlow operator Mod.
+*y: A Tensor. Has the same type as "x1". \n
+
+*@attention Constraints:
+*@li x2: The input data cannot be 0.
+*@li When NUM exceeds 2048, the accuracy of the operator cannot guarantee the
+* requirement of double thousandths in the mini form.
+*@li Due to different architectures, the calculation results of this operator
+* on NPU and CPU may be inconsistent.
+*@li If the shape is expressed as (D1, D2, ..., Dn),
+* then D1*D2*...*Dn <= 1000000 and n <= 8. \n
+
+*@par Third-party framework compatibility:
+* Compatible with the TensorFlow operator Mod.
 */
 REG_OP(Mod)
     .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8,
@@ -624,18 +667,18 @@ REG_OP(Mod)
     .OP_END_FACTORY_REG(Mod)
 /**
-*@brief: Returns the truth value of (x != y) element-wise. \n
+*@brief Returns the truth value of (x != y) element-wise.
 *@par Inputs:
 * Two inputs, including:
-*@li x1: A Tensor. Must be one of the following types: float16, float32, int32,
- * int8, uint8, double, int16, int64, uint16, half, uint32, uint64
+*@li x1: A Tensor. Must be one of the following types: float16, float32, int32,
+* int8, uint8, double, int16, int64, uint16, half, uint32, uint64.
 *@li x2: A Tensor of the same type as "x1". \n
 *@par Outputs:
 *y: A Tensor of type bool. \n
-*@par Third-party framework compatibility
+*@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator NotEqual.
 */
 REG_OP(NotEqual)
@@ -645,6 +688,25 @@
     .OP_END_FACTORY_REG(NotEqual)
 /**
+*@brief Computes ndtri element-wise (y = sqrt(2) * erfinv(2 * x - 1)).
+
+*@par Inputs:
+* One input, including: \n
+*x: A Tensor. Must be one of the following types: bfloat16, float16,
+* float32, double. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type and format as input "x". \n
+
+*@par Third-party framework compatibility:
+* Compatible with the TensorFlow operator Ndtri.
+*/
+REG_OP(Ndtri)
+    .INPUT(x, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(Ndtri)
+
+/**
 *@brief Computes numerical negative value element-wise (y = -x)
 *@par Inputs:
@@ -664,13 +726,12 @@ REG_OP(Neg)
     .OP_END_FACTORY_REG(Neg)
 /**
-*@brief Returns x1/x2 element-wise for integer types. \n
+*@brief Returns x1/x2 element-wise for integer types.
 *@par Inputs:
 *@li x1: A Tensor. Must be one of the following types:
-* float32, float64, int32, uint8, int16, int8,
-* complex64, int64, qint8, quint8, qint32, uint16,
-* complex128, float16, uint32, uint64, complex64, complex128.
+* float32, float16, int8, uint8, int32, int16,
+* uint16, double, int64, complex64, complex128.
 *@li x2: A Tensor of the same data type as "x1". \n
 *@par Outputs:
@@ -720,8 +781,33 @@ REG_OP(Xdivy)
     .OP_END_FACTORY_REG(Xdivy)
 /**
+* @brief Computes "x" multiplied by the natural logarithm of (1 + "y") element-wise;
+* if "x" == 0, returns "0".
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32,
+* double, complex64, complex128.
+* @li y: A Tensor. Has the same type as "x". \n
+
+* @par Outputs:
+* z: A Tensor. Has the same type as "x". \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow operator Xlog1py.
+*/
+REG_OP(Xlog1py)
+    .INPUT(x, TensorType({DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64,
+                          DT_COMPLEX128}))
+    .INPUT(y, TensorType({DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64,
+                          DT_COMPLEX128}))
+    .OUTPUT(z, TensorType({DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64,
+                           DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(Xlog1py)
+
+/**
 *@brief Computes "x" multiplied by the logarithm of y element-wise,
-* if "x" == 0, return "0". \n
+* if "x" == 0, return "0".
 *@par Inputs:
 * Two inputs, including:
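Reviewer note: element semantics of the two xlog ops above, as a sketch. Both return 0 when x == 0, even where the log term would be -inf or NaN:

```cpp
#include <cmath>

// Sketch of the per-element contracts (real-valued case only).
inline double Xlog1py(double x, double y) {
  return x == 0.0 ? 0.0 : x * std::log1p(y);  // x * log(1 + y)
}
inline double Xlogy(double x, double y) {
  return x == 0.0 ? 0.0 : x * std::log(y);    // x * log(y)
}
```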
@@ -950,6 +1036,25 @@ REG_OP(LogicalOr)
     .OP_END_FACTORY_REG(LogicalOr)
 /**
+* @brief Computes spence of x element-wise.
+*
+* @par Inputs:
+* x: A tensor. Must be one of the following types: bfloat16, float16, float32, double.
+*
+* @par Outputs:
+* y: A tensor. Has the same type as "x".
+*
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator Spence.
+*
+*/
+REG_OP(Spence)
+    .INPUT(x, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(Spence)
+
+/**
 *@brief Returns the truth value of x1 AND x2 element-wise. \n
 *
@@ -1020,7 +1125,7 @@ REG_OP(BesselI1e)
 * y = log_base(shift + scale * x), with "base" > 0. \n
 * @par Inputs:
-* @li x: A Tensor of type complex64, complex128, float16, float32 or double. \n
+* x: A Tensor of type complex64, complex128, float16, float32 or double. \n
 * @par Attributes:
 * @li base: An optional float32, specifying the base "e". Defaults to "-1.0"
@@ -1065,7 +1170,7 @@ REG_OP(Log)
 * uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n
 * @attention Constraints:
-* @li "x1" and "x2" have incompatible shapes or types. \n
+* "x1" and "x2" have incompatible shapes or types. \n
 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator Multiply.
@@ -1156,6 +1261,31 @@ REG_OP(FusedMulAdd)
     .OP_END_FACTORY_REG(FusedMulAdd)
 /**
+*@brief Fuses mul+add+add with broadcast. \n
+
+*@par Inputs:
+*Four inputs, including:
+* @li x1: A Tensor. Must be one of the following types: int32, float16, float32.
+* @li x2: A Tensor of the same type as "x1".
+* @li x3: A Tensor of the same type as "x1".
+* @li x4: A Tensor of the same type as "x1". \n
+
+*@par Outputs:
+* y: A Tensor. Has the same type as "x1". \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(FusedMulAddAdd)
+    .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .INPUT(x3, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .INPUT(x4, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OP_END_FACTORY_REG(FusedMulAddAdd)
+
+/**
 *@brief Returns x1 + x2 element-wise. \n
 *
@@ -1278,7 +1408,7 @@ REG_OP(AssignSub)
 *@par Inputs:
 * Two inputs, including:
-*@li y: An NCHW, NC1HWC0, NHWC, ND Tensor. Must be one of the following types: \
+*@li y: An NCHW, NHWC, ND Tensor. Must be one of the following types: \
 * float, int32, int8, double, complex64, complex128, half.
 *@li dy: A Tensor of the same type and format as "y". \n
@@ -1297,14 +1427,14 @@ REG_OP(RsqrtGrad)
     .OP_END_FACTORY_REG(RsqrtGrad)
 /**
-*@brief Computes hyperbolic sine of "x" element-wise. \n
+*@brief Computes hyperbolic sine of "x" element-wise.
 *@par Inputs:
-*x: An NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64,
+*x: An NCHW, NHWC, or ND Tensor of type float, double, complex64,
 * complex128, half. \n
 *@par Outputs:
-*y: A NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64,
+*y: An NCHW, NHWC, or ND Tensor of type float, double, complex64,
 * complex128, half. \n
 *@par Third-party framework compatibility
@@ -1344,7 +1474,7 @@ REG_OP(ClipByValue)
 *@par Inputs:
 *x: A Tensor of type float16, float32, double, complex64, complex128.
-* the format can be [NCHW,NC1HWC0,NHWC,ND]. \n
+* the format can be [NCHW,NHWC,ND]. \n
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n
@@ -1364,7 +1494,7 @@ REG_OP(Cosh)
 *@par Inputs:
 * Two inputs, including:
 *@li x1: A Tensor. Must be one of the following types:float16, float32, int32,
-* int8, uint8, double, the format can be [NCHW,NC1HWC0,NHWC,ND].
+* int8, uint8, double, the format can be [NCHW,NHWC,ND].
 *@li x2: A Tensor of the same type as "x1". \n
 *@par Outputs:
@@ -1383,18 +1513,18 @@ REG_OP(DivNoNan)
     .OP_END_FACTORY_REG(DivNoNan)
 /**
-*@brief Reverses specific dimensions of a tensor. \n
+*@brief Inverts (flips) all bits of each element of the input tensor.
 *@par Inputs:
 * One input: \n
 *x: A Tensor, Must be one of the following types:
-* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64,
-* and format can be [NCHW,NC1HWC0,NHWC,ND]
+* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64,
+* and format can be [NCHW,NHWC,ND]. \n
 *@par Outputs:
-*y: A Tensor. Has the same type and format as "x"
+*y: A Tensor. Has the same type and format as "x". \n
-*@par Third-party framework compatibility
+*@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator Invert.
 */
 REG_OP(Invert)
@@ -1451,6 +1581,8 @@ REG_OP(ReciprocalGrad)
 /**
 *@brief Returns the truth value of (x1 > x2) element-wise. \n
+* When the input is int32 and (x2 - x1) > 2**31 or < -2**31,
+* AI Core accuracy is not guaranteed. \n
 *@par Inputs:
 *@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8,
@@ -1492,8 +1624,8 @@ REG_OP(Greater)
 * Compatible with the TensorFlow operator zeros_like.
 */
 REG_OP(ZerosLike)
-    .INPUT(x, TensorType::BasicType())
-    .OUTPUT(y, TensorType::BasicType())
+    .INPUT(x, TensorType({BasicType(), DT_VARIANT}))
+    .OUTPUT(y, TensorType({BasicType(), DT_VARIANT}))
     .OP_END_FACTORY_REG(ZerosLike)
 /**
@@ -1640,6 +1772,45 @@ REG_OP(Atan2)
     .OP_END_FACTORY_REG(Atan2)
 /**
+*@brief Computes fresnel_cos of x element-wise.
+*
+*@par Inputs:
+*x: A tensor. Must be one of the following types: bfloat16, float16, float32,
+* double. \n
+*
+*@par Outputs:
+*y: A tensor. Has the same type as "x". \n
+*
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator FresnelCos.
+*
+*/
+REG_OP(FresnelCos)
+    .INPUT(x, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(FresnelCos)
+
+/**
+*@brief Computes fresnel_sin of x element-wise.
+*
+*@par Inputs:
+*x: A tensor. Must be one of the following types: bfloat16, float16, float32,
+* double. \n
+*
+*@par Outputs:
+*y: A tensor. Has the same type as "x". \n
+*
+*@par Third-party framework compatibility:
+* Compatible with the TensorFlow operator FresnelSin.
+*
+*/
+REG_OP(FresnelSin)
+    .INPUT(x, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(FresnelSin)
+
+/**
 *@brief Returns the truth value of abs(x1-x2) < tolerance element-wise. \n
 *
@@ -1955,7 +2126,7 @@ REG_OP(BitwiseOr)
 *@par Inputs:
 *Two inputs, including:
 *@li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64.
-* The format is NC1HWC0 or ND. Broadcasting is supported.
+* The format is ND. Broadcasting is supported.
 *@li x2: A Tensor. Has the same type and format as "x1". \n
 *@par Outputs:
@@ -2042,6 +2213,15 @@ REG_OP(FloorDiv)
 *
 *@par Outputs:
 *y: Result remainder.
+
+*@attention Constraints:
+*@li x2: The input data cannot be 0.
+*@li When NUM exceeds 2048, the accuracy of the operator cannot guarantee the
+*requirement of double thousandths in the mini form.
+*@li Due to different architectures, the calculation results of this operator
+*on NPU and CPU may be inconsistent.
+*@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8.
+
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator FloorMod.
 */
@@ -2137,7 +2317,7 @@ REG_OP(Sin)
     .OP_END_FACTORY_REG(Sin)
 /**
-*@brief: Computes tan of "x" element-wise. \n
+*@brief: Computes tan of "x" element-wise.
 *@par Inputs:
 *One input:
@@ -2157,7 +2337,7 @@ REG_OP(Tan)
     .OP_END_FACTORY_REG(Tan)
 /**
-*@brief Returns element-wise remainder of division. \n
+*@brief Returns element-wise remainder of division.
 *@par Inputs:
 *Two inputs, including:
@@ -2168,8 +2348,16 @@ REG_OP(Tan)
 *@par Outputs:
 *y: A Tensor. Has the same type as "x1". \n
+*@attention Constraints:
+*@li x2: The input data cannot be 0.
+*@li When NUM exceeds 2048, the accuracy of the operator cannot guarantee the
+*requirement of double thousandths in the mini form.
+*@li Due to different architectures, the calculation results of this operator
+*on NPU and CPU may be inconsistent.
+*@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8.
+
 *@par Third-party framework compatibility
-*@li Compatible with the TensorFlow operator TruncateMod.
+*Compatible with the TensorFlow operator TruncateMod.
 */
 REG_OP(TruncateMod)
     .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64,
@@ -2425,6 +2613,25 @@ REG_OP(Eltwise)
     .OP_END_FACTORY_REG(Eltwise)
 /**
+ *@brief Computes the inverse error function of each element of input. \n
+
+ *@par Inputs:
+ *One input, including:
+ * input_x: A tensor. Must be one of the following types:
+ * float16, float32. \n
+
+ *@par Outputs:
+ *output_y: A Tensor with the same type and shape as "input_x". \n
+
+ *@par Third-party framework compatibility
+ *Compatible with the PyTorch operator Erfinv. \n
+ */
+REG_OP(Erfinv)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(Erfinv)
+
+/**
 *@brief Computes element-wise population count. \n
 *@par Inputs:
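Reviewer note: the remainder ops documented above (Mod/FloorMod vs. TruncateMod) differ only in how the implied quotient is rounded. A sketch of the integer case:

```cpp
// TruncateMod follows C/C++ '%' (quotient truncated toward zero);
// FloorMod follows Python '%' (quotient floored). They differ only
// when the operands have opposite signs.
inline int TruncateMod(int x1, int x2) { return x1 % x2; }

inline int FloorMod(int x1, int x2) {
  int r = x1 % x2;
  if (r != 0 && ((r < 0) != (x2 < 0))) r += x2;  // pull remainder toward the sign of x2
  return r;
}
// Example: TruncateMod(-7, 3) == -1, while FloorMod(-7, 3) == 2.
```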
@@ -2829,9 +3036,9 @@ REG_OP(AdamApplyOneAssign)
 *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(LambApplyOptimizerAssign)
-    .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2842,6 +3049,8 @@ REG_OP(LambApplyOptimizerAssign)
     .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT}))
     .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT}))
     .OP_END_FACTORY_REG(LambApplyOptimizerAssign)
 /**
@@ -2873,7 +3082,8 @@ REG_OP(LambApplyWeightAssign)
     .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT}))
     .OP_END_FACTORY_REG(LambApplyWeightAssign)
 /**
@@ -2928,7 +3138,7 @@ REG_OP(SquareSumV2)
     .OP_END_FACTORY_REG(SquareSumV2)
 /**
-*@brief Confuse reducesumd and square. \n
+*@brief Fuses reducesumd and square.
 *@par Inputs:
 *x: A Tensor of type float16, float32. \n
@@ -2973,19 +3183,20 @@ REG_OP(SquareSumAll)
     .OP_END_FACTORY_REG(SquareSumAll)
 /**
-*@brief Confuse broadcast, addn and mul. \n
+*@brief Fuses broadcast, addn and mul.
 *@par Inputs:
 *Three inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32, int16, float16, float32.
-* @li x2: A Tensor of the same type as "x1".
-* @li x3: A Tensor of the same type as "x1". \n
+*@li x1: A Tensor. Must be one of the following types: int32, int16,
+* float16, float32.
+*@li x2: A Tensor of the same type as "x1".
+*@li x3: A Tensor of the same type as "x1". \n
 *@par Outputs:
-* y: A Tensor. Has the same type as "x1".
+*y: A Tensor. Has the same type as "x1". \n
 *@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(FusedMulAddN)
     .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16}))
@@ -2995,7 +3206,7 @@ REG_OP(FusedMulAddN)
     .OP_END_FACTORY_REG(FusedMulAddN)
 /**
-*@brief Add 'bias' to 'x'. \n
+*@brief Add 'bias' to 'x'.
 *@par Inputs:
 * Two inputs, including:
@@ -3004,22 +3215,31 @@ REG_OP(FusedMulAddN)
 *@par Attributes:
 *@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1".
-*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1".
-*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n
+*@li num_axes: An optional int32 used to compute the shape of
+* bias input from a Caffe model trained offline. Defaults to "1".
+*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline.
+* If "false", bias is input from online bottoms. Defaults to "true". \n
 *@par Outputs:
 *y: An ND tensor of type float16 or float32. \n
-*@attention Constraints:\n
- * Assume that the shape length of "x" is "n" and that of "bias" is "m".
-*@li "axis" is within the range [-n, n-1]. num_axes >= -1.
-*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n
-* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis).
-*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1.
-*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n
-* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes).
-*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n
-* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m).
+*@attention Constraints:
+* Assume that the shape length of "x" is "n" and that of "bias" is "m".
+*@li "axis" is within the range [-n, n-1]. num_axes >= -1.
+*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0",
+* the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).
+* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis).
+*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1.
+*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0",
+* "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and
+* the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).
+* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and
+* the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes).
+*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0",
+* "axis + m" must be less than or equal to "n" and the ith axis of "bias" and
+* the (i+"axis")th axis of "x" must have the same size (0 <= i < m).
+* If "axis < 0", "n + axis + m" must be less than or equal to "n" and
+* the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). \n
 *@par Third-party framework compatibility
 * Compatible with the Caffe operator Bias.
 */
@@ -3094,13 +3314,13 @@ REG_OP(FusedMulAddNL2loss)
 *@brief Tests whether the input exceeds a threshold. \n
 *@par Inputs:
-*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n
+* x: A Tensor with any format. Must be one of the following types: float16, float32. \n
 *@par Attributes:
-*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n
+* threshold: An optional float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n
 *@par Outputs:
-*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
+* y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
 *@par Third-party framework compatibility
 * Compatible with the Caffe operator Threshold.
 */
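Reviewer note: the Threshold semantics above reduce to a single comparison per element; a sketch:

```cpp
// Sketch of the Caffe-style Threshold element contract:
// 1 for inputs strictly above "threshold", otherwise 0.
inline float ThresholdElem(float x, float threshold) {
  return x > threshold ? 1.0f : 0.0f;
}
// Example: ThresholdElem(0.3f, 0.0f) == 1.0f, ThresholdElem(-0.3f, 0.0f) == 0.0f.
```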
@@ -3115,7 +3335,7 @@
 *@brief Returns the index number corresponding to the maximum value entered. \n
 *@par Inputs:
-*@li x: A tensor. Must be one of the following types: float16, float32. \n
+*x: A tensor. Must be one of the following types: float16, float32. \n
 *@par Attributes:
 *@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000
@@ -3143,12 +3363,11 @@ REG_OP(ArgMaxWithK)
 *@brief Multiply tensor with scale. \n
 *@par Inputs:
-*Five inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
-* @li x2: A scale. Must be float. \n
+*One input, including:
+*x: A Tensor. Must be one of the following types: int32, int16, float16, float32.
 *@par Outputs:
-*@li y: A Tensor. Has the same type and shape as "x1". \n
+*y: A Tensor. Has the same type and shape as "x". \n
 *@par Third-party framework compatibility:
 * Compatible with the Pytorch operator muls.
@@ -3163,12 +3382,11 @@
 *@brief Fill tensor with scale. \n
 *@par Inputs:
-*Five inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
-* @li x2: A scale. Must be float. \n
+*One input, including:
+*x1: A Tensor. Must be one of the following types: int32, int16, float16, float32.
 *@par Outputs:
-*@li y: A Tensor. Has the same type and shape as "x1". \n
+*y: A Tensor. Has the same type and shape as "x1". \n
 *@par Third-party framework compatibility:
 * Compatible with the Pytorch operator fills.
@@ -3183,12 +3401,14 @@
 *@brief Add tensor with scale. \n
 *@par Inputs:
-*Five inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
-* @li x2: A scale. Must be float. \n
+*One input, including: \n
+*x: A Tensor. Must be one of the following types: int32, int16, float16, float32. \n
+
+*@par Attributes:
+*value: A float scalar. \n
 *@par Outputs:
-*@li y: A Tensor. Has the same type and shape as "x1". \n
+*y: A Tensor. Has the same type and shape as "x". \n
 *@par Third-party framework compatibility:
 * Compatible with the Pytorch operator adds.
@@ -3200,11 +3420,14 @@ REG_OP(Fills)
     .OP_END_FACTORY_REG(Adds)
 /**
-*@brief Computes the product of x and y and returns 0 if the y is zero, even if x is NaN or infinite. \n
+*@brief Computes the product of x and y and returns 0 if "y" is zero,
+* even if x is NaN or infinite.
 *@par Inputs:
-* @li x1: A Tensor. Must be one of the following types:float16, float32, double, complex64, complex128.
-* @li x2: A Tensor. Has the same type and shape as "x1". \n
+* Two inputs, including: \n
+*@li x1: A Tensor. Must be one of the following types: float16, float32,
+* double, complex64, complex128.
+*@li x2: A Tensor. Has the same type and shape as "x1". \n
 *@par Outputs:
 *y: A Tensor. Has the same type and shape as "x1". \n
@@ -3316,7 +3539,7 @@ REG_OP(TensorMove)
 *@par Inputs:
 *One inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n
+*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n
 *@par Outputs:
 *output_x: A Tensor. Has the same type as "x". \n
\n
@@ -3329,8 +3552,486 @@ REG_OP(TensorRedirect)
     .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32}))
     .OP_END_FACTORY_REG(TensorRedirect)

-}  // namespace ge
+/**
+* @brief Performs the element-wise division of tensor x1 by tensor x2,
+* multiplies the result by the scalar value, and adds it to tensor input_data.
+
+* @par Inputs:
+* Four inputs, including:
+* @li input_data: A mutable input Tensor. Must be one of the following types:
+* float16, float32, double, int64.
+* @li x1: A mutable input Tensor of the same type as input_data.
+* @li x2: A mutable input Tensor of the same type as input_data.
+* @li value: A mutable input Tensor. Must be one of the following types:
+* float16, float32, double, int64, int32. \n
+
+
+* @par Outputs:
+* y: A mutable Tensor. Has the same type as input_data. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Addcdiv(version-1.5.0).
+*/
+REG_OP(Addcdiv)
+    .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64}))
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64}))
+    .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32, DT_DOUBLE, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64}))
+    .OP_END_FACTORY_REG(Addcdiv)
+
+/**
+* @brief Performs the element-wise multiplication of tensor x1 by tensor x2,
+* multiplies the result by the scalar value, and adds it to tensor input_data.
+
+* @par Inputs:
+* Four inputs, including:
+* @li input_data: A mutable input Tensor. Must be one of the following types:
+* float16, float32, double, int64, int8, int32, uint8.
+* @li x1: A mutable input Tensor of the same type as input_data.
+* @li x2: A mutable input Tensor of the same type as input_data.
+* @li value: A tensor which includes only one element of the same type as input_data. \n
+
+* @par Outputs:
+* y: A mutable output Tensor. Has the same type as input_data. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Addcmul.
+*/
+REG_OP(Addcmul)
+    .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
+    .INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
+    .OP_END_FACTORY_REG(Addcmul)
+
+/**
+* @brief Computes the result of x2 * alpha + x1.
+* @par Inputs:
+* @li x1: An ND tensor of type float16, float32, int32.
+* @li x2: An ND tensor of type float16, float32, int32.
+* @li alpha: A scalar tensor of type float16, float32. \n
+
+* @par Outputs:
+* y: An ND tensor with the same shape and type as "x1". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Axpy.
+*/
+REG_OP(AxpyV2)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OP_END_FACTORY_REG(AxpyV2)
+
+/**
+* @brief Add the partial values of two tensors.
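+* (Illustrative note, not part of this patch: under the 5HD layout described
+* below, with x1 of shape [N, C1_x1, H, W, C0] and x2 of shape
+* [N, C1_x2, H, W, C0], the op is expected to add the c1_len C1-slices of x1
+* starting at x1_c1_offset to the c1_len C1-slices of x2 starting at
+* x2_c1_offset, giving y a shape of [N, c1_len, H, W, C0].)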
+
+* @par Inputs:
+* @li x1: A Tensor in 5HD, and must be one of the following types: float16,
+* float32. \n
+* @li x2: A Tensor of the same type as "x1", and the same shape as "x1",
+* except for the C1 value. \n
+
+* @par Attributes:
+* @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n
+* @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n
+* @li c1_len: A required int. C1 len of "y". The value must be less than
+* the difference between C1 and offset in "x1" and "x2". \n
+
+* @par Outputs:
+* y: A Tensor of the same type as "x1", and the same shape as "x1",
+* except for the C1 value. Records the result after adding. \n
+*/
+REG_OP(StrideAdd)
+    .INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .REQUIRED_ATTR(x1_c1_offset, Int)
+    .REQUIRED_ATTR(x2_c1_offset, Int)
+    .REQUIRED_ATTR(c1_len, Int)
+    .OP_END_FACTORY_REG(StrideAdd)
+
+/**
+* @brief Compares whether two tensors are totally equal, outputting only a single bool value.
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_x: A Tensor. The first tensor. \n
+* @li input_y: A Tensor. The second tensor. \n
+
+* @par Outputs:
+*output_z: A Tensor of type bool, holding the comparison result of the two inputs. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch equal operator. \n
+*/
+REG_OP(TensorEqual)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8}))
+    .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8}))
+    .OUTPUT(output_z, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(TensorEqual)
+
+/**
+ * @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support).
+ * All inputs and outputs must have the same data type. This operator supports multidirectional
+ * (i.e., Numpy-style) broadcasting.
+ *
+ * @par Inputs:
+ * One input, including:
+ * x: A dynamic-input Tensor. Must be one of the following types: float32, float16, double, int32, int64
+ *
+ * @par Outputs:
+ * One output, including:
+ * y: A Tensor of the same type as x
+ *
+ */
+REG_OP(MaxN)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
+    .OP_END_FACTORY_REG(MaxN)
+
+
+/**
+ * @brief Calculates x * mask * value.
+ *
+ * @par Inputs:
+ * @li x: A tensor of type float16 or float32, specifying the input to the data layer.
+ * @li mask: A tensor of type int8, float16, or float32, with the same shape as x. \n
+ *
+ * @par Attributes:
+ * value: A required float. \n
+ *
+ * @par Outputs:
+ * y: The output tensor, of the same type and shape as x (float16 or float32).
+ *
+ */
+REG_OP(MaskedScale)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32}))
+    .INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32}))
+    .REQUIRED_ATTR(value, Float)
+    .OP_END_FACTORY_REG(MaskedScale)
+
+/**
+ * @brief Calculate the lerp function. \n
+
+ * @par Inputs:
+ * Three inputs, including:
+ * @li start: A tensor. Must be one of the following types:
+ * float16, float32. \n
+ * @li end: A tensor. Must be one of the following types:
+ * float16, float32. \n
+ * @li weight: A tensor. Must be one of the following types:
+ * float16, float32. \n

+ * @par Outputs:
+ * y: A Tensor with the same type and shape as "start".
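+ *
+ * (Illustrative note, not part of this patch: Lerp is the usual linear
+ * interpolation y = start + weight * (end - start); e.g. start = [1, 2],
+ * end = [3, 7], weight = 0.5 yields y = [2, 4.5].)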
\n
+
+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator Lerp. \n
+ */
+REG_OP(Lerp)
+    .INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(Lerp)
+
+/**
+*@brief Returns the number of elements for which abs(x1-x2) > atol+rtol*abs(x2), element-wise. \n
+
+*
+*@par Inputs:
+*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16
+*@li x2: A tensor of the same type as "x1".
+*
+*@par Attributes:
+* atol: Defaults to "1e-05".
+* rtol: Defaults to "1e-03".
+*
+*@par Outputs:
+* num: A tensor of type float32.
+*
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*
+*/
+REG_OP(DataCompare)
+    .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32 }))
+    .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32 }))
+    .OUTPUT(num, TensorType({DT_FLOAT}))
+    .ATTR(atol, Float, 1e-5)
+    .ATTR(rtol, Float, 1e-3)
+    .OP_END_FACTORY_REG(DataCompare)
+
+/**
+*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0
+*otherwise. The input does not need to explicitly be a 2D vector. The "axis" attribute indicates the dimension along
+*which Hardmax will be performed. The output tensor has the same shape and contains the Hardmax values of the
+*corresponding input.
+*
+*@par Inputs:
+*One input, including:
+*x: A Tensor. Must be one of the following types: float32, float16
+*
+*@par Attributes:
+*axis: An optional int attribute that decides which dimension the hardmax is calculated along. Defaults to -1
+*
+*@par Outputs:
+*One output, including:
+*y: A Tensor of the same type as x
+*
+*/
+REG_OP(HardMax)
+    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(axis, Int, -1)
+    .OP_END_FACTORY_REG(HardMax)
+
+/**
+* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast.
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_x: A Tensor. The first tensor, which must be 1d. \n
+* @li input_y: A Tensor. The second tensor, which must be 1d. \n
+
+* @par Outputs:
+* output: A Tensor. Result of the two inputs, which must be 1d. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch dot operator. \n
+*/
+REG_OP(Dot)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .OP_END_FACTORY_REG(Dot)
+
+/**
+*@brief Returns a new tensor with boolean elements representing \n
+*whether each element of input is "close" to the corresponding element of other. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A tensor. Must be one of the following types:
+* float16, float32, int32. \n
+* @li x2: A tensor with the same type and shape as "x1". \n
+
+*@par Attributes:
+*@li rtol: An optional float. Defaults to 1e-05. \n
+*@li atol: An optional float. Defaults to 1e-08. \n
+*@li equal_nan: An optional bool. Defaults to false. \n
+
+*@par Outputs:
+*y: A bool Tensor with the same shape as "x1". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator isclose.
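+*
+*@par Note
+* Illustrative sketch, not part of this patch: assuming the wrapper class
+* that REG_OP conventionally generates under ge::op (names here are
+* assumptions), a graph could configure the op as
+*   auto is_close = ge::op::IsClose("is_close")
+*                       .set_input_x1(a)
+*                       .set_input_x2(b)
+*                       .set_attr_rtol(1e-5f)
+*                       .set_attr_atol(1e-8f);
+* where elements compare as close when |x1 - x2| <= atol + rtol * |x2|.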
\n
+*/
+REG_OP(IsClose)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_BOOL}))
+    .ATTR(rtol, Float, 1e-05)
+    .ATTR(atol, Float, 1e-08)
+    .ATTR(equal_nan, Bool, false)
+    .OP_END_FACTORY_REG(IsClose)
+
+/**
+* @brief Returns the reverse tensor of the ArgMax operator of a tensor. \n
+
+* @par Inputs:
+* Three inputs, including:
+* var: A Tensor of type float16, float32, int32 or int8. \n
+* indices: A Tensor of type int32. \n
+* updates: A Tensor of type float16, float32, int32 or int8. \n
+
+* @par Attributes:
+* @li dimension: An integer of type int, specifying the axis information of the index with the maximum value. \n
+
+* @par Outputs:
+* y: A Tensor of type float16, float32, int32 or int8. \n
+*
+*@attention Constraints:
+*@li indices: only supports int32, and its shape must be the same as "updates".
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
+*@li y: A Tensor whose type and shape are the same as "var". \n
+
+*@par Third-party framework compatibility
+* Does not support all scenes of the PyTorch operator scatter.
+* For example:
+* var.shape=[2,3,4,5], dim=2: the shape of indices and updates should be [2,3,5];
+* a shape of [2,3,2,5] for indices and updates, as accepted by the PyTorch operator scatter, is not supported. \n
+*/
+REG_OP(ArgMaxGrad)
+    .INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .REQUIRED_ATTR(dimension, Int)
+    .OP_END_FACTORY_REG(ArgMaxGrad)
+
+/**
+* @brief Returns the reverse tensor of the ArgMax operator of a tensor. \n
+
+* @par Inputs:
+* Four inputs, including:
+* var: A Tensor of type float16, float32, int32 or int8. \n
+* indices: A Tensor of type int32. \n
+* updates: A Tensor of type float16, float32, int32 or int8. \n
+* assist: A Tensor of type int32; an assist matrix whose shape must match the shape of var. \n
+
+* @par Attributes:
+* @li dimension: An integer of type int, specifying the axis information of the index with the maximum value. \n
+
+* @par Outputs:
+* y: A Tensor of type float16, float32, int32 or int8. \n
+
+*@attention Constraints:
+*@li indices: only supports int32, and its shape must be the same as "updates".
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
+*@li y: A Tensor whose type and shape are the same as "var". \n
+
+*@par Third-party framework compatibility
+* Does not support all scenes of the PyTorch operator scatter.
+* For example:
+* var.shape=[2,3,4,5], dim=2: the shape of indices and updates should be [2,3,5];
+* a shape of [2,3,2,5] for indices and updates, as accepted by the PyTorch operator scatter, is not supported. \n
+*/
+REG_OP(ArgMaxGradD)
+    .INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .INPUT(assist, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .REQUIRED_ATTR(dimension, Int)
+    .OP_END_FACTORY_REG(ArgMaxGradD)
+
+/**
+*@brief Calculates the reversed outputs of the function "AddMatMatElements"
+* c = c * beta + alpha * a * b
+
+*@par Inputs:
+*Five inputs, including:
+* @li c: A mutable Tensor. Must be one of the following types:
+* float16, float32.
+* @li a: A mutable Tensor of the same type as "c".
+* @li b: A mutable Tensor of the same type as "c".
+* @li beta: A mutable scalar of the same type as "c".
+* @li alpha: A mutable scalar of the same type as "c". \n
+
+*@par Outputs:
+* @li c: A mutable Tensor. Has the same type as "c". \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator AddMatMatElements.
+*/
+REG_OP(AddMatMatElements)
+    .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(a, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(b, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(c, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(AddMatMatElements)
+
+/**
+*@brief Returns cosine similarity between x1 and x2, computed along dim. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li input_x1: A tensor. Must be one of the following types: float32.
+* @li input_x2: A tensor. Must be one of the following types: float32. \n
+
+* @par Attributes:
+* @li dim: The type is Int and the default value is 1.
+* @li eps: The type is Float and the default value is 1e-8. \n
+
+*@par Outputs:
+* output_y: A Tensor with the same type as "input_x1". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator CosineSimilarity. \n
+*/
+REG_OP(CosineSimilarity)
+    .INPUT(input_x1, TensorType({DT_FLOAT})) /* "First operand." */
+    .INPUT(input_x2, TensorType({DT_FLOAT})) /* "Second operand." */
+    .OUTPUT(output_y, TensorType({DT_FLOAT})) /* "Result, has same element type as two inputs" */
+    .ATTR(dim, Int, 1)
+    .ATTR(eps, Float, 1e-8)
+    .OP_END_FACTORY_REG(CosineSimilarity)
+
+/**
+*@brief Computes the Adam optimizer update. \n
+
+*@par Inputs:
+*Twelve inputs, including:
+* @li var: A Tensor. Support float16/float32.\n
+* @li m: A Tensor. Datatype and shape are the same as "var".\n
+* @li v: A Tensor. Datatype and shape are the same as "var".\n
+* @li lr: A Tensor. Datatype is the same as "var". Shape (1, ).\n
+* @li beta1: A Tensor. Datatype is the same as "var". Shape (1, ).\n
+* @li beta2: A Tensor. Datatype is the same as "var". Shape (1, ).\n
+* @li epsilon: A Tensor. Datatype is the same as "var". Shape (1, ).\n
+* @li grad: A Tensor. Datatype and shape are the same as "var".\n
+* @li max_grad_norm: A Tensor. Datatype is the same as "var". Shape (1, ).\n
+* @li global_grad_norm: A Tensor. Datatype is the same as "var". Shape (1, ).\n
+* @li weight_decay: A Tensor. Datatype is the same as "var". Shape (1, ).\n
+* @li step_size: An optional Tensor. Datatype is the same as "var". Shape (1, ).\n
+
+* @par Attributes:
+* @li adam_mode: An optional string. Defaults to "adam". \n
+
+*@par Outputs:
+*Three outputs, including:
+* @li var: A Tensor. Datatype and shape are the same as the input "var".\n
+* @li m: A Tensor. Datatype and shape are the same as the input "m".\n
+* @li v: A Tensor. Datatype and shape are the same as the input "v".\n
+*/
+REG_OP(ApplyAdamV2)
+    .INPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(lr, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(beta1, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(beta2, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(epsilon, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(grad, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OPTIONAL_INPUT(step_size, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(adam_mode, String, "adam")
+    .OP_END_FACTORY_REG(ApplyAdamV2)
+
+/**
+* @brief Computes the Dawsn operation. \n
+
+*
+* @par Inputs:
+* x: A tensor. Must be one of the following types: bfloat16, float16, float32, float64.
+*
+* @par Outputs:
+* y: A tensor. Has the same type as "x".
+*
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator Dawsn.
+*
+*/
+REG_OP(Dawsn)
+    .INPUT(x, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(Dawsn)
+} // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/encoding_ops.h b/third_party/fwkacllib/inc/ops/encoding_ops.h
new file mode 100644
index 00000000..73344353
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/encoding_ops.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd 2022-2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file encoding_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_
+
+#include "graph/operator_reg.h"
+#include "graph/operator.h"
+
+namespace ge {
+/**
+* @brief An op to decode indices for LDPC code. \n
+
+* @par Inputs:
+* @li valid_num: An int32 tensor indicating the index limit for each line.
+* @li matrix_info: An int32 2D-tensor storing the block indices info of the connection H matrix. \n
+
+* @par Outputs:
+* indices: An int32 2D-tensor storing the concrete indices values.
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/ + +REG_OP(LDPCDecode) + .INPUT(valid_num, TensorType({DT_INT32})) + .INPUT(matrix_info, TensorType({DT_INT32})) + .OUTPUT(indices, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(LDPCDecode) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 598d3ad3..da968f5e 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -141,6 +141,30 @@ REG_OP(If) *@par Third-party framework compatibility *@Compatible with the TensorFlow operator Case. */ +REG_OP(StatelessCase) + .INPUT(branch_index, DT_INT32) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .DYNAMIC_GRAPH(branches) + .OP_END_FACTORY_REG(StatelessCase) + +/** + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n + + *@par Inputs: + *@li branch_index: A int32 scalar which determines the selected subgraph. + *@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n + + *@par Graphs: + *branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, + * whose types are the same as what every other subgraph returns . \n + + *@par Outputs: + *output: The output tensors returned by one of branches . It's a dynamic output. \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator Case. + */ REG_OP(Case) .INPUT(branch_index, DT_INT32) .DYNAMIC_INPUT(input, TensorType::ALL()) @@ -163,9 +187,6 @@ REG_OP(Case) * if it is not a scalar, non-empty means True and empty means False. *@li body: A subgraph takes 'input' and returns a another list of tensors . \n - *@par Attributes: - *parallel_iterations: An optional int, default as 10 . \n - *@par Outputs: *output: The output tensors returned by "body". Has the same type as "input" . \n @@ -328,6 +349,19 @@ REG_OP(StatefulPartitionedCall) .ATTR(executor_type, String, "") .OP_END_FACTORY_REG(StatefulPartitionedCall) +/** + * @par Inputs: + * @li input: The input tensors \n + * + * @par Outputs: + * @li output: The output tensors. \n + */ +REG_OP(ToBool) + .INPUT(input, TensorType({DT_INT64, DT_INT32, DT_INT16, DT_INT8, \ + DT_UINT8, DT_FLOAT, DT_DOUBLE, DT_STRING, DT_BOOL})) + .OUTPUT(output, DT_BOOL) + .OP_END_FACTORY_REG(ToBool) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h index 33dc4f14..e5518ef8 100644 --- a/third_party/fwkacllib/inc/ops/get_data_ops.h +++ b/third_party/fwkacllib/inc/ops/get_data_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h
index b90b225e..497f6a68 100644
--- a/third_party/fwkacllib/inc/ops/hcom_ops.h
+++ b/third_party/fwkacllib/inc/ops/hcom_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -45,8 +45,6 @@ REG_OP(HcomAllGather)
     .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
     .REQUIRED_ATTR(rank_size, Int)
     .REQUIRED_ATTR(group, String)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomAllGather)

 /**
@@ -77,8 +75,6 @@ REG_OP(HcomAllReduce)
     .REQUIRED_ATTR(group, String)
     .ATTR(fusion, Int, 1)
     .ATTR(fusion_id, Int, -1)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomAllReduce)

 /**
@@ -91,7 +87,7 @@ REG_OP(HcomAllReduce)
 input of this rank will be broadcast to other ranks.
 * @li fusion: A required integer identifying if the op need to fusion,the
 default value is none fusion
- * @li fusion: A required integer identifying the fusion id if para fusion
+ * @li fusion_id: A required integer identifying the fusion id if para fusion
 is set.
 * @li group: A required string identifying the group name of ranks
 participating in the op.
@@ -109,11 +105,40 @@ REG_OP(HcomBroadcast)
     .REQUIRED_ATTR(group, String)
     .ATTR(fusion, Int, 0)
     .ATTR(fusion_id, Int, -1)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomBroadcast)

 /**
+ * @brief Performs reduction from other ranks to the root rank; the
+ reduction result is left on the root rank.
+ * @par Inputs:
+ * x: A tensor. Must be one of the following types: int8, int16, int32, float16,
+ float32.
+ * @par Attributes:
+ * @li root_rank: A required integer identifying the root rank of the op;
+ the reduction result will be on this root rank.
+ * @li reduction: A required string identifying the reduction operation to
+ perform. The supported operations are: "sum", "max", "min", "prod".
+ * @li group: A required string identifying the group name of ranks
+ participating in the op.
+ * @li fusion: An optional integer identifying the fusion flag of the op.
+ 0 (default): no fusion; 1: fusion; 2: fusion of the ops by fusion id.
+ * @li fusion_id: An optional integer identifying the fusion id of the op.
+ * The HcomReduce ops with the same fusion id will be fused.
+ * @par Outputs:
+ * y: A Tensor. Has the same type as "x".
+ * @attention Constraints:
+ *"group" is limited to 128 characters. Use "hccl_world_group"
+ as the name of a world group.
+ */
+REG_OP(HcomReduce)
+    .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
+    .REQUIRED_ATTR(root_rank, Int)
+    .REQUIRED_ATTR(reduction, String)
+    .REQUIRED_ATTR(group, String)
+    .ATTR(fusion, Int, 0)
+    .ATTR(fusion_id, Int, -1)
+    .OP_END_FACTORY_REG(HcomReduce)
+/**
 * @brief Performs reduction across all input tensors, scattering in equal
 blocks among ranks, each rank getting a chunk of data based on its rank
 index.
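+ * (Illustrative note, not part of this patch: for example, with
+ * rank_size = 4 and a 1024-element input on every rank, each rank ends up
+ * with one reduced 256-element chunk, selected by its rank index.)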
@@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter)
     .REQUIRED_ATTR(reduction, String)
     .REQUIRED_ATTR(group, String)
     .REQUIRED_ATTR(rank_size, Int)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomReduceScatter)

 /**
@@ -167,8 +190,6 @@ REG_OP(HcomSend)
     .REQUIRED_ATTR(group, String)
     .REQUIRED_ATTR(sr_tag, Int)
     .REQUIRED_ATTR(dest_rank, Int)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomSend)

 /**
@@ -202,8 +223,6 @@ REG_OP(HcomReceive)
     .REQUIRED_ATTR(src_rank, Int)
     .REQUIRED_ATTR(shape, ListInt)
     .REQUIRED_ATTR(dtype, Type)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomReceive)

 /**
@@ -219,6 +238,15 @@ REG_OP(HcomRemoteRead)
     .REQUIRED_ATTR(dtype, Type)
     .OP_END_FACTORY_REG(HcomRemoteRead)

+/**
+ * @brief Performs Remote Ref Read of input tensors.
+ * @par Inputs:
+ * remote: A tensor describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length.
+ * cache_var: The local base address.
+ * local_offset: Skip step length.
+ * @par Outputs:
+ * cache_var: The local base address.
+ */
 REG_OP(HcomRemoteRefRead)
     .INPUT(remote, TensorType({DT_UINT64}))
     .INPUT(cache_var, TensorType({DT_UINT64}))
@@ -239,11 +267,90 @@ REG_OP(HcomRemoteWrite)
     .INPUT(local, TensorType::ALL())
     .OP_END_FACTORY_REG(HcomRemoteWrite)

+/**
+ * @brief Performs Remote Write of input tensors.
+ * @par Inputs:
+ * @li remote: A tensor describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length.
+ * @li local: A Tensor whose value is length / size_of(Type).
+ */
 REG_OP(HcomRemoteScatterWrite)
     .INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
     .INPUT(local, TensorType::ALL())
     .OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64}))
     .OP_END_FACTORY_REG(HcomRemoteScatterWrite)

+/**
+ * @brief All ranks send different amounts of data to, and receive different
+ amounts of data from, all ranks.
+ * @par Inputs:
+ * Five inputs, including:
+ * @li send_data: A tensor. The memory to send.
+ * @li send_counts: A list, where entry i specifies the number of elements in
+ send_data to send to rank i.
+ * @li send_displacements: A list, where entry i specifies the displacement
+ (offset from sendbuf) from which to send data to rank i.
+ * @li recv_counts: A list, where entry i specifies the number of
+ elements to receive from rank i.
+ * @li recv_displacements: A list, where entry i specifies the displacement
+ (offset from recv_data) to which data from rank i should be written.
+ * @par Outputs:
+ * recv_data: A Tensor with the same element type as send_data.
+ * @par Attributes:
+ * @li group: A string identifying the group name of ranks participating in
+ the op.
+ * @attention All ranks participating in the op should be full-mesh networked
+ using RDMA.
+ */
+REG_OP(HcomAllToAllV)
+    .INPUT(send_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
+    .INPUT(send_counts, TensorType({DT_INT64}))
+    .INPUT(send_displacements, TensorType({DT_INT64}))
+    .INPUT(recv_counts, TensorType({DT_INT64}))
+    .INPUT(recv_displacements, TensorType({DT_INT64}))
+    .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
+    .REQUIRED_ATTR(group, String)
+    .OP_END_FACTORY_REG(HcomAllToAllV)
+
+/**
+ * @brief All ranks send different amounts of data to, and receive different
+ amounts of data from, all ranks; all the data described by addrinfo is then
+ concatenated into the output "gathered".
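+ * (Illustrative note, not part of this patch: as with HcomAllToAllV above,
+ * in a 2-rank exchange send_counts = [1, 2] and send_displacements = [0, 1]
+ * mean a rank sends 1 element from offset 0 to rank 0 and 2 elements from
+ * offset 1 to rank 1; recv_counts / recv_displacements describe the mirror
+ * layout of recv_data.)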
+ * @par Inputs:
+ * Four inputs, including:
+ * @li addrinfo: A tensor describing the memory info (address, length) to send.
+ * @li addrinfo_count_per_rank: A list, where entry i specifies the number of
+ elements in send_data to send to rank i.
+ * @li recv_counts: A list, where entry i specifies the number of
+ elements to receive from rank i.
+ * @li recv_displacements: A list, where entry i specifies the displacement
+ (offset from recv_data) to which data from rank i should be written.
+ * @par Outputs:
+ * Two outputs, including:
+ * @li recv_data: A Tensor with the same element type as dtype.
+ * @li gathered: A Tensor with the same element type as dtype.
+ * @par Attributes:
+ * @li group: A string identifying the group name of ranks participating in
+ the op.
+ * @li dtype: Datatype of the send buffer elements.
+ * @li addr_length: Describes the element memory lengths in addrinfo.
+ -2: all element memory lengths in addrinfo are the same, but the length is unknown.
+ -1: all element memory lengths are unknown.
+ >0: all element memory lengths in addrinfo are the same, and the attr value is that length.
+ * @attention All ranks participating in the op should be full-mesh networked
+ using RDMA.
+ */
+REG_OP(HcomGatherAllToAllV)
+    .INPUT(addrinfo, TensorType({DT_UINT64}))
+    .INPUT(addrinfo_count_per_rank, TensorType({DT_INT64}))
+    .INPUT(recv_counts, TensorType({DT_INT64}))
+    .INPUT(recv_displacements, TensorType({DT_INT64}))
+    .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
+    .OUTPUT(gathered, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
+    .REQUIRED_ATTR(group, String)
+    .REQUIRED_ATTR(dtype, Type)
+    .REQUIRED_ATTR(addr_length, Int)
+    .OP_END_FACTORY_REG(HcomGatherAllToAllV)
+
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h
index a49ec5ed..00299ef7 100644
--- a/third_party/fwkacllib/inc/ops/hvd_ops.h
+++ b/third_party/fwkacllib/inc/ops/hvd_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index ce3262f9..37f0e2b1 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,6 +24,22 @@
 #include "graph/operator_reg.h"

 namespace ge {
+/**
+*@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor. \n
+
+*@par Inputs:
+*contents: A Tensor of type string. 0-D. The GIF-encoded image. \n
+
+*@par Outputs:
+*image: A Tensor of type uint8. \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow DecodeGif operator.
+*/
+REG_OP(DecodeGif)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .OUTPUT(image, TensorType({DT_UINT8}))
+    .OP_END_FACTORY_REG(DecodeGif)

 /**
 *@brief Adjust the hue of one or more images . \n
@@ -31,11 +47,12 @@ namespace ge {
 *@par Inputs:
 *Input images is a tensor of at least 3 dimensions.
The last dimension is interpretted as channels, and must be three. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li delta:A Tensor of type float. A float delta to add to the hue . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -57,11 +74,12 @@ REG_OP(AdjustHue) *@par Inputs: *Input images is a tensor of at least 3 dimensions. The last dimension is interpretted as channels, and must be three. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li scale:A Tensor of type float. A float scale to add to the saturation . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -83,11 +101,12 @@ REG_OP(AdjustSaturation) *@par Inputs: *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are interpreted as '[height, width, channels]'. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -109,11 +128,11 @@ crops from the input image tensor and resizes them using bilinear sampling or nearest neighbor sampling to a common output size specified by crop_size . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: -*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, +*Input x must be a 4-D tensor. Inputs include: +*@li x:A Tensor. Must be one of the following types:uint8, uint16, int8, int16, int32, int64, float16, float, double. A 4-D tensor of shape -[batch, image_height, image_width, depth]. -*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. +[batch, image_height, image_width, depth]. The format must be NHWC. +*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4]. *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch). *@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size @@ -127,7 +146,7 @@ extrapolation, when applicable. NearestNeighbor . \n *@par Outputs: -*y:A Tensor of type float . \n +*y: A Tensor. Must be one of the following types: float16, float. The format must be NHWC. \n *@attention Constraints: *Input images must be a 4-D tensor . \n @@ -139,10 +158,10 @@ NearestNeighbor . 
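+/* Illustrative sketch, not part of this patch: assuming the wrapper class
+ * that REG_OP conventionally generates under ge::op (names here are
+ * assumptions), boxes hold normalized [y1, x1, y2, x2] coordinates, so
+ * [0, 0, 1, 1] selects a whole image:
+ *   auto crop = ge::op::CropAndResize("crop")
+ *                   .set_input_x(images)
+ *                   .set_input_boxes(boxes)
+ *                   .set_input_box_index(box_index)
+ *                   .set_input_crop_size(crop_size)
+ *                   .set_attr_method("bilinear");
+ */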
\n
 REG_OP(CropAndResize)
     .INPUT(x, TensorType({DT_UINT8, DT_UINT16, DT_INT8, \
         DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
-    .INPUT(boxes, TensorType({DT_FLOAT}))
+    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(box_index, TensorType({DT_INT32}))
     .INPUT(crop_size, TensorType({DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(extrapolation_value, Float, 0)
     .ATTR(method, String, "bilinear")
     .OP_END_FACTORY_REG(CropAndResize)
@@ -156,7 +175,7 @@ REG_OP(CropAndResize)
 *Input images must be a 5HD tensor. Inputs include:
 *@li x:A Tensor. Must be one of the following types:float16, float. A 5HD tensor of shape
 * [batch, C1, image_height, image_width, C0].
-*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4].
+*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4].
 *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n

 *@par Attributes:
@@ -165,7 +184,7 @@ REG_OP(CropAndResize)
 *@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n

 *@par Outputs:
-*y:A Tensor of type float . \n
+*y: A Tensor. Must be one of the following types: float16, float. \n

 *@attention Constraints:
 *Input images must be a 5HD tensor . \n
@@ -178,9 +197,9 @@ REG_OP(CropAndResize)
 */
 REG_OP(CropAndResizeD)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .INPUT(boxes, TensorType({DT_FLOAT}))
+    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(box_index, TensorType({DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(crop_size, ListInt)
     .ATTR(extrapolation_value, Float, 0)
     .ATTR(method, String, "bilinear")
@@ -193,7 +212,9 @@ boxes tensor . \n
 *@par Inputs:
 *Input images and grads must be a 4-D tensor. Inputs include:
 *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
+The format must be NHWC.
 *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth].
+The format must be NHWC.
 Both image_height and image_width need to be positive.
 *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor
 specifies the coordinates of a box in the box_ind[i] image and is specified in
@@ -233,6 +254,7 @@ images tensor . \n
 *@par Inputs:
 *Input grads must be a 4-D tensor. Inputs include:
 *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
+The format must be NHWC.
 *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor
 specifies the coordinates of a box in the box_ind[i] image and is specified in
 normalized coordinates [y1, x1, y2, x2].
@@ -244,11 +266,13 @@ depth] containing the original image size. Both image_height and image_width
 need to be positive . \n

 *@par Attributes:
-method: A string specifying the interpolation method. Only 'bilinear' is
-supported for now . \n
+*@li method: A string specifying the interpolation method. Only 'bilinear' is
+supported for now .
+*@li T: The data type of the output "y". \n

 *@par Outputs:
-*y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n
+*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format
+must be NHWC. \n

 *@attention Constraints:
 *Input grads must be a 4-D tensor . \n
@@ -273,6 +297,7 @@ REG_OP(CropAndResizeGradImage)
 *@par Inputs:
 *Input x must be a 4-D tensor. Inputs include:
 *@li x: A 4-D float tensor of shape [batch_size, height, width, channels].
+The format must be NHWC. *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to extract. The glimpse height must be specified first, following by the glimpse width. @@ -293,7 +318,7 @@ uniform_noise . \n *@par Outputs: *y:A tensor representing the glimpses [batch_size, glimpse_height, -glimpse_width, channels] . \n +glimpse_width, channels]. The format must be NHWC. \n *@attention Constraints: *Input x must be a 4-D tensor . \n @@ -324,6 +349,7 @@ REG_OP(ExtractGlimpse) *y:images converted to RGB . \n *@attention Constraints: +*Input images currently supports DT_FLOAT, DT_DOUBLE . *Last dimension of input x must be size 3 . \n *@par Third-party framework compatibility @@ -340,7 +366,8 @@ REG_OP(HSVToRGB) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format must +be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images. *@li min: A Tensor of type float. @@ -354,6 +381,7 @@ the values at the corner pixels. Defaults to false. *@par Outputs: *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. +The format must be NHWC. *@li y_min: A Tensor of type float. *@li y_max: A Tensor of type float . \n @@ -381,7 +409,8 @@ REG_OP(QuantizedResizeBilinear) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format must +be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -391,7 +420,8 @@ output tensors are aligned, preserving the values at the corner pixels. Defaults to false . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels] . \n +*y: 4-D with shape [batch, new_height, new_width, channels]. The format must +be NHWC. \n *@attention Constraints: *Input images can be of different types but output images are always float . \n @@ -414,10 +444,10 @@ REG_OP(ResizeArea) *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: *@li grads: A Tensor of type float. 4-D with shape [batch, height, width, -channels]. +channels]. The format must be NHWC. *@li original_image: A Tensor. Must be one of the following types: float, double. 4-D with shape [batch, orig_height, orig_width, channels], The image -tensor that was resized . \n +tensor that was resized. The format must be NHWC. \n *@par Attributes: *@li align_corners: An optional bool. Defaults to False. If true, the centers @@ -426,10 +456,10 @@ false. *@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as original_image . \n +*y: A Tensor. Has the same type as original_image. The format must be NHWC. \n *@attention Constraints: -*Input images can be of different types but output images are always float . \n +*Input images can be of different types but output images are always float . *@par Third-party framework compatibility *Compatible with tensorflow ResizeBicubicGrad operator. @@ -448,7 +478,8 @@ REG_OP(ResizeBicubicGrad) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format +must be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. 
The new size for the images . \n
@@ -459,10 +490,11 @@ Defaults to false.
 *@li half_pixel_centers: An optional bool. Defaults to False . \n

 *@par Outputs:
-*y: 4-D with shape [batch, new_height, new_width, channels] . \n
+*y: 4-D with shape [batch, new_height, new_width, channels]. The format
+must be NHWC. \n

 *@attention Constraints:
-*Input images can be of different types but output images are always float . \n
+*Input images can be of different types but output images are always float .

 *@par Third-party framework compatibility
 *Compatible with tensorflow ResizeBicubic operator.
@@ -483,7 +515,7 @@ REG_OP(ResizeBicubic)
 *@par Inputs:
 *Input grads must be a 4-D tensor. Inputs include:
 *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32,
-float16, float, double. 4-D with shape [batch, height, width, channels].
+float16, float, double. Must set the format, supported format list ["NCHW, NHWC"]
 *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width.
 The original input size . \n
@@ -550,15 +582,16 @@ REG_OP(ResizeNearestNeighborV2GradD)
 *@par Inputs:
 *Input grads must be a 4-D tensor. Inputs include:
-*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width,
-channels].
-*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width,
+*@li grads: A Tensor of type float32. Must set the format, supported format list ["NCHW, NHWC"]
+*@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW, NHWC"]
 channels], The image tensor that was resized . \n

 *@par Attributes:
-*align_corners: An optional bool. Defaults to False. If true, the centers of
+*@li align_corners: An optional bool. Defaults to False. If true, the centers of
 the 4 corner pixels of the input and grad tensors are aligned. Defaults to
-false . \n
+false .
+*@li half_pixel_centers: Indicates if the offset coordinates are normalized. Defaults
+to false . \n

 *@par Outputs:
 *y: A Tensor. Has the same type as original_image . \n
@@ -579,19 +612,66 @@ REG_OP(ResizeBilinearV2Grad)
     .OP_END_FACTORY_REG(ResizeBilinearV2Grad)

 /**
+*@brief Computes the gradient of bilinear interpolation . \n
+
+*@par Inputs:
+*Input grads must be a 4-D tensor. Inputs include:
+*@li grads: A Tensor of type float32. Must set the format, supported format list ["NCHW, NHWC"]
+*@li original_image: A Tensor of 4-D shape; the image tensor that was resized.
+Must set the format, supported format list ["NCHW, NHWC"] \n
+
+*@par Attributes:
+*@li size: An optional listint. Defaults to {}.
+*@li ori_image_size: An optional listint. Defaults to {}.
+*@li src_start_w: An optional int. Defaults to 0.
+*@li dst_start_w: An optional int. Defaults to 0.
+*@li align_corners: An optional bool. Defaults to False. If true, the centers of
+the 4 corner pixels of the input and grad tensors are aligned.
+*@li half_pixel_centers: Indicates if the offset coordinates are normalized. Defaults
+to false . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as original_image . \n
+
+*@attention Constraints:
+*Input grads must be a 4-D tensor . \n
+
+*@par Third-party framework compatibility
+*Compatible with mindspore ResizeBilinearV2Grad operator.
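+*
+*@par Note
+* Illustrative note, not part of this patch: align_corners and
+* half_pixel_centers follow the usual TensorFlow resize conventions, i.e.
+* with align_corners the source coordinate is dst * (in - 1) / (out - 1),
+* while half_pixel_centers uses (dst + 0.5) * in / out - 0.5; only one of
+* the two is expected to be set to true at a time.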
+*/
+
+REG_OP(SyncResizeBilinearV2Grad)
+    .INPUT(grads, TensorType({DT_FLOAT}))
+    .INPUT(original_image, TensorType::FloatingDataType())
+    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .ATTR(size, ListInt, {})
+    .ATTR(ori_image_size, ListInt, {})
+    .ATTR(src_start_w, Int, 0)
+    .ATTR(dst_start_w, Int, 0)
+    .ATTR(align_corners, Bool, false)
+    .ATTR(half_pixel_centers, Bool, false)
+    .OP_END_FACTORY_REG(SyncResizeBilinearV2Grad)
+
+/**
 *@brief Resize images to size using bilinear interpolation . \n

 *@par Inputs:
 *Input images must be a 4-D tensor. Inputs include:
-*@li x: 4-D with shape [batch, height, width, channels].
+*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"]
 *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
 size for the images . \n

 *@par Attributes:
-*align_corners: If true, the centers of the 4 corner pixels of the input and
+* @li align_corners: If true, the centers of the 4 corner pixels of the input and
 output tensors are aligned, preserving the values at the corner pixels.
-Defaults to false . \n
-
+Defaults to false .
+* @li half_pixel_centers: An optional bool. Defaults to False . \n
+* @li dtype: A Type attr, supported type list [DT_FP32, DT_U8]. Defaults to DT_FP32 . \n
 *@par Outputs:
 *y: 4-D with shape [batch, new_height, new_width, channels] . \n
@@ -603,15 +683,56 @@ Defaults to false . \n
 */

 REG_OP(ResizeBilinearV2)
-    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16,
-        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+        DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
     .INPUT(size, TensorType({DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_UINT8, DT_FLOAT}))
     .ATTR(align_corners, Bool, false)
     .ATTR(half_pixel_centers, Bool, false)
+    .ATTR(dtype, Type, DT_FLOAT)
     .OP_END_FACTORY_REG(ResizeBilinearV2)

 /**
+*@brief Resize images to size using bilinear interpolation . \n
+
+*@par Inputs:
+*Input images must be a 4-D tensor. Inputs include:
+*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"]
+*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
+size for the images . \n
+
+*@par Attributes:
+* @li align_corners: If true, the centers of the 4 corner pixels of the input and
+output tensors are aligned, preserving the values at the corner pixels.
+Defaults to false .
+* @li half_pixel_centers: An optional bool. Defaults to False . \n
+*@li ori_image_size: An optional listint. Defaults to {}.
+*@li split_size: An optional listint. Defaults to {}.
+*@li src_start_w: An optional int. Defaults to 0.
+*@li dst_start_w: An optional int. Defaults to 0.
+
+*@par Outputs:
+*y: 4-D with shape [batch, new_height, new_width, channels] . \n
+
+*@attention Constraints:
+*Input images can be of different types but output images are always float . \n
+
+*@par Third-party framework compatibility
+*Compatible with mindspore ResizeBilinearV2 operator.
+*/
+
+REG_OP(SyncResizeBilinearV2)
+    .INPUT(x, TensorType({DT_FLOAT}))
+    .INPUT(size, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .ATTR(ori_image_size, ListInt, {})
+    .ATTR(split_size, ListInt, {})
+    .ATTR(src_start_w, Int, 0)
+    .ATTR(dst_start_w, Int, 0)
+    .ATTR(align_corners, Bool, false)
+    .ATTR(half_pixel_centers, Bool, false)
+    .OP_END_FACTORY_REG(SyncResizeBilinearV2)
+
+/**
 *@brief Converts one or more images from RGB to HSV . \n

 *@par Inputs:
@@ -643,6 +764,65 @@ REG_OP(RGBToHSV)
 *Input images must be a 4-D tensor.
Inputs include: *@li image_size: 1-D, containing [height, width, channels]. *@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding +boxes associated with the image. \n + +*@par Attributes: +*@li seed: If either seed or seed2 are set to non-zero, the random number +generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: A second seed to avoid seed collision. +*@li min_object_covered: The cropped area of the image must contain at least +this fraction of any bounding box supplied. The value of this parameter should +be non-negative. In the case of 0, the cropped area does not need to overlap +any of the bounding boxes supplied . +*@li aspect_ratio_range: The cropped area of the image must have an aspect +ratio = width / height within this range. +*@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The +cropped area of the image must contain a fraction of the supplied image +within this range. +*@li max_attempts: Number of attempts at generating a cropped region of the +image of the specified constraints. After max_attempts failures, return the +entire image. +*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes +supplied. If true, assume an implicit bounding box covering the whole input. +If false, raise an error . \n + +*@par Outputs: +*@li begin: 1-D, containing [offset_height, offset_width, 0]. +*@li size: 1-D, containing [target_height, target_width, -1]. +*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SampleDistortedBoundingBox operator. +*/ + +REG_OP(SampleDistortedBoundingBox) + .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .INPUT(bounding_boxes, TensorType({ DT_FLOAT })) + .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(bboxes, TensorType({ DT_FLOAT })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .ATTR(min_object_covered, Float, 0.1f) + .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) + .ATTR(area_range, ListFloat, { 0.05f, 1.0f }) + .ATTR(max_attempts, Int, 100) + .ATTR(use_image_if_no_bounding_boxes, Bool, false) + .OP_END_FACTORY_REG(SampleDistortedBoundingBox) + +/** +*@brief Generate a single randomly distorted bounding box for an image . \n + +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li image_size: 1-D, containing [height, width, channels]. +*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. *@li min_object_covered: The cropped area of the image must contain at least this fraction of any bounding box supplied. The value of this parameter should @@ -655,6 +835,9 @@ generator is seeded by the given seed. Otherwise, it is seeded by a random seed. *@li seed2: A second seed to avoid seed collision. *@li aspect_ratio_range: The cropped area of the image must have an aspect ratio = width / height within this range. +*@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The +cropped area of the image must contain a fraction of the supplied image +within this range. *@li max_attempts: Number of attempts at generating a cropped region of the image of the specified constraints. 
After max_attempts failures, return the entire image.
@@ -697,20 +880,21 @@ REG_OP(SampleDistortedBoundingBoxExt2)

 *@par Inputs:
 *Input x must be a 4-D tensor. Inputs include:
-*@li x: 4-D with shape [batch, height, width, channels].
+*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"].
 *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
 size for the images . \n

 *@par Attributes:
-*align_corners: If true, the centers of the 4 corner pixels of the input and
+*@li align_corners: If true, the centers of the 4 corner pixels of the input and
 output tensors are aligned, preserving the values at the corner pixels.
 Defaults to false . \n
+*@li half_pixel_centers: An optional bool. Defaults to False . \n

 *@par Outputs:
-*y: 4-D with shape [batch, new_height, new_width, channels] . \n
+*y: A Tensor with the same type and format as input "images" . \n

 *@par Third-party framework compatibility
-*Compatible with tensorflow ResizeNearestNeighborV2 operator.
+*Compatible with tensorflow ResizeNearestNeighbor operator.
 */

 REG_OP(ResizeNearestNeighborV2)
@@ -729,12 +913,12 @@ REG_OP(ResizeNearestNeighborV2)
 *@par Inputs:
 *Input images must be a 4-D tensor. Inputs include:
 *@li images: A Tensor. Must be one of the following types: float. 4-D with
-shape [batch, height, width, depth]. A batch of images.
+shape [batch, height, width, depth]. A batch of images. The format must be NHWC.
 *@li boxes: A Tensor of type float32. 3-D with shape [batch,
 num_bounding_boxes, 4] containing bounding boxes . \n

 *@par Outputs:
-*A Tensor. Has the same type as images . \n
+*A Tensor. Has the same type as images. The format must be NHWC. \n

 *@attention Constraints:
 *Input images must be a 4-D tensor . \n
@@ -914,10 +1098,6 @@ deciding whether boxes overlap too.
 *@li score_threshold: A 0-D float tensor representing the threshold for
 deciding when to remove boxes based on score . \n

-*@par Attributes:
-*pad_to_max_output_size: If true, the output selected_indices is padded
-to be of length max_output_size. Defaults to false . \n
-
 *@par Outputs:
 *selected_indices: A 1-D integer tensor of shape [M] representing the
 selected indices from the boxes tensor, where M <= max_output_size . \n
@@ -1002,12 +1182,94 @@ REG_OP(EncodePng)
     .ATTR(compression, Int, -1)
     .OP_END_FACTORY_REG(EncodePng)

+
+/**
+*@brief PNG-decode an image.
+*@par Inputs:
+*contents: 0-D. The PNG-encoded image .
+
+*@par Attributes:
+*@li channels: The desired number of color channels for the decoded image. \n
+*@li dtype: The data type of the decoded image.
+
+*@par Outputs:
+*image: A 3-D uint8 or uint16 Tensor of shape [height, width, channels],
+where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB;
+4: for RGBA . \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow DecodePng operator.
+*/
+REG_OP(DecodePng)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .OUTPUT(image, TensorType({DT_UINT8, DT_UINT16}))
+    .ATTR(dtype, Type, DT_UINT8)
+    .ATTR(channels, Int, 0)
+    .OP_END_FACTORY_REG(DecodePng)
+
+/**
+*@brief Bmp-decode an image. \n
+
+*@par Inputs:
+*contents: A Tensor of type string. 0-D. The BMP-encoded image. \n
+
+*@par Attributes:
+*channels: The desired number of color channels to decode the image with. \n
+
+*@par Outputs:
+*image: A Tensor dtype of uint8.
+
+* @par Third-party framework compatibility
+* Compatible with tensorflow DecodeBmp operator.
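+*
+* (Illustrative note, not part of this patch: following the TensorFlow
+* DecodeBmp convention this mirrors, channels = 0 is expected to use the
+* channel count recorded in the BMP header, while 3 or 4 force RGB / RGBA
+* output.)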
+*/ + +REG_OP(DecodeBmp) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8})) + .ATTR(channels, Int, 0) + .OP_END_FACTORY_REG(DecodeBmp) + +/** +*@brief Function parse image from string to int. \n + +*@par Inputs: +*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n +*@li crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. \n + +*@par Attributes: +*@li channels: An optional int. Defaults to 0. Number of color channels for the +*decoded image. +*@li ratio: An optional int. Defaults to 1. Downscaling ratio. +*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower +*but nicer upscaling of the chroma planes +*@li try_recover_truncated: An optional bool. Defaults to False. If true try to +*recover an image from truncated input. +*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required +fraction of lines before a truncated input is accepted. +*@li dct_method: An optional string. Defaults to "". string specifying a hint +*about the algorithm used for decompression. \n + +*@par Outputs: +*image: A Tensor dtype of uint8. +*/ +REG_OP(DecodeAndCropJpeg) + .INPUT(contents, TensorType({DT_STRING})) + .INPUT(crop_window, TensorType({DT_INT32})) + .OUTPUT(image, TensorType({DT_UINT8})) + .ATTR(channels, Int, 0) + .ATTR(ratio, Int, 1) + .ATTR(fancy_upscaling, Bool, true) + .ATTR(try_recover_truncated, Bool, false) + .ATTR(acceptable_fraction, Float, 1.0) + .ATTR(dct_method, String, "") + .OP_END_FACTORY_REG(DecodeAndCropJpeg) + /** *@brief Resizes "images" to "size" using bilinear interpolation . \n *@par Inputs: * One input: -*x: An NC1HWC0 Tensor. +*x: A Tensor. * Must be one of the following types: float16, float32 . \n *@par Attributes: @@ -1044,7 +1306,7 @@ REG_OP(ResizeBilinearV2D) *@par Inputs: * One input: -*images: An NC1HWC0 Tensor. +*images: A Tensor. * Must be one of the following types: float16, float32 . \n *@par Attributes: @@ -1078,7 +1340,7 @@ REG_OP(KeepRatioResizeBilinear) *@par Inputs: * One input: -*x: An NC1HWC0 Tensor. +*x: A Tensor. * Must be one of the following types: float16, float32, int32, int8, uint8 *@par Attributes: @@ -1086,6 +1348,7 @@ REG_OP(KeepRatioResizeBilinear) No default value. *@li align_corners: An optional bool. If "true", the centers of the corner pixels of the input and output tensors are aligned. Defaults to "false" . \n +*@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: *y: A Tensor with the same type and format as input "images" . \n @@ -1214,6 +1477,7 @@ REG_OP(NonMaxSuppressionV5) *@li scale: A `Tensor` of type `float32`. *@li translation: A `Tensor` of type `float32` . \n +*@par Attributes: *@li kernel_type: type is string, default lanczos3 *@li antialias: type is bool, default true \n @@ -1244,6 +1508,7 @@ REG_OP(ScaleAndTranslate) *@li scale: A `Tensor` of type `float32`. *@li translation: A `Tensor` of type `float32` . \n +*@par Attributes: *@li kernel_type: type is string, default lanczos3 *@li antialias: type is bool, default true @@ -1293,9 +1558,10 @@ if they fall beyond [0, 1]. If false, do not do clipping and output the box coordinates as it is. If not specified, defaults to true . 
\n *@par Outputs: -*nmsed_boxes:type is float -*nmsed_scores:type is float -*nmsed_classes:type is float \n +*@li nmsed_boxes:type is float +*@li nmsed_scores:type is float +*@li nmsed_classes:type is float +*@li valid_detections:type is INT32 \n *@par Third-party framework compatibility * Compatible with tensorflow CombinedNonMaxSuppression operator. @@ -1317,6 +1583,58 @@ REG_OP(CombinedNonMaxSuppression) .OP_END_FACTORY_REG(CombinedNonMaxSuppression) /** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. +*@li warp_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for offset point. + +*@par Outputs: +*warp_img: A Tensor after resize. \n +*/ +REG_OP(IMGWarp) + .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .INPUT(warp_offset, TensorType({DT_FLOAT32})) + .OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .OP_END_FACTORY_REG(IMGWarp) + +/** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. +*@li map_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for resize point. + +*@par Outputs: +*map_img: A Tensor after resize. \n + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(Remap) + .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .INPUT(map_offset, TensorType({DT_FLOAT32})) + .OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .OP_END_FACTORY_REG(Remap) + +/** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 5-D tensor of shape `[n, 4, c, h, w]`, +and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bottom, w_right)]. +*@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point. + +*@par Outputs: +*warp_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n +*/ +REG_OP(IMGWarpResize) + .INPUT(img, TensorType({DT_FLOAT32})) + .INPUT(warp_index, TensorType({DT_FLOAT32})) + .OUTPUT(warp_img, TensorType({DT_FLOAT32})) + .OP_END_FACTORY_REG(IMGWarpResize) + +/** *@brief Function spatial transformer . \n *@par Inputs: @@ -1342,6 +1660,716 @@ REG_OP(SpatialTransformerD) .ATTR(use_default_theta, ListBool, {}) .OP_END_FACTORY_REG(SpatialTransformerD) -} // namespace ge +/** +*@brief Function spatial transformer . \n + +*@par Inputs: +*@li x: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64. +*@li theta: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64, + auxiliary coefficients . \n + +*@par Attributes: +*@li output_size: A tuple output size. +*@li default_theta: A tuple default theta +*@li use_default_theta: List use default theta + +*@par Outputs: +*y: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64, + should be same shape and type as x. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
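+*
+* A minimal sketch (illustration only, not part of the original header) of
+* the sampling-grid math a spatial transformer applies: each output point
+* (xt, yt) in normalized [-1, 1] coordinates is mapped through the 2x3
+* affine matrix theta to a source location (xs, ys):
+*
+*   void AffinePoint(const float theta[6], float xt, float yt,
+*                    float &xs, float &ys) {
+*     xs = theta[0] * xt + theta[1] * yt + theta[2];
+*     ys = theta[3] * xt + theta[4] * yt + theta[5];
+*   }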
+*/
+REG_OP(SpatialTransformer)
+    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16,
+        DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64}))
+    .OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,
+        DT_UINT16,DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16,
+        DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64}))
+    .ATTR(output_size, ListInt, {-1, -1})
+    .ATTR(default_theta, ListFloat, {})
+    .ATTR(align_corners, Bool, false)
+    .ATTR(use_default_theta, ListInt, {})
+    .OP_END_FACTORY_REG(SpatialTransformer)
+
+/**
+* @brief Resize the input tensor. \n
+Currently, only resizing image tensors using nearest neighbor and linear interpolation is supported.
+
+* @par Inputs:
+* Input x must be a 4-D tensor. Inputs include: \n
+* @li x: A Tensor. Must be one of the following types: uint8, int8, int16, \n
+int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n
+or shape [batch, channels, height, width].
+* @li roi: A 1-D float Tensor. Only takes effect when attr coordinate_transformation_mode \n
+is "tf_crop_and_resize".
+* @li scales: A 1-D float Tensor, the scale array along each dimension. Only one of \n
+'scales' and 'sizes' can be specified.
+* @li sizes: A 1-D int64 Tensor, the size of the output tensor. Only one of \n
+'scales' and 'sizes' can be specified. If 'sizes' is specified, then set 'scales' \n
+to empty data (zero shape) in this operator's input list.
+
+* @par Attributes:
+* @li coordinate_transformation_mode: String. Defaults to half_pixel. How to transform \n
+the coordinate in the resized tensor to the coordinate in the original tensor. \n
+Other optional values: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n
+tf_crop_and_resize.
+* @li cubic_coeff_a: Float. Defaults to -0.75, only used in cubic interpolation. \n
+Other optional value: -0.5.
+* @li exclude_outside: Int. Defaults to 0. If set to 1, the weight of sampling \n
+locations outside the tensor will be set to 0 and the weight will be renormalized \n
+so that their sum is 1.0.
+* @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n
+is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n
+this value is used as the corresponding output value.
+* @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n
+linear and cubic.
+* @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n
+round_prefer_ceil, floor, ceil. Only used by nearest interpolation.
+
+* @par Outputs:
+* y: A Tensor. Has the same type as x.
+
+* @attention Constraints: \n
+* Input x must be a 4-D tensor.
+
+* @par Third-party framework compatibility
+* Compatible with tensorflow ResizeNearestNeighborV2 operator.
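+*
+* A small sketch (illustration only, assuming the ONNX-style definitions this
+* doc describes) of how two common coordinate_transformation_mode values map
+* an output index back to an input coordinate:
+*
+*   float SrcCoord(int64_t x_out, int64_t in_len, int64_t out_len,
+*                  bool align_corners) {
+*     if (align_corners) {  // endpoints map exactly onto endpoints
+*       return (out_len > 1) ? x_out * (in_len - 1.0f) / (out_len - 1.0f) : 0.0f;
+*     }
+*     // half_pixel (the default): pixel centers sit at integer + 0.5
+*     return (x_out + 0.5f) * in_len / out_len - 0.5f;
+*   }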
+*/
+REG_OP(Resize)
+    .INPUT(x, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,
+        DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .OPTIONAL_INPUT(roi, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .OPTIONAL_INPUT(scales, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(sizes, TensorType({DT_INT64,DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,
+        DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .ATTR(coordinate_transformation_mode, String, "half_pixel")
+    .ATTR(cubic_coeff_a, Float, -0.75)
+    .ATTR(exclude_outside, Int, 0)
+    .ATTR(extrapolation_value, Float, 0.0)
+    .ATTR(mode, String, "nearest")
+    .ATTR(nearest_mode, String, "round_prefer_floor")
+    .OP_END_FACTORY_REG(Resize)
+
+/**
+*@brief Decodes a JPEG-encoded image to a uint8 tensor. \n
+
+*@par Inputs:
+* contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n
+
+*@par Attributes:
+*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image.
+*@li ratio: An optional int. Defaults to 1. Downscaling ratio.
+*@li fancy_upscaling: An optional bool. Defaults to True. If true, use a slower but nicer upscaling of the chroma planes.
+*@li try_recover_truncated: An optional bool. Defaults to False. If true, try to recover an image from truncated input.
+*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted.
+*@li dct_method: An optional string. Defaults to "". String specifying a hint about the algorithm used for decompression. \n
+
+*@par Outputs:
+*image: A Tensor dtype of uint8.
+*/
+REG_OP(DecodeJpeg)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .OUTPUT(image, TensorType({DT_UINT8}))
+    .ATTR(channels, Int, 0)
+    .ATTR(ratio, Int, 1)
+    .ATTR(fancy_upscaling, Bool, true)
+    .ATTR(try_recover_truncated, Bool, false)
+    .ATTR(acceptable_fraction, Float, 1.0)
+    .ATTR(dct_method, String, "")
+    .OP_END_FACTORY_REG(DecodeJpeg)
+
+/**
+*@brief Image warping using per-pixel flow vectors. \n
+
+*@par Inputs:
+*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
+*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
+
+*@par Outputs:
+*y: Returns 4-D with the same shape and dtype as `image`. \n
+*/
+REG_OP(DenseImageWarp)
+    .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(DenseImageWarp)
+
+/**
+*@brief Calculate the resize_d function. \n
+
+*@par Inputs:
+*One input, including:
+* x: A tensor. Must be one of the following types:
+* float16, float32. \n
+
+*@par Attributes:
+*@li sizes: A required listInt. \n
+*@li scales: An optional listFloat.
+    Defaults to none. \n
+*@li roi: An optional listInt.
+    Defaults to none. \n
+*@li coordinate_transformation_mode: An optional String.
+    Defaults to "half_pixel". \n
+*@li cubic_coeff_a: An optional float.
+    Defaults to -0.75. \n
+*@li exclude_outside: An optional int.
+    Defaults to 0. \n
+*@li extrapolation_value: An optional float.
+    Defaults to 0.0. \n
+*@li mode: An optional String.
+    Defaults to "nearest". \n
+*@li nearest_mode: An optional String.
+    Defaults to "round_prefer_floor". \n
+
+*@par Outputs:
+*y: A Tensor with the same type as x;
+    shape depends on x and sizes.
 \n
+*/
+REG_OP(ResizeD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(sizes, ListInt)
+    .ATTR(scales, ListFloat, {})
+    .ATTR(roi, ListInt, {})
+    .ATTR(coordinate_transformation_mode, String, "half_pixel")
+    .ATTR(cubic_coeff_a, Float, -0.75)
+    .ATTR(exclude_outside, Int, 0)
+    .ATTR(extrapolation_value, Float, 0.0)
+    .ATTR(mode, String, "nearest")
+    .ATTR(nearest_mode, String, "round_prefer_floor")
+    .OP_END_FACTORY_REG(ResizeD)
+
+/**
+*@brief Calculate the resize_grad_d function. \n
+
+*@par Inputs:
+*One input, including:
+* grads: A tensor. Must be one of the following types:
+* float16, float32. \n
+
+*@par Attributes:
+*@li original_size: A required listInt. \n
+*@li roi: An optional listInt.
+    Defaults to none. \n
+*@li scales: An optional listFloat.
+    Defaults to none. \n
+*@li coordinate_transformation_mode: An optional String.
+    Defaults to "half_pixel". \n
+*@li cubic_coeff_a: An optional float.
+    Defaults to -0.75. \n
+*@li exclude_outside: An optional int.
+    Defaults to 0. \n
+*@li extrapolation_value: An optional float.
+    Defaults to 0.0. \n
+*@li mode: An optional String.
+    Defaults to "nearest". \n
+*@li nearest_mode: An optional String.
+    Defaults to "round_prefer_floor". \n
+
+*@par Outputs:
+*y: A Tensor with the same type as grads;
+    shape depends on grads and original_size. \n
+*/
+REG_OP(ResizeGradD)
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(original_size, ListInt)
+    .ATTR(roi, ListInt, {})
+    .ATTR(scales, ListFloat, {})
+    .ATTR(coordinate_transformation_mode, String, "half_pixel")
+    .ATTR(cubic_coeff_a, Float, -0.75)
+    .ATTR(exclude_outside, Int, 0)
+    .ATTR(extrapolation_value, Float, 0.0)
+    .ATTR(mode, String, "nearest")
+    .ATTR(nearest_mode, String, "round_prefer_floor")
+    .OP_END_FACTORY_REG(ResizeGradD)
+
+/**
+*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n
+
+*@par Inputs:
+*@li grad: gradients with respect to DenseImageWarp output.
+*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
+*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
+
+*@par Outputs:
+*@li grad_image: Returns 4-D with the same shape and dtype as `image`.
+*@li grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
+*/
+REG_OP(DenseImageWarpGrad)
+    .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(DenseImageWarpGrad)
+
+/**
+*@brief This operation samples input X by using interpolation based on flow field grid,
+ which is usually generated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of
+ (x, y) coordinates with shape [N, H, W] each, where x is indexing the 4th dimension (in width dimension) of
+ input data x and y is indexing the 3rd dimension (in height dimension); the result is
+ the interpolation value of the 4 nearest corner points. The output tensor shape will be [N, C, H, W].
+
+*@par Inputs:
+*@li x: 4-D Tensor with shape `[batch, channels, height, width]`.
+*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.
+
+*@par Attributes:
+*@li interpolation_mode: An optional string specifying the interpolation method. Only 'bilinear' is
+ supported for now .
+*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .
+*@li align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to "false" .
+
+*@par Outputs:
+*y: Returns 4-D Tensor with the same dtype as `X`.
+
+*@par Third-party framework compatibility
+*Compatible with pytorch GridSampler2D operator.
+*/
+REG_OP(GridSampler2D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(interpolation_mode, String, "bilinear")
+    .ATTR(padding_mode, String, "zeros")
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridSampler2D)
+
+/**
+*@brief Computes the gradients of GridSampler2D.
+
+*@par Inputs:
+*@li grad: A 4-D Tensor with shape `[batch, channels, height, width]`.
+*@li x: A 4-D Tensor with shape `[batch, channels, height, width]`.
+*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.
+
+*@par Attributes:
+*@li interpolation_mode: An optional string specifying the interpolation method.
+ Defaults to "bilinear".
+*@li padding_mode: An optional string specifying the pad method.
+ Defaults to "zeros".
+*@li align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to false.
+
+*@par Outputs:
+*@li dx: Returns 4-D Tensor with the same dtype and shape as `x`.
+*@li dgrid: Returns 4-D Tensor with the same dtype and shape as `grid`.
+
+*@par Third-party framework compatibility
+*Compatible with pytorch GridSampler2DGrad operator.
+*/
+REG_OP(GridSampler2DGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(dgrid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(interpolation_mode, String, "bilinear")
+    .ATTR(padding_mode, String, "zeros")
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridSampler2DGrad)
+
+/**
+*@brief This operation unnormalizes the input grid, which is usually generated by affine_grid.
+
+*@par Inputs:
+*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.
+*@li assist: Assist matrix, a 4-D tensor of type float16.
+
+*@par Attributes:
+*align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to "false" .
+
+*@par Outputs:
+*@li diff: Returns 4-D Tensor with the same shape and dtype as `grid`.
+*@li position: Returns 4-D Tensor with the same shape as `grid`.
+*/
+REG_OP(GridUnnormal)
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(diff, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(position, TensorType({DT_INT32}))
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridUnnormal)
+
+/**
+*@brief This operation unfolds input X based on the unnormalized grid, which is generated by GridUnnormal.
+
+*@par Inputs:
+*@li x: 4-D Tensor with shape `[batch, channels, height, width]`.
+*@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`.
+
+*@par Attributes:
+*padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .
+
+*@par Outputs:
+*y: Returns 4-D Tensor with the same dtype as `x`.
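+*
+* For reference only (assumed semantics, following the usual grid_sample
+* convention): the "unnormalized" coordinates consumed here are what
+* GridUnnormal above produces from normalized [-1, 1] grid values:
+*
+*   float Unnormalize(float coord, int64_t size, bool align_corners) {
+*     // align_corners=true:  -1 -> 0, +1 -> size - 1
+*     // align_corners=false: -1 -> -0.5, +1 -> size - 0.5 (pixel centers)
+*     return align_corners ? (coord + 1.0f) * 0.5f * (size - 1)
+*                          : ((coord + 1.0f) * size - 1.0f) * 0.5f;
+*   }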
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ImageUnfold)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(position, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(padding_mode, String, "zeros")
+    .OP_END_FACTORY_REG(ImageUnfold)
+
+/**
+*@brief This operation selects images to warp_images according to offsets.
+
+*@par Inputs:
+*@li images: 4-D Tensor with shape `[batch, height, width, 3]`.
+*@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`.
+
+*@par Outputs:
+*warp_images: Returns 5-D Tensor with shape
+`[batch, 4, new_height, new_width, 3]` and the same dtype as `images`.
+*/
+REG_OP(IMGWarpOffsets)
+    .INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
+    .INPUT(offsets, TensorType({DT_FLOAT, DT_INT32}))
+    .OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(IMGWarpOffsets)
+
+/**
+*@brief This operation samples 3-D input x by using interpolation based on flow field grid,
+ which is usually generated by affine_grid.
+
+*@par Inputs:
+*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`.
+*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 3]`.
+
+*@par Attributes:
+*@li interpolation_mode: An optional string specifying the interpolation method.
+*@li padding_mode: An optional string specifying the pad method.
+*@li align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to "false" .
+
+*@par Outputs:
+*y: Returns 5-D Tensor with the same dtype as `x`.
+
+*@par Third-party framework compatibility
+*Compatible with pytorch GridSampler3D operator.
+*/
+REG_OP(GridSampler3D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(interpolation_mode, String, "bilinear")
+    .ATTR(padding_mode, String, "zeros")
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridSampler3D)
+
+/**
+*@brief Computes the gradients of GridSampler3D.
+
+*@par Inputs:
+*@li grad: 5-D Tensor with shape `[batch, channels, depth, height, width]`.
+*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`.
+*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 3]`.
+
+*@par Attributes:
+*@li interpolation_mode: An optional string specifying the interpolation method.
+*@li padding_mode: An optional string specifying the pad method.
+*@li align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to "false" .
+
+*@par Outputs:
+*@li dx: Returns 5-D Tensor with the same dtype and shape as `x`.
+*@li dgrid: Returns 5-D Tensor with the same dtype and shape as `grid`.
+
+*@par Third-party framework compatibility
+*Compatible with pytorch GridSampler3DGrad operator.
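+*
+* For reference only (not part of the original header), the forward bilinear
+* rule these gradients correspond to, shown in 2-D with "zeros" padding and
+* assuming <cmath> for std::floor:
+*
+*   float BilinearAt(const float *img, int64_t h, int64_t w, float y, float x) {
+*     auto pix = [&](int64_t yy, int64_t xx) {  // zero outside the image
+*       return (yy < 0 || yy >= h || xx < 0 || xx >= w) ? 0.0f : img[yy * w + xx];
+*     };
+*     const int64_t y0 = static_cast<int64_t>(std::floor(y));
+*     const int64_t x0 = static_cast<int64_t>(std::floor(x));
+*     const float dy = y - y0, dx = x - x0;
+*     return pix(y0, x0) * (1 - dy) * (1 - dx) + pix(y0, x0 + 1) * (1 - dy) * dx +
+*            pix(y0 + 1, x0) * dy * (1 - dx) + pix(y0 + 1, x0 + 1) * dy * dx;
+*   }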
+*/
+REG_OP(GridSampler3DGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(dgrid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(interpolation_mode, String, "bilinear")
+    .ATTR(padding_mode, String, "zeros")
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridSampler3DGrad)
+
+/**
+*@brief Upsample the 3-D data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: An optional listInt. Defaults to none.
+ Contains 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size'
+ should be the same as the rank of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, containing 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should be the same as the rank of input 'x'. One of 'scales' and
+ 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest3d)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(output_size, ListInt, {})
+    .ATTR(scales, ListFloat, {})
+    .OP_END_FACTORY_REG(UpsampleNearest3d)
+
+/**
+*@brief Upsample the 3-D data with the trilinear interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: An optional listInt. Defaults to none.
+ Contains 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' should
+ be the same as the rank of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, containing 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should be the same as the rank of input 'x'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+*@li align_corners: An optional bool. Defaults to false.
+ If true, the input and output tensors are aligned by the center points of their corner pixels, preserving the
+ values at the corner pixels. If false, the input and output tensors are aligned by the corner points of their
+ corner pixels, and the interpolation uses edge value padding for out of boundary values. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 \n
+*/
+
+REG_OP(UpsampleTrilinear3d)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(output_size, ListInt, {})
+    .ATTR(scales, ListFloat, {})
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(UpsampleTrilinear3d)
+
+/**
+*@brief Upsample the 3-D gradient data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li input_size: A required listInt.
+ Contains 5 elements: [min_batch, channels, depth, height, width]. Must:
+ input_size[0] == grad_output_tensor_size[0]
+ input_size[1] == grad_output_tensor_size[1]. \n
+*@li output_size: An optional listInt. Defaults to none.
+ Contains 3 elements: depth, height, width. The number of elements of 'output_size' should
+ be the same as the rank of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
+ grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
+ grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
+ grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, containing 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should be the same as the rank of input 'grad_output'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input grad_output, shape depends on Attributes:input_size. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(UpsampleNearest3dGrad)
+    .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(input_size, ListInt)
+    .ATTR(output_size, ListInt, {})
+    .ATTR(scales, ListFloat, {})
+    .OP_END_FACTORY_REG(UpsampleNearest3dGrad)
+
+/**
+*@brief Upsample the 3-D gradient data with the trilinear interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li input_size: A required listInt.
+ Contains 5 elements: [min_batch, channels, depth, height, width]. Must:
+ input_size[0] == grad_output_tensor_size[0]
+ input_size[1] == grad_output_tensor_size[1]. \n
+*@li output_size: An optional listInt. Defaults to none.
+ Contains 3 elements: depth, height, width. The number of elements of 'output_size' should
+ be the same as the rank of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
+ grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
+ grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
+ grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, containing 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should be the same as the rank of input 'grad_output'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified.
 \n
+
+*@par Outputs:
+*y: A Tensor with shape depending on input_size and output_size/scales. Must be one of the following
+ types: float16, float32, float64. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(UpsampleTrilinear3dGrad)
+    .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(input_size, ListInt)
+    .ATTR(output_size, ListInt, {})
+    .ATTR(scales, ListFloat, {})
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(UpsampleTrilinear3dGrad)
+
+
+/**
+*@brief Upsample the 1-D data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*x: A 3-D input tensor [N, C, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: A required listInt containing output_width.
+*@li scales: An optional listFloat containing scale_width. Defaults to empty. \n
+
+*@par Outputs:
+*y: A 3-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest1d)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(output_size, ListInt)
+    .ATTR(scales, ListFloat, {})
+    .OP_END_FACTORY_REG(UpsampleNearest1d)
+
+/**
+*@brief Upsample the 1-D gradient data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*grad_output: A 3-D input tensor [N, C, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: A required listInt containing output_width.
+*@li scales: An optional listFloat containing scale_width. Defaults to empty.
+*@li input_size: A required listInt containing the size of the original input [N, C, W]. \n
+
+*@par Outputs:
+*y: A 3-D tensor. Has the same type as input grad_output, shape depends on Attributes:input_size. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest1dGrad)
+    .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(input_size, ListInt)
+    .REQUIRED_ATTR(output_size, ListInt)
+    .ATTR(scales, ListFloat, {})
+    .OP_END_FACTORY_REG(UpsampleNearest1dGrad)
+
+/**
+* @brief Parses an encoded image from a string into a numeric tensor. \n
+
+* @par Inputs:
+* contents: A Tensor of type string. 0-D. The JPEG-, GIF-, PNG-, or BMP-encoded image. \n
+
+* @par Attributes:
+* @li channels: An optional int. Defaults to 0. Number of color channels for the decoded image.
+* @li dtype: The data type of the decoded image.
+* @li expand_animations: Controls the shape of the returned op's output. If 'true', the returned op will
+ produce a 4-D tensor for GIF files. If 'false', the returned op will produce a 3-D tensor for GIF files.
+
+* @par Outputs:
+* image: A Tensor dtype of uint8, uint16 or float.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DecodeImage)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .OUTPUT(image, TensorType({DT_UINT8, DT_UINT16, DT_FLOAT}))
+    .ATTR(channels, Int, 0)
+    .ATTR(dtype, Type, DT_UINT8)
+    .ATTR(expand_animations, Bool, true)
+    .OP_END_FACTORY_REG(DecodeImage)
+
+/**
+* @brief JPEG encode input image with provided compression quality. \n
+
+* @par Inputs:
+* @li images: image is a 3-D uint8 tensor of shape [height, width, channels].
+* @li quality: int32 jpeg compression quality value between 0 and 100, 0-D tensor.
+
+* @par Outputs:
+* contents: A 0-D string tensor containing the JPEG-encoded image. \n
+
+* @par Third-party framework compatibility.
+* Compatible with tensorflow EncodeJpegVariableQuality operator.
+*/
+
+REG_OP(EncodeJpegVariableQuality)
+    .INPUT(images, TensorType({DT_UINT8}))
+    .INPUT(quality, TensorType({DT_INT32}))
+    .OUTPUT(contents, TensorType({DT_STRING}))
+    .OP_END_FACTORY_REG(EncodeJpegVariableQuality)
+
+/**
+* @brief Applies projective transforms to images. \n
+
+* @par Inputs:
+* @li images: [batch, height, width, channels], 4-D tensor.
+* @li transforms: [batch, 8] or [1, 8] matrix, 2-D tensor.
+* @li output_shape: [new_height, new_width], 1-D tensor.
+
+* @par Attributes:
+* @li interpolation: Interpolation method, "NEAREST" or "BILINEAR".
+* @li fill_mode: Defaults to "CONSTANT". Fill mode, "REFLECT", "WRAP", or "CONSTANT".
+
+* @par Outputs:
+* transformed_images: has the same type as images, 4-D tensor with shape [batch, new_height, new_width, channels]. \n
+
+* @par Third-party framework compatibility.
+* Compatible with tensorflow ImageProjectiveTransform operator.
+*/
+REG_OP(ImageProjectiveTransform)
+    .INPUT(images, TensorType({DT_UINT8, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(transforms, TensorType({DT_FLOAT}))
+    .INPUT(output_shape, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(interpolation, String)
+    .ATTR(fill_mode, String, "CONSTANT")
+    .OUTPUT(transformed_images, TensorType({DT_UINT8, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(ImageProjectiveTransform)
+} // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h
index 9dde14a5..bcc3f1c3 100644
--- a/third_party/fwkacllib/inc/ops/internal_ops.h
+++ b/third_party/fwkacllib/inc/ops/internal_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h
index 7a6fbc59..f3cc5aed 100644
--- a/third_party/fwkacllib/inc/ops/linalg_ops.h
+++ b/third_party/fwkacllib/inc/ops/linalg_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,8 +61,8 @@ REG_OP(CholeskyGrad)
 
 *@par Inputs:
 *The input x has to be symmetric and positive definite.Inputs include:
-*x:A Tensor. Must be one of the following types: double, float32. Shape
-is [..., M, M] . \n
+*x:A Tensor. Must be one of the following types: double, float32, float16,
+complex64, complex128. Shape is [..., M, M] . \n
 
 *@par Outputs:
 *y:A Tensor. Has the same type as x . \n
@@ -76,19 +76,40 @@ form square matrices.
 */
 
 REG_OP(Cholesky)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \
+        DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \
+        DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
     .OP_END_FACTORY_REG(Cholesky)
 
 /**
+*@brief Computes the outer product of two 1D vectors .
\n + +*@par Inputs: +*The input x1 and x2 has to be a 1D vector.Inputs include: +*@li x1:A Tensor. Must be one of the following types: float16, float32. +Shape is [N] . \n +*@li x2:A Tensor. Must have the same type as x. Shape is [M] . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . \n +*/ + +REG_OP(Ger) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Ger) + +/** *@brief Computes the sign and the log of the absolute value of the determinant of one or more square matrices . \n *@par Inputs: *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float32. Shape is -[..., M, M] . \n +*x:A Tensor. Must be one of the following types: double, float32, +complex64, complex128. Shape is [..., M, M] . \n *@par Outputs: *@li y:A Tensor. Has the same type as x. @@ -103,9 +124,9 @@ form square matrices. \n */ REG_OP(LogMatrixDeterminant) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(LogMatrixDeterminant) /** @@ -114,8 +135,8 @@ REG_OP(LogMatrixDeterminant) *@par Inputs: *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float32. Shape is -[..., M, M] . \n +*x:A Tensor. Must be one of the following types: double, float32, complex64, +complex128. Shape is [..., M, M] . \n *@par Outputs: *y:A Tensor. Has the same type as x . \n @@ -129,8 +150,8 @@ form square matrices. */ REG_OP(MatrixDeterminant) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(MatrixDeterminant) /** @@ -140,8 +161,7 @@ their adjoints (conjugate transposes) . \n *@par Inputs: *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float. Shape is -[..., M, M] . \n +*x:A Tensor of input. Shape is [..., M, M] . \n *@par Attributes: *adjoint:An optional bool. Defaults to False.Boolean indicating whether to @@ -159,8 +179,8 @@ form square matrices. \n */ REG_OP(MatrixInverse) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixInverse) @@ -169,8 +189,7 @@ REG_OP(MatrixInverse) *@par Inputs: *The input rhs must have the same type as matrix. Inputs include: -*@li matrix:A Tensor. Must be one of the following types: double, float. -Shape is [..., M, M]. +*@li matrix:A Tensor of input. Shape is [..., M, M]. *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n *@par Attributes: @@ -189,9 +208,9 @@ dimensions form square matrices. 
\n */ REG_OP(MatrixSolve) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixSolve) @@ -221,8 +240,8 @@ dimensions form square matrices. \n */ REG_OP(MatrixSolveLs) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .INPUT(l2, TensorType({DT_DOUBLE})) .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) .ATTR(fast, Bool, true) @@ -234,8 +253,7 @@ matrices by backsubstitution . \n *@par Inputs: *The input rhs must have the same type as matrix. Inputs include: -*@li matrix: A Tensor. Must be one of the following types: double, float. -Shape is [..., M, M]. +*@li matrix: A Tensor. Shape is [..., M, M]. *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n *@par Attributes: @@ -256,9 +274,9 @@ dimensions form square matrices. \n */ REG_OP(MatrixTriangularSolve) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(lower, Bool, true) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixTriangularSolve) @@ -268,8 +286,7 @@ REG_OP(MatrixTriangularSolve) *@par Inputs: *The input shape of x must be [..., M, N]. Inputs include: -*x:A Tensor whose shape is [..., M, N]. Must be one of the following types: -double, float . \n +*x:A Tensor whose shape is [..., M, N]. \n *@par Attributes: *full_matrices: An optional bool. Defaults to False. If true, compute @@ -289,9 +306,12 @@ dimensions form matrices of size [M, N]. \n */ REG_OP(Qr) - .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) - .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) - .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(full_matrices, Bool, false) .OP_END_FACTORY_REG(Qr) @@ -320,13 +340,44 @@ form square matrices. \n */ REG_OP(SelfAdjointEig) - .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT })) + .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(compute_v, Bool, true) .OP_END_FACTORY_REG(SelfAdjointEig) /** +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+ +*@brief Computes the sign and the log of the absolute value of the determinant +of one or more square matrices . \n + +*@par Inputs: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor. Must be one of the following types: double, float32, float16 +Shape is [..., M, M] . \n + +*@par Outputs: +*@li y:A Tensor. Has the same type as x. +*@li sign:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow LogMatrixDeterminant operator. +*/ + +REG_OP(Slogdet) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(sign, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(Slogdet) + +/** *@brief Computes the singular value decompositions of one or more matrices . \n *@par Inputs: @@ -334,9 +385,10 @@ REG_OP(SelfAdjointEig) *x:Tensor of shape [..., M, N]. Let P be the minimum of M and N . \n *@par Attributes: -*compute_uv:If True then left and right singular vectors will be computed and +*@li compute_uv:If True then left and right singular vectors will be computed and returned in u and v, respectively. Otherwise, only the singular values will -be computed, which can be significantly faster . \n +be computed, which can be significantly faster . +*@li full_matrices:the param effect u,v. \n *@par Outputs: *@li sigma:Singular values. Shape is [..., P]. The values are sorted in @@ -358,10 +410,10 @@ form square matrices. \n */ REG_OP(Svd) - .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT })) + .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(compute_uv, Bool, true) .ATTR(full_matrices, Bool, false) .OP_END_FACTORY_REG(Svd) @@ -379,13 +431,16 @@ denotes the lower triangular factor `L` with unit diagonal. *@li p: upper triangular part denotes the upper triangular factor `U`.Permutation of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n +*@par Attributes: +*output_idx_type: An optional DType from: int32, int64. + *@par Third-party framework compatibility * Compatible with TensorFlow Lu operator. */ REG_OP(Lu) - .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OUTPUT(p, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(output_idx_type, Type) .OP_END_FACTORY_REG(Lu) @@ -404,8 +459,8 @@ y: Shape is `[..., M, M]` . \n */ REG_OP(MatrixSquareRoot) - .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(MatrixSquareRoot) /** @@ -419,17 +474,77 @@ left-hand side . 
\n *@par Outputs: y: Tensor of shape `[..., M, K]` containing the solutions \n +*@par Attributes: +*partial_pivoting: Whether to perform partial pivoting. `True` by default. +Partial pivoting makes the procedure more stable, but slower. Partial +pivoting is unnecessary in some cases, including diagonally dominant and +symmetric positive definite matrices + *@par Third-party framework compatibility * Compatible with TensorFlow TridiagonalSolve operator. */ REG_OP(TridiagonalSolve) - .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(partial_pivoting, Bool, true) .OP_END_FACTORY_REG(TridiagonalSolve) +/** +* @brief Solution of banded triangular matrix . \n + +* @par Inputs: +* The input bands has to be symmetric and positive definite. +* @li bands:A Tensor. Must be one of the following types: double, float32, + float16,complex64, complex128. Shape is [... K,M], K corresponds to the + number of bands (actually stored diagonals), and M is the data of the + diagonals. + @li rhs:shape is [...M] or [...M, N]. Has the same type as bands \n + +* @par Outputs: +* @li output:A Tensor. Has the same type as bands . \n + +* @par Attributes: +* @li lower:An optional bool. Defaults to True.True: indicates the lower + triangular matrix. False: indicates the upper triangular matrix. +* @li adjoint:An optional bool. Defaults to False.Boolean indicating whether to + solve with matrix or its (block-wise) adjoint. \n + +* @par Third-party framework compatibility +* Compatible with tensorflow BandedTriangularSolve operator. +*/ + +REG_OP(BandedTriangularSolve) + .INPUT(bands, TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output,TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(lower, Bool, true) + .ATTR(adjoint, Bool, false) + .OP_END_FACTORY_REG(BandedTriangularSolve) + +/** +* @brief Returns the complex conjugatetranspose. + +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: double, float32, float16, + int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool +* @li perm: A Index. Must be one of the following types: int32, int64 \n +* +* @par Outputs: +* @li y: A Tensor. Has the same type as "x" . \n + +* @par Third-party framework compatibility. +* Compatible with tensorflow ConjugateTranspose operator. +*/ +REG_OP(ConjugateTranspose) + .INPUT(x, TensorType::BasicType()) + .INPUT(perm, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(ConjugateTranspose) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h new file mode 100644 index 00000000..0aa94e73 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/list_ops.h @@ -0,0 +1,504 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file list_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ + +#include +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Creates and returns an empty tensor list. \n + +*@par Inputs: +*@li element_shape: A shape compatible with that of elements in the list. +*@li max_num_elements: The maximum number of elements. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*handle: An empty tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow EmptyTensorList operator. +*/ +REG_OP(EmptyTensorList) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(max_num_elements, TensorType({DT_INT32})) + .OUTPUT(handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(EmptyTensorList) + +/** +*@brief Returns a list which has the passed-in `Tensor` as last element +and the other elements of the given list in `input_handle`. \n + +*@par Inputs: +*@li input_handle: The old list. +*@li tensor: The tensor to put on the list. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*output_handle:A list with the elements of old list followed by tensor. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPushBack operator. +*/ +REG_OP(TensorListPushBack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPushBack) + +/** +*@brief The last element of the input list as well as a +list with all but that element. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li element_shape: A shape compatible with that of elements in the list. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle:A list with the elements of the old list followed by tensor. +*@li tensor:The withdrawn last element of the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPopBack operator. +*/ +REG_OP(TensorListPopBack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPopBack) + +/** +*@brief The number of tensors in the input tensor list. \n + +*@par Inputs: +*input_handle: The input list. 
\n + +*@par Outputs: +*length:The number of tensors in the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListLength operator. +*/ +REG_OP(TensorListLength) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(length, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(TensorListLength) + +/** +*@brief The shape of elements in the input tensor list. \n + +*@par Inputs: +*input_handle: The input list. \n + +*@par Attributes: +*shape_type: The type of shape in the list. \n + +*@par Outputs: +*element_shape:A shape compatible with that of elements in the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListElementShape operator. +*/ +REG_OP(TensorListElementShape) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .ATTR(shape_type, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListElementShape) + +/** +*@brief List of the given size with empty elements. \n + +*@par Inputs: +*@li element_shape: A shape compatible with that of elements in the list. +*@li num_elements: The number of elements to reserve. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. +*@li shape_type: The type of shape in the list. \n + +*@par Outputs: +*handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListReserve operator. +*/ +REG_OP(TensorListReserve) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(num_elements, TensorType({DT_INT32})) + .OUTPUT(handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .ATTR(shape_type, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListReserve) + +/** +*@brief Get input tensor list elements of index position. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li index: A tensor of position. +*@li element_shape: A shape compatible with that of elements in the list. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*item: An output tensor value of index position . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListGetItem operator. +*/ +REG_OP(TensorListGetItem) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(index, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListGetItem) + +/** +*@brief Sets the index-th position of the list to contain the given tensor. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li index: The position in the list to which the tensor will be assigned. +*@li item: The element to be assigned to that position. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*output_handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListSetItem operator. 
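+*
+* Illustration only (host-side analogy, not the device implementation):
+* reserve / set / get on a tensor list behave like the corresponding
+* std::vector operations, with Tensor standing in for a list element:
+*
+*   std::vector<Tensor> list(num_elements);  // TensorListReserve
+*   list[index] = item;                      // TensorListSetItem
+*   Tensor out = list[index];                // TensorListGetItem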
+*/ +REG_OP(TensorListSetItem) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(index, TensorType({DT_INT32})) + .INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListSetItem) + +/** +*@brief Push tensor to list. \n + +*@par Inputs: +*@li input_handles: The input tensor lists. +*@li tensor: The tensor push into tensor list. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*output_handles: The output tensor lists. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPushBackBatch operator. +*/ +REG_OP(TensorListPushBackBatch) + .INPUT(input_handles, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handles, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPushBackBatch) + +/** +*@brief Stacks all tensors in the list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li element_shape: A shape compatible with that of elements in the tensor. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. +*@li num_elements: The number of elements in the list. \n + +*@par Outputs: +*tensor: The tensor of list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListStack operator. +*/ +REG_OP(TensorListStack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .ATTR(num_elements, Int, -1) + .OP_END_FACTORY_REG(TensorListStack) + +/** +*@brief Concats all tensors in the list along the 0th dimension. +Requires that all tensors have the same shape except the first dimension. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li element_shape: The shape of the uninitialized elements in the list. +If the first dimension is not -1, it is assumed that all list elements have +the same leading dim. +*@li leading_dims: The list of leading dims of uninitialized list elements. Used if +the leading dim of input_handle.element_shape or the element_shape input arg +is not already set. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li tensor: The concated result. +*@li lengths: Output tensor containing sizes of the 0th dimension of tensors +in the list, used for computing the gradient. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListConcatV2 operator. 
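+*
+*@par Example:
+* An illustrative sketch of the semantics (not part of the original spec):
+* for a list holding tensors of shapes [2, 3] and [4, 3], the op returns
+* tensor with shape [6, 3] (concatenation along dim 0) and lengths = [2, 4],
+* the 0th-dimension sizes used for computing the gradient.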
+*/ +REG_OP(TensorListConcatV2) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(leading_dims, TensorType({DT_INT64})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(lengths, TensorType({DT_INT64})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListConcatV2) + +/** +*@brief Splits a tensor into a list. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li element_shape: A shape compatible with that of elements in the tensor. +*@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*output_handle: The list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListSplit operator. +*/ +REG_OP(TensorListSplit) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(lengths, TensorType({DT_INT64})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListSplit) + +/** +*@brief Creates a TensorList which, when stacked, has the value of `tensor`. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li element_shape: The shape of elements in the list. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*output_handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListFromTensor operator. +*/ +REG_OP(TensorListFromTensor) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListFromTensor) + +/** +*@brief Resizes the list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li size: size of the output list. \n + +*@par Outputs: +*output_handle: The output tensor list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListResize operator. +*/ +REG_OP(TensorListResize) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(TensorListResize) + +/** +*@brief Creates a Tensor by indexing into the TensorList. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li indices: The indices used to index into the list. +*@li element_shape: The shape of elements in the list. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*values: The tensor. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListGather operator. 
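+*
+*@par Example:
+* An illustrative sketch of the semantics (not part of the original spec):
+* for a list holding three tensors a, b, c of shape [2] and indices = [2, 0],
+* values is the stack [c, a] with shape [2, 2].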
+*/ +REG_OP(TensorListGather) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(values, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListGather) + +/** +*@brief Creates a TensorList by indexing into a Tensor. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li indices: The indices used to index into the list. +*@li element_shape: The shape of the elements in the list (can be less specified than +the shape of the tensor). +*@li num_elements: The size of the output list. Must be large enough to accommodate +the largest index in indices. If -1, the list is just large enough to include +the largest index in indices. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*output_handle: The TensorList. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListScatterV2 operator. +*/ +REG_OP(TensorListScatterV2) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(num_elements, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListScatterV2) + +/** +*@brief Scatters tensor at indices in an input list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li tensor: The input tensor. +*@li indices: The indices used to index into the list. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*output_handle: The TensorList. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListScatterIntoExistingList operator. +*/ +REG_OP(TensorListScatterIntoExistingList) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListScatterIntoExistingList) + +/** +*@brief Concat two tensor lists to a new tensor list. \n + +*@par Inputs: +*@li input_a: The input tensor list A. +*@li input_b: The input tensor list B. \n + +*@par Attributes: +*element_dtype: The type of elements in the list. \n + +*@par Outputs: +*output: The output list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListConcatLists operator. 
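+*
+*@par Example:
+* An illustrative sketch of the semantics (not part of the original spec):
+* if input_a holds [t0, t1] and input_b holds [t2], the output list holds
+* [t0, t1, t2]; both lists are assumed to share the same element_dtype.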
+*/
+REG_OP(TensorListConcatLists)
+    .INPUT(input_a, TensorType({DT_VARIANT}))
+    .INPUT(input_b, TensorType({DT_VARIANT}))
+    .OUTPUT(output, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListConcatLists)
+}  // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h
index bc8ae2b8..dd565657 100644
--- a/third_party/fwkacllib/inc/ops/logging_ops.h
+++ b/third_party/fwkacllib/inc/ops/logging_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -111,6 +111,38 @@ REG_OP(PrintV2)
   .INPUT(x, TensorType({DT_STRING}))
   .ATTR(output_stream, String, "stderr")
   .OP_END_FACTORY_REG(PrintV2)
+
+/**
+*@brief Prints a list of tensors. \n
+
+*@par Inputs:
+*x: A tensor passed through this op. \n
+*data: A list of tensors to print out when op is evaluated. \n
+
+*@par Attributes:
+*message: A string, prefix of the error message. \n
+*first_n: Only log first_n number of times. Negative numbers
+*log always; this is the default. \n
+*summarize: Only print this many entries of each tensor.
+*If None, then a maximum of 3 elements are printed per input tensor. \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow Print operator. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(PrintV3)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32,
+        DT_INT64, DT_UINT32, DT_UINT64, DT_DOUBLE, DT_STRING}))
+    .DYNAMIC_INPUT(data, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32,
+        DT_INT64, DT_UINT32, DT_UINT64, DT_DOUBLE, DT_STRING}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32,
+        DT_INT64, DT_UINT32, DT_UINT64, DT_DOUBLE, DT_STRING}))
+    .ATTR(message, String, "")
+    .ATTR(first_n, Int, -1)
+    .ATTR(summarize, Int, 3)
+    .OP_END_FACTORY_REG(PrintV3)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h
index b37ab048..ea840d64 100644
--- a/third_party/fwkacllib/inc/ops/lookup_ops.h
+++ b/third_party/fwkacllib/inc/ops/lookup_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -77,8 +77,8 @@ REG_OP(LookupTableInsert)
 *handle: A Tensor of type resource. Handle to the table . \n
 
 *@par Attributes:
-*@li Tkeys: A DType.
-*@li Tvalues: A DType . \n
+*@li Tkeys: A DType of keys.
+*@li Tvalues: A DType of values.
 
 *@par Outputs:
 *@li keys: A Tensor of type Tkeys.
@@ -303,6 +303,21 @@ REG_OP(MutableHashTable)
   .REQUIRED_ATTR(key_dtype, Type)
   .REQUIRED_ATTR(value_dtype, Type)
   .OP_END_FACTORY_REG(MutableHashTable)
+
+/**
+* @brief Removes keys in the given table. \n
+
+* @par Inputs:
+* @li table_handle: A Tensor of type resource. Handle to the table. \n
+* @li keys: A Tensor. Any shape. Keys to remove. \n
+
+* @par Third-party framework compatibility.
+* Compatible with tensorflow LookupTableRemove operator.
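+*
+* @par Example:
+* A minimal graph-construction sketch, assuming the generated ge::op wrapper
+* classes with set_input_* setters (illustrative only):
+*   auto remove = op::LookupTableRemove("remove")
+*                     .set_input_table_handle(table)
+*                     .set_input_keys(keys_const);  // e.g. int64 keys {1, 7}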
+*/ +REG_OP(LookupTableRemove) + .INPUT(table_handle, TensorType({DT_RESOURCE})) + .INPUT(keys,TensorType({RealNumberType, DT_BOOL, DT_STRING})) + .OP_END_FACTORY_REG(LookupTableRemove) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/map_ops.h b/third_party/fwkacllib/inc/ops/map_ops.h new file mode 100644 index 00000000..6ac15bf6 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/map_ops.h @@ -0,0 +1,152 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file map_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_ +#include "graph/operator_reg.h" + +namespace ge { +/** +* @brief Returns whether the given key exists in the map. \n + +* @par Inputs: +* @li input_handle: A scalar Tensor of type variant. The original map. +* @li key: The key to check. Supports int32, int64, string. \n + +* @par Outputs: +* has_key: A scalar Tensor of type bool. Whether the key is already in the map or not. \n + +* @par Third-party framework compatibility. +* Compatible with tensorflow TensorMapHasKey operator. +*/ +REG_OP(TensorMapHasKey) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(has_key, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(TensorMapHasKey) + +/** +* @brief Returns a tensor map with item from given key erased. \n + +* @par Inputs: +* @li input_handle: A scalar Tensor of type variant. The original map. +* @li key: The key of the value to be erased. Supports int32, int64, string. \n + +* @par Outputs: +* output_handle: A scalar Tensor of type variant. The map with value from given key removed. \n + +* @par Third-party framework compatibility. +* Compatible with tensorflow TensorMapErase operator. +*/ +REG_OP(TensorMapErase) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(TensorMapErase) + +/** +* @brief Returns a map that is the 'input_handle' + with the given key-value pair inserted. \n + +* @par Inputs: +* @li input_handle: The original map, Must be type: DT_VARIANT. +* @li key: A Tensor,the key to be inserted.Must be one of + the following types: int32, int64, string. +* @li value: A Tensor,the value to be inserted.Must be + one of BasicType types. \n + +* @par Outputs: +* output_handle: The map with key and value inserted. + Must be type: DT_VARIANT. \n +*/ +REG_OP(TensorMapInsert) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .INPUT(value, BasicType) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(TensorMapInsert) + +/** +* @brief Returns the value from a given key in a tensor map . \n + +* @par Inputs: +* @li input_handle: The input map. Must be type: DT_VARIANT. 
+* @li key: A Tensor,the key to be looked up. Must be one of + the following types: int32,int64,string . \n + +* @par Attributes: +* value_dtype: A int. Representing the type of value . \n + +* @par Outputs: +* value: A Tensor,the value found from the given key. +*/ +REG_OP(TensorMapLookup) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(value, BasicType) + .REQUIRED_ATTR(value_dtype, Type) + .OP_END_FACTORY_REG(TensorMapLookup) + +/** +* @brief return TensorMap Size. \n +* +* @par Inputs: +* input_handle: A Tensor. Must be one of the following types: variant. \n +* +* @par Outputs: +* size: A Tensor. Must be one of the following types: int32. \n +*/ +REG_OP(TensorMapSize) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(size, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(TensorMapSize) + +/** + * @brief Return TensorMapStackKeys \n + * + * @par Inputs: + * input_handle: A Tensor. Must be one of the following types: variant. \n + * + * @par Outputs: + * keys: A Tensor. Must be one of the following types: int32, int64, string. \n + * + * @par Attributes: + * key_dtype: An required param. It is the dtype of the key. + */ +REG_OP(TensorMapStackKeys) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .REQUIRED_ATTR(key_dtype, Type) + .OP_END_FACTORY_REG(TensorMapStackKeys) + +/** +* @brief Creates and returns an empty tensor map. \n + +* @par Outputs: +* handle: An empty tensor map . \n + +* @par Third-party framework compatibility. +* Compatible with tensorflow EmptyTensorMap operator. +*/ +REG_OP(EmptyTensorMap) + .OUTPUT(handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(EmptyTensorMap) +} // namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 149e0e37..8e9ee4db 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -223,6 +223,24 @@ REG_OP(Bucketize) .OP_END_FACTORY_REG(Bucketize) /** +*@brief Returns a new tensor with the truncated integer values of the elements of input. \n + +*@par Inputs: +*One inputs, including: +*input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n + +*@par Outputs: +*output_y: A tensor with the same type and shape of input_x \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Trunc. \n +*/ +REG_OP(Trunc) + .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) + .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) + .OP_END_FACTORY_REG(Trunc) + +/** *@brief Computes the sum along sparse segments of a tensor . \n *@par Inputs: @@ -280,7 +298,7 @@ REG_OP(SparseSegmentMean) *@par Inputs: *The input grad must have be type float or double. Inputs include: -*@li grad: A Tensor. Must be one of the following types: float, double. +*@li x: A Tensor. Must be one of the following types: float, double. gradient propagated to the SparseSegmentMean op. *@li indices: A Tensor. Must be one of the following types: int32, int64. indices passed to the corresponding SparseSegmentMean op. 
@@ -347,6 +365,7 @@ REG_OP(InitData)
 component of an element of this dataset.
 *@li output_shapes: A nested structure of TensorShape objects corresponding
 to each component of an element of this dataset.
+*@li output_num: The number of outputs.
 *@li channel_name: A string. Default "" . \n
 
 *@par Outputs:
@@ -358,14 +377,35 @@ to each component of an element of this dataset.
 REG_OP(GetNext)
   .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64,
-                                 DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
-  .ATTR(output_types, ListInt, {})
+                                 DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
+  .ATTR(output_types, ListType, {})
   .ATTR(output_shapes, ListListInt, {})
   .ATTR(output_num, Int, 1)
   .ATTR(channel_name, String, "")
   .OP_END_FACTORY_REG(GetNext)
 
 /**
+*@brief Get dynamic dims after GetNext. \n
+
+*@par Inputs:
+*input: A nested structure of Tensor objects, from GetNext's output. \n
+
+*@par Attributes:
+*@li shape_info: GE shape_info for each input, -1 means unknown dim.
+*@li N: Inputs number. \n
+
+*@par Outputs:
+*dims: GE unknown dims, a vector of int64. \n
+*/
+
+REG_OP(GetDynamicDims)
+    .DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(dims, TensorType({DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(shape_info, ListInt)
+    .REQUIRED_ATTR(N, Int)
+    .OP_END_FACTORY_REG(GetDynamicDims)
+
+/**
 *@brief End of sequence . \n
 
 *@par Inputs:
@@ -385,7 +425,7 @@ REG_OP(EndOfSequence)
 
 *@par Inputs:
 *x: A Tensor of type float16, float32 or double. the format can be
-* [NCHW,NC1HWC0,NHWC,ND]
+* [NCHW,NHWC,ND]
 
 *@par Outputs:
 *y: A Tensor. Has the same type and format as "x" . \n
@@ -422,15 +462,15 @@ REG_OP(Erfc)
 
 *@par Inputs:
 *Three inputs, including:
-*@li x: A Tensor of type float32, float16, int32, int64.
-*@li range: A Tensor of type float32,float16,int32, int64.
+*@li x: A Tensor of type float32, int32, int64. float16 is currently not supported.
+*@li range: A Tensor of type float32, int32, int64. float16 is currently not supported.
 *@li nbins: A Tensor of type int32 . \n
 
 *@par Attributes:
 * dtype: An optional attribute. Defaults to "int32" . \n
 
 *@par Outputs:
-*y: A Tensor. A Tensor of type int32 or int64 . \n
+*y: A Tensor. A Tensor of type int32. \n
 
 *@par Third-party framework compatibility
 * Compatible with TensorFlow operator HistogramFixedWidth.
@@ -440,7 +480,7 @@ REG_OP(HistogramFixedWidth)
   .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
   .INPUT(nbins, TensorType({DT_INT32}))
   .OUTPUT(y, TensorType({DT_INT32}))
-  .ATTR(dtype, String, "int32")
+  .ATTR(dtype, Int, 3)
   .OP_END_FACTORY_REG(HistogramFixedWidth)
 
 /**
@@ -471,7 +511,7 @@ REG_OP(HistogramFixedWidthD)
   .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
   .OUTPUT(y, TensorType({DT_INT32}))
   .REQUIRED_ATTR(nbins, Int)
-  .ATTR(dtype, String, "int32")
+  .ATTR(dtype, Int, 3)
   .OP_END_FACTORY_REG(HistogramFixedWidthD)
 
 /**
@@ -495,13 +535,36 @@ REG_OP(NextAfter)
   .OP_END_FACTORY_REG(NextAfter)
 
 /**
+*@brief Calculates the p-norm distance between each pair of row vectors in the input. \n
+
+*@par Inputs:
+*One input, including:
+* input_x: A tensor. Must be one of the following types:
+* float16, float32. \n
+
+*@par Attributes:
+*p: An optional float. Defaults to 2. \n
+
+*@par Outputs:
+*y: A Tensor with the same type as input_x. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Pdist.
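+*
+*@par Example:
+* An illustrative sketch of the semantics (matching torch.nn.functional.pdist):
+* for input_x of shape [N, M], the output collects ||x_i - x_j||_p for all
+* pairs i < j, i.e. a vector of length N * (N - 1) / 2.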
\n +*/ +REG_OP(Pdist) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Float, 2.0) + .OP_END_FACTORY_REG(Pdist) + +/** *@brief Compute element-wise finiteness, return a boolean tensor. *@par Inputs: - *x:A Tensor. + *x:A Tensor of type float16, float32, double. *@par Outputs: - *y:A Tensor. Has the same shape as x. + *y:A Tensor. Returns which elements of x are finite *@par Third-party framework compatibility. *Compatible with tensorflow IsFinite operator. @@ -515,10 +578,10 @@ REG_OP(IsFinite) *@brief Compute element-wise infiniteness, return a boolean tensor. *@par Inputs: - *x:A Tensor. + *x:A Tensor of type float16, float32, double. *@par Outputs: - *y:A Tensor. Has the same shape as x. + *y:A Tensor. Has the same shape as x. Returns which elements of x are isinf. *@par Third-party framework compatibility. *Compatible with tensorflow IsInf operator. @@ -532,7 +595,11 @@ REG_OP(IsInf) *@brief Computes the complex absolute value of a tensor. *@par Inputs: - *x:A Tensor. + *x: x of complex numbers, this operation returns a tensor of type + float or double that is the absolute value of each element in x . + +* @par Attributes: +* Tout: representing the output of type. *@par Outputs: *y:A tensor of type `float` or `double` that is the absolute value of each element in `x`. @@ -550,10 +617,10 @@ REG_OP(ComplexAbs) *@brief Returns which elements of x are NaN. *@par Inputs: - *x:A Tensor. + *x:A Tensor of type float16, float32, double. *@par Outputs: - *y:A Tensor. Has the same shape as x. + *y:A Tensor. Has the same shape as x. Returns which elements of x are isnan *@par Third-party framework compatibility. *Compatible with tensorflow IsNan operator. @@ -567,7 +634,10 @@ REG_OP(IsNan) *@brief Returns the real part of a complex number. *@par Inputs: - *input:A Tensor. + *input:A Tensor. Must have numeric type. + + *@par Attributes: + *Tout: Type of outputs. \n *@par Outputs: *output:A Tensor. Has the same shape as input. @@ -608,7 +678,8 @@ REG_OP(Conj) *@li weight: A Tensor dtype of float32 . \n *@par Attributes: -*reduction: An optional attribute. Defaults to "mean" . \n +*@li reduction: An optional attribute. Defaults to "mean" . +*@li ignore_index:An optional attribute.Defaults to -100 . \n *@par Outputs: *@li y: A Tensor dtype of float32. @@ -620,10 +691,11 @@ REG_OP(Conj) REG_OP(NLLLoss) .INPUT(x, TensorType({DT_FLOAT})) .INPUT(target, TensorType({DT_INT32})) - .INPUT(weight, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT})) .OUTPUT(total_weight, TensorType({DT_FLOAT})) .ATTR(reduction, String, "mean") + .ATTR(ignore_index, Int, -100) .OP_END_FACTORY_REG(NLLLoss) /** @@ -637,7 +709,8 @@ REG_OP(NLLLoss) *@li total_weight:A Tensor dtype of float32 . \n *@par Attributes: -*reduction: An optional attribute. Defaults to "mean" . \n +*@li reduction: An optional attribute. Defaults to "mean" . +*@li ignore_index:An optional attribute.Defaults to -100 . \n *@par Outputs: *x_grad: A Tensor. Must be the following type: float32 . \n @@ -653,27 +726,28 @@ REG_OP(NLLLossGrad) .INPUT(total_weight, TensorType({DT_FLOAT})) .OUTPUT(x_grad, TensorType({DT_FLOAT})) .ATTR(reduction, String, "mean") + .ATTR(ignore_index, Int, -100) .OP_END_FACTORY_REG(NLLLossGrad) /** -*@brief The ifmr . \n +*@brief IFMR(Input Feature Map Reconstruction). \n *@par Inputs: -*@li data:A Tensor of feature map -*@li data_min:A Tensor of min value of feature map. 
-*@li data_max:A Tensor of max value of feature map.
-*@li cumsum:A Tensor of cumsum bin of data . \n
+*@li data: A Tensor of feature map.
+*@li data_min: A Tensor of min value of feature map.
+*@li data_max: A Tensor of max value of feature map.
+*@li cumsum: A Tensor of cumsum bin of data. \n
 
 *@par Attributes:
-*min_percentile: min init percentile.
-*max_percentile: max init percentile.
-*search_range: search range.
-*search_step: step size of searching.
-*with_offset: whether using offset . \n
+*@li min_percentile: min init percentile.
+*@li max_percentile: max init percentile.
+*@li search_range: search range.
+*@li search_step: step size of searching.
+*@li with_offset: whether to use offset. \n
 
 *@par Outputs:
-*scale: optimal scale.
-*offset: optimal offset . \n
+*@li scale: optimal scale.
+*@li offset: optimal offset. \n
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
@@ -694,22 +768,25 @@ REG_OP(IFMR)
   .OP_END_FACTORY_REG(IFMR)
 
 /**
-*@brief weights adaptive range quantization. \n
+*@brief Weights Adaptive Range Quantization. \n
 
 *@par Inputs:
-*@li w:A Tensor of weights. \n
-*@li w_min:A Tensor of weights reduce_min. \n
-*@li w_max:A Tensor of weights reduce_max. \n
+*@li w: A Tensor of weights. \n
+*@li w_min: A Tensor of weights reduce_min. \n
+*@li w_max: A Tensor of weights reduce_max. \n
 
 *@par Attributes:
-*num_bits: the bits num used for quantize.
-*offset_flag: whether using offset. \n
+*@li num_bits: the bit number used for quantization.
+*@li offset_flag: whether to use offset. \n
 
 *@par Outputs:
 *y: fake quantized weights. \n
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(WtsARQ)
@@ -722,25 +799,28 @@ REG_OP(WtsARQ)
   .OP_END_FACTORY_REG(WtsARQ)
 
 /**
-*@brief The acts_ulq. \n
+*@brief Activations Universal Linear Quantization. \n
 
 *@par Inputs:
-*@li x:A Tensor of feature map
-*@li clamp _min:A Tensor of min clamp value of feature map.
-*@li clamp _max:A Tensor of max clamp value of feature map.
+*@li x: A Tensor of feature map.
+*@li clamp_min: A Tensor of min clamp value of feature map.
+*@li clamp_max: A Tensor of max clamp value of feature map.
 
 *@par Attributes:
-*fixed_min: fix min to zero.
-*num_bits: quant bits. \n
+*@li fixed_min: fix min to zero.
+*@li num_bits: quant bits. \n
 
 *@par Outputs:
-*y: output fake quant feature map.
-*clamp_min_mask: where x > clamp_min
-*clamp_min_mask: where x < clamp_max
-*x_clamped_loss: clamp loss. \n
+*@li y: output fake quant feature map.
+*@li clamp_min_mask: where x > clamp_min.
+*@li clamp_max_mask: where x < clamp_max.
+*@li x_clamped_loss: clamp loss. \n
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(ActsULQ)
@@ -748,40 +828,43 @@ REG_OP(ActsULQ)
   .INPUT(clamp_min, TensorType({DT_FLOAT16, DT_FLOAT}))
   .INPUT(clamp_max, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-  .OUTPUT(clamp_min_mask, TensorType({DT_BOOL}))
-  .OUTPUT(clamp_max_mask, TensorType({DT_BOOL}))
+  .OUTPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
+  .OUTPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
   .ATTR(fixed_min, Bool, false)
   .ATTR(num_bits, Int, 8)
   .OP_END_FACTORY_REG(ActsULQ)
 
 /**
-*@brief The acts_ulq_input_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization. \n
 
 *@par Inputs:
-*@li y_grad: A Tensor of gradient
-*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed'
-*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed'
+*@li y_grad: A Tensor of gradient.
+*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
+*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
 
 *@par Outputs:
 *x_grad: The gradient of inputs. \n
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(ActsULQInputGrad)
   .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
-  .INPUT(clamp_min_mask, TensorType({DT_BOOL}))
-  .INPUT(clamp_max_mask, TensorType({DT_BOOL}))
+  .INPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
+  .INPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(x_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OP_END_FACTORY_REG(ActsULQInputGrad)
 
 /**
-*@brief The act_ulq_clamp_max_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization clamp max. \n
 
 *@par Inputs:
-*@li y_grad: A Tensor of gradient
+*@li y_grad: A Tensor of gradient.
 *@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
 *@li x_clamped_loss: A Tensor of gradient. \n
@@ -790,20 +873,23 @@ REG_OP(ActsULQInputGrad)
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(ActULQClampMaxGrad)
   .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
-  .INPUT(clamp_max_mask, TensorType({DT_BOOL}))
+  .INPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
   .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(clamp_max_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OP_END_FACTORY_REG(ActULQClampMaxGrad)
 
 /**
-*@brief The act_ulq_clamp_min_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization clamp min. \n
 
 *@par Inputs:
-*@li y_grad: A Tensor of gradient
+*@li y_grad: A Tensor of gradient.
 *@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
 *@li x_clamped_loss: A Tensor of gradient. \n
@@ -812,15 +898,553 @@ REG_OP(ActULQClampMaxGrad)
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(ActULQClampMinGrad)
   .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
-  .INPUT(clamp_min_mask, TensorType({DT_BOOL}))
+  .INPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
   .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OP_END_FACTORY_REG(ActULQClampMinGrad)
 
+/**
+* @brief Computes Lp norm.
+
+* @par Inputs:
+* x: An ND tensor of type float16, float32. \n
+*
+* @par Attributes:
+* @li p: Int, "inf" or "-inf", default value is 2.
+* @li axes: ListInt, {} means all axes will be computed.
+* @li keepdim: Bool, default is false.
+* @li epsilon: Float, default is 1e-12. \n
+
+* @par Outputs:
+* y: An ND tensor of type float16, float32. The shape of y depends
+* on axes and keepdim. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator LpNorm.
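+*
+* @par Example:
+* An illustrative sketch of the semantics (not part of the original spec):
+* with p = 2 and axes = {}, y = (sum over all i of |x_i|^2)^(1/2); an
+* infinite p instead takes the maximum of |x_i| over the reduced axes.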
+*/
+REG_OP(LpNorm)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(p, Int, 2)
+    .ATTR(axes, ListInt, {})
+    .ATTR(keepdim, Bool, false)
+    .ATTR(epsilon, Float, 1e-12)
+    .OP_END_FACTORY_REG(LpNorm)
+
+/**
+* @brief Computes LpNormReduce.
+
+* @par Inputs:
+* x: An ND tensor of type float16, float32. \n
+*
+* @par Attributes:
+* @li p: Int, "inf" or "-inf", default value is 2.
+* @li axes: ListInt, {} means all axes will be computed.
+* @li keepdim: Bool, default is false.
+* @li epsilon: Float, default is 1e-12. \n
+
+* @par Outputs:
+* y: An ND tensor of type float16, float32. The shape of y depends
+* on axes and keepdim. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator LpNormReduce.
+*/
+REG_OP(LpNormReduce)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(p, Int, 2)
+    .ATTR(axes, ListInt, {})
+    .ATTR(keepdim, Bool, false)
+    .ATTR(epsilon, Float, 1e-12)
+    .OP_END_FACTORY_REG(LpNormReduce)
+
+/**
+* @brief Computes LpNormUpdate.
+
+* @par Inputs:
+* x: An ND tensor of type float16, float32. \n
+*
+* @par Attributes:
+* @li p: Int, "inf" or "-inf", default value is 2.
+* @li epsilon: Float, default is 1e-12. \n
+
+* @par Outputs:
+* y: An ND tensor of type float16, float32. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator LpNormUpdate.
+*/
+REG_OP(LpNormUpdate)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(p, Int, 2)
+    .ATTR(epsilon, Float, 1e-12)
+    .OP_END_FACTORY_REG(LpNormUpdate)
+
+/**
+* @brief Computes a complex number from two real tensors.
+
+* @par Inputs:
+* @li real: An ND tensor of type float32, double, representing the real part of a complex number.
+* @li imag: An ND tensor of type float32, double, representing the imaginary part of a complex number. \n
+*
+* @par Attributes:
+* Tout: The type of the output.
+* @par Outputs:
+* out: An ND tensor of type complex64, complex128. \n
+*/
+REG_OP(Complex)
+    .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .ATTR(Tout, Type, DT_COMPLEX64)
+    .OP_END_FACTORY_REG(Complex)
+
+/**
+* @brief Returns the imaginary part of a complex tensor.
+
+* @par Inputs:
+* input: An ND tensor of type complex64, complex128. \n
+
+* @par Attributes:
+* Tout: The type of the output.
+
+* @par Outputs:
+* output: An ND tensor of type float32, double. \n
+*/
+REG_OP(Imag)
+    .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(Tout, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(Imag)
+
+/**
+* @brief Returns the element-wise argument (angle) of a complex tensor.
+
+* @par Inputs:
+* @li input: An ND tensor of type complex64, complex128. \n
+*
+* @par Outputs:
+* @li output: An ND tensor of type float32, double. \n
+*/
+REG_OP(Angle)
+    .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(Tout, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(Angle)
+
+/**
+*@brief Computes the gradient of SoftMarginLoss. \n
+
+*@par Inputs:
+*Three inputs, including:
+* @li predict: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li label: A tensor with the same shape as predict. Must be one of the following types:
+* float16, float32. \n
+* @li dout: A tensor with the same shape as predict. Must be one of the following types:
+* float16, float32. \n
+
+*@par Attributes:
+* reduction: Specifies the reduction to apply to the output:
+* 'none' | 'mean' | 'sum'. Default: 'mean'. \n
+
+*@par Outputs:
+* gradient: A Tensor with the same type as predict. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SoftMarginLoss Backward. \n
+*/
+REG_OP(SoftMarginLossGrad)
+    .INPUT(predict, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(dout, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(gradient, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SoftMarginLossGrad)
+
+/**
+*@brief Calculates the cross product of two tensors. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A tensor. Must be one of the following types:
+* float16, float32, int32, int8, uint8, int16. \n
+* @li x2: A tensor. Must be one of the following types:
+* float16, float32, int32, int8, uint8, int16. \n
+
+*@par Attributes:
+*@li dim: The dimension along which to compute the cross product. Defaults to -65530. \n
+
+*@par Outputs:
+*y: A Tensor with the same type and shape as x1. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator cross. \n
+*/
+REG_OP(Cross)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
+    .ATTR(dim, Int, -65530)
+    .OP_END_FACTORY_REG(Cross)
+
+/**
+ *@brief Computes the batched p-norm distance between each pair of
+ *the two collections of row vectors. \n
+
+ *@par Inputs:
+ *Two inputs, including:
+ * @li x1: A tensor with shape: BxPxM. Must be one of the following types:
+ * float16, float32. \n
+ * @li x2: A tensor with shape: BxRxM. Must be one of the following types:
+ * float16, float32. \n
+
+ *@par Attributes:
+ * @li p: An optional float >= 0 or inf. Defaults to 2.0. \n
+
+ *@par Outputs:
+ * y: A Tensor with the same type as x1 and with shape BxPxR. \n
+
+ *@par Third-party framework compatibility
+ *Compatible with the Pytorch operator Cdist. \n
+ */
+REG_OP(Cdist)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(p, Float, 2.0)
+    .OP_END_FACTORY_REG(Cdist)
+
+/**
+*@brief Computes the gradient of x1 in cdist. \n
+
+*@par Inputs:
+*Four inputs, including:
+* @li grad: Grad with shape BxPxR. Must be one of the following types:
+* float16, float32. \n
+* @li x1: A tensor with shape: BxPxM. Must be one of the following types:
+* float16, float32. \n
+* @li x2: A tensor with shape: BxRxM. Must be one of the following types:
+* float16, float32. \n
+* @li cdist: Output tensor of cdist forward with shape: BxPxR.
+* Must be one of the following types: float16, float32. \n
+
+*@par Attributes:
+* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n
+
+*@par Outputs:
+* y: A Tensor with the same type and shape as x1. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Cdist Backward. \n
+*/
+REG_OP(CdistGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(cdist, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(p, Float, 2.0)
+    .OP_END_FACTORY_REG(CdistGrad)
+
+/**
+* @brief Computes the RaggedBincount. \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li splits: A 1D tensor of type int64.
+* @li values: A 1D or 2D tensor. Must be one of the following types:
+* int32, int64.
+* @li size: A non-negative scalar tensor. Must be one of the following types:
+* int32, int64.
+* @li weights: A tensor with the same shape as values.
+* Must be one of the following types: int32, int64, float, double. \n
+
+* @par Attributes:
+* @li binary_output: An optional bool. Defaults to false. \n
+
+* @par Outputs:
+* output: Must be one of the following types: int32, int64, float, double. \n
+*/
+REG_OP(RaggedBincount)
+    .INPUT(splits, TensorType({DT_INT64}))
+    .INPUT(values, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(size, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(output, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(binary_output, Bool, false)
+    .OP_END_FACTORY_REG(RaggedBincount)
+
+/**
+ * @brief Count the number of occurrences of each value in the input dense integer array,
+ * and output it according to the sparse matrix. \n
+
+ * @par Inputs:
+ * @li values: A 1D or 2D tensor of type int32 or int64.
+ * @li weights: A tensor of type int32 or int64 or float or double. \n
+
+ * @par Attributes:
+ * @li minlength: An optional int >=-1. Defaults to -1.
+ * @li maxlength: An optional int >=-1. Defaults to -1.
+ * @li binary_output: A required bool. \n
+
+ * @par Outputs:
+ * output_indices: A tensor of type int64.
+ * output_values: A tensor of the same type as "weights".
+ * output_dense_shape: A tensor of type int64. \n
+
+ * @par Third-party framework compatibility
+ * Compatible with the TensorFlow operator DenseCountSparseOutput. \n
+ */
+REG_OP(DenseCountSparseOutput)
+    .INPUT(values, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
+    .OUTPUT(output_indices, TensorType({DT_INT64}))
+    .OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
+    .OUTPUT(output_dense_shape, TensorType({DT_INT64}))
+    .ATTR(minlength, Int, -1)
+    .ATTR(maxlength, Int, -1)
+    .REQUIRED_ATTR(binary_output, Bool)
+    .OP_END_FACTORY_REG(DenseCountSparseOutput)
+
+/**
+* @brief Computes gradients for SparseSegmentSum. \n
+
+* @par Inputs:
+* The input grad must be of a floating-point type. Inputs include:
+* @li grad: A Tensor. Must be one of the following types: bfloat16, float16, float32, double.
+  gradient propagated to the SparseSegmentSum op.
+* @li indices: A Tensor. Must be one of the following types: int32, int64.
+  indices passed to the corresponding SparseSegmentSum op.
+* @li segment_ids: A Tensor of type int32, int64. segment_ids passed to the
+  corresponding SparseSegmentSum op.
+* @li output_dim0: A Tensor of type int32. dimension 0 of "x" passed to
+  SparseSegmentSum op. \n
+
+* @par Outputs:
+* output: A Tensor. Has the same type as grad. \n
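+*
+* @par Example:
+* An illustrative sketch of the semantics (not part of the original spec):
+* output has shape [output_dim0] + grad.shape[1:], starts as zeros, and for
+* each i accumulates output[indices[i]] += grad[segment_ids[i]].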
+
+* @par Third-party framework compatibility
+* Compatible with tensorflow SparseSegmentSumGrad operator.
+*/
+
+REG_OP(SparseSegmentSumGrad)
+    .INPUT(grad, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(indices, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(segment_ids, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(output_dim0, TensorType({DT_INT32}))
+    .OUTPUT(output, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(SparseSegmentSumGrad)
+
+/**
+ * @brief Count the number of occurrences of each value in the input ragged integer array,
+ * and output it according to the sparse matrix. \n
+
+ * @par Inputs:
+ * @li splits: A 1D tensor of type int64.
+ * @li values: A 1D or 2D tensor of type int32 or int64.
+ * @li weights: A tensor of type int32 or int64 or float or double. \n
+
+ * @par Attributes:
+ * @li minlength: An optional int >=-1. Defaults to -1.
+ * @li maxlength: An optional int >=-1. Defaults to -1.
+ * @li binary_output: A required bool. \n
+
+ * @par Outputs:
+ * output_indices: A tensor of type int64.
+ * output_values: A tensor of the same type as "weights".
+ * output_dense_shape: A tensor of type int64. \n
+
+ * @par Third-party framework compatibility
+ * Compatible with the TensorFlow operator RaggedCountSparseOutput. \n
+ */
+REG_OP(RaggedCountSparseOutput)
+    .INPUT(splits, TensorType({DT_INT64}))
+    .INPUT(values, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
+    .OUTPUT(output_indices, TensorType({DT_INT64}))
+    .OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
+    .OUTPUT(output_dense_shape, TensorType({DT_INT64}))
+    .ATTR(minlength, Int, -1)
+    .ATTR(maxlength, Int, -1)
+    .REQUIRED_ATTR(binary_output, Bool)
+    .OP_END_FACTORY_REG(RaggedCountSparseOutput)
+
+/**
+* @brief SignBitsUnpack.
+
+* @par Inputs:
+* One input, including:
+* @li x: A 1D Tensor of uint8.
+
+* @par Attributes:
+* @li size: First dim of the output tensor. A required attribute.
+* @li dtype: dtype of the output tensor: DT_FLOAT(0) or DT_FLOAT16(1).
+
+* @par Outputs:
+* @li y: A 2D Tensor of type float32 (float16) with shape (size, (x.shape * 8) / size).
+*/
+REG_OP(SignBitsUnpack)
+    .INPUT(x, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(size, Int)
+    .REQUIRED_ATTR(dtype, Type)
+    .OP_END_FACTORY_REG(SignBitsUnpack)
+
+/**
+* @brief Computes scaled masked softmax. \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A mutable Tensor. The type support float16/float32.
+* @li mask: An optional Tensor. Must meet all of the following rules:
+* shape of mask should be broadcastable with x.
+* dtype of mask should be bool.
+* mask is binary.
+
+* @par Attributes:
+* scale: An attribute used to scale the tensor. The type is float. Defaults
+* to "1.0". \n
+* fixed_triu_mask: A flag used to enable or disable a fixed upper triangle mask. The type is bool. Defaults
+* to "false". \n
+
+* @par Outputs:
+* y: A mutable Tensor. Has the same type as "x". \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ScaledMaskedSoftmax)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_BOOL, DT_UINT1}))
+    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .ATTR(scale, Float, 1.0)
+    .ATTR(fixed_triu_mask, Bool, false)
+    .OP_END_FACTORY_REG(ScaledMaskedSoftmax)
+
+/**
+* @brief Computes the gradient of scaled masked softmax.
\n + +* @par Inputs: +* Three inputs, including: +* @li y_grad: A mutable Tensor. The type support float16/float32. +* @li y: A mutable Tensor. The type support float16/float32. +* @li mask: An optional Tensor. Must meet all of the following rules: +* shape of mask should be broadcastable with x. +* dtype of mask should be bool. +* mask is binary + +* @par Attributes: +* scale: A attribute used to scale tensor. The type is float. The dimension softmax would be performed on. Defaults +* to "1.0" . \n +* fixed_triu_mask: A flag used to enable or disable a fixed upper triangle mask. The type is bool. Defaults +* to "false" . \n + +* @par Outputs: +* x_grad: A mutable Tensor. Has the same type as "x". \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ScaledMaskedSoftmaxGrad) + .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_BOOL, DT_UINT1})) + .OUTPUT(x_grad, TensorType({DT_FLOAT16})) + .ATTR(scale, Float, 1.0) + .ATTR(fixed_triu_mask, Bool, false) + .OP_END_FACTORY_REG(ScaledMaskedSoftmaxGrad) + +/** + * @brief SignBitsPack. + + * @par Inputs: + * one input, including: + * @li x: A 1D Tensor of float32 or float16. + * + * @par Attributes: + * @li size: first dim value of output tensor. + * + * @par Outputs: + * @li y: A 2D Tensor of type uint8 with shape (size, N) + */ +REG_OP(SignBitsPack) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_UINT8})) + .REQUIRED_ATTR(size, Int) + .OP_END_FACTORY_REG(SignBitsPack) + +/** +* @brief Get sobol samples. \n + +* @par Inputs: +* Three inputs, including: +* @li dim: Dimension of results, which must be a scalar of type int32. +* @li num_results: Number of results, which must be a scalar of type int32. +* @li skip: Number of initial points, which must be a scalar of type int32. \n + +* @par Attributes: +* @li dtype: Data type of output samples. \n + +* @par Outputs: +* @li y: A Tensor with the DT_FLOAT or DT_DOUBLE type generated samples. \n + +* @par Third-party framework compatibility +* @li compatible with tensorflow SobolSample operator. +**/ +REG_OP(SobolSample) + .INPUT(dim, TensorType({DT_INT32})) + .INPUT(num_results, TensorType({DT_INT32})) + .INPUT(skip, TensorType({DT_INT32})) + .OUTPUT(samples, TensorType({DT_FLOAT,DT_DOUBLE})) + .ATTR(dtype, Type, DT_FLOAT) + .OP_END_FACTORY_REG(SobolSample) + +/** + * @brief Count the number of occurrences of each value in the input sparse integer array, + * and output it according to the sparse matrix. \n + + * @par Inputs: + * @li indices: A tensor of type int64. + * @li values: A tensor of type int32 or int64. + * @li dense_shape: A tensor of type int64. + * @li weights: A tensor of type int32 or int64 or float or double. \n + + * @par Attributes: + * @li minlength: An optional int >=-1. Defaults to -1. + * @li maxlength: An optional int >=-1. Defaults to -1. + * @li binary_output: A required bool. \n + + * @par Outputs: + * @li output_indices: A tensor of type int64. + * @li output_values: A tensor of the same type as "weights". + * @li output_dense_shape: A tensor of type int64. \n + + * @par Third-party framework compatibility + * Compatible with the TensorFlow operator SparseCountSparseOutput. 
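+ *
+ * @par Example:
+ * An illustrative sketch of the semantics (not part of the original spec):
+ * for a 1-D sparse input with values = [1, 1, 3], weights of all ones and
+ * binary_output = false, the op counts value 1 twice and value 3 once:
+ *   output_indices = [[1], [3]], output_values = [2, 1],
+ *   output_dense_shape = [4] (largest value + 1 when maxlength is -1).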
\n + */ +REG_OP(SparseCountSparseOutput) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_INT32,DT_INT64})) + .INPUT(dense_shape, TensorType({DT_INT64})) + .INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE})) + .OUTPUT(output_indices, TensorType({DT_INT64})) + .OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE})) + .OUTPUT(output_dense_shape, TensorType({DT_INT64})) + .ATTR(minlength, Int, -1) + .ATTR(maxlength, Int, -1) + .REQUIRED_ATTR(binary_output, Bool) + .OP_END_FACTORY_REG(SparseCountSparseOutput) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index ed23d3f6..15f648f0 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,157 +24,430 @@ #include "graph/operator_reg.h" namespace ge { +/** +* @brief Backprop W of AttentionLnQKV + ReduceSumD \n +* @par Inputs: +* Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float16. +* @li query_dx: A Tensor. Must be one of the following types: float16. +* @li key_dw: A Tensor. Must be one of the following types: float16. +* @li value_dw: A Tensor. Must be one of the following types: float16. + +* @par Attributes: +* @li trans_a: A optional attribute, the type is bool. Defaults to True. +* @li trans_b: A optional attribute, the type is bool. Defaults to False. \n + +* @par Outputs: +* Six outputs, including: +* @li dw_query: A Tensor. Must be one of the following types: float16. +* @li dw_key: A Tensor. Must be one of the following types: float16. +* @li dw_value: A Tensor. Must be one of the following types: float16. +* @li dbias_query: A Tensor. Must be one of the following types: float16. +* @li dbias_key: A Tensor. Must be one of the following types: float16. +* @li dbias_value: A Tensor. Must be one of the following types: float16. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n +*/ +REG_OP(AttentionQKVGradW) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(query_dx, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(key_dw, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(value_dw, TensorType({DT_FLOAT16})) + .OUTPUT(dw_query, TensorType({DT_FLOAT16})) + .OUTPUT(dw_key, TensorType({DT_FLOAT16})) + .OUTPUT(dw_value, TensorType({DT_FLOAT16})) + .OUTPUT(dbias_query, TensorType({DT_FLOAT16})) + .OUTPUT(dbias_key, TensorType({DT_FLOAT16})) + .OUTPUT(dbias_value, TensorType({DT_FLOAT16})) + .ATTR(trans_a, Bool, true) + .ATTR(trans_b, Bool, false) + .OP_END_FACTORY_REG(AttentionQKVGradW) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n +* @brief Backprop X of AttentionLnQKV + AddN \n +* @par Inputs: +* Seven inputs, including: +* @li ln_dx: A Tensor. Must be one of the following types: float16. +* @li query_dx: A Tensor. Must be one of the following types: float16. +* @li key_dw: A Tensor. Must be one of the following types: float16. +* @li value_dw: A Tensor. Must be one of the following types: float16. +* @li kernel_query: A Tensor. Must be one of the following types: float16. +* @li kernel_key: A Tensor. 
Must be one of the following types: float16. +* @li kernel_value: A Tensor. Must be one of the following types: float16. \n + +* @par Attributes: +* @li trans_a: A optional attribute, the type is bool. Defaults to False. +* @li trans_b: A optional attribute, the type is bool. Defaults to True. \n + +* @par Outputs: +* One outputs, including: +* @li dx: A Tensor. Must be one of the following types: float16. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n +*/ +REG_OP(AttentionQKVGradX) + .INPUT(ln_dx, TensorType({DT_FLOAT16})) + .INPUT(query_dx, TensorType({DT_FLOAT16})) + .INPUT(key_dw, TensorType({DT_FLOAT16})) + .INPUT(value_dw, TensorType({DT_FLOAT16})) + .INPUT(kernel_query, TensorType({DT_FLOAT16})) + .INPUT(kernel_key, TensorType({DT_FLOAT16})) + .INPUT(kernel_value, TensorType({DT_FLOAT16})) + .OUTPUT(dx, TensorType({DT_FLOAT16})) + .ATTR(trans_a, Bool, false) + .ATTR(trans_b, Bool, true) + .OP_END_FACTORY_REG(AttentionQKVGradX) + +/** +* @brief + / (MatMul -> ConfusionTransposeD). + LayerNorm - (MatMul -> ConfusionTransposeD). + \ (MatMul -> ConfusionTransposeD). \n +* @par Inputs: +* Nine inputs, including: +* @li x: A Tensor. Must be one of the following types: float16. +* @li kernel_query: A Tensor. Must be one of the following types: float16. +* @li kernel_key: A Tensor. Must be one of the following types: float16. +* @li kernel_value: A Tensor. Must be one of the following types: float16. +* @li gamma: A Tensor. Must be one of the following types: float16. +* @li beta: A Tensor. Must be one of the following types: float16. +* @li bias_query: A Tensor. Must be one of the following types: float16. +* @li bias_key: A Tensor. Must be one of the following types: float16. +* @li bias_value: A Tensor. Must be one of the following types: float16. \n + +* @par Attributes: +* @li epsilon: A optional attribute, the type is float32. Defaults to 1e-7. +* @li trans_a: A optional attribute, the type is bool. Defaults to False. +* @li trans_b: A optional attribute, the type is bool. Defaults to False. \n + +* @par Outputs: +* Six outputs, including: +* @li norm: A Tensor. Must be one of the following types: float16. +* @li query_output: A Tensor. Must be one of the following types: float16. +* @li key_output: A Tensor. Must be one of the following types: float16. +* @li value_output: A Tensor. Must be one of the following types: float16. +* @li mean: A Tensor. Must be one of the following types: float16. +* @li variance: A Tensor. Must be one of the following types: float16. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
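+*
+* @par Example:
+* An illustrative sketch of the fused computation described by the brief
+* above (editorial paraphrase, not a normative definition):
+*   norm         = LayerNorm(x; gamma, beta, epsilon)
+*   query_output = ConfusionTransposeD(MatMul(norm, kernel_query) + bias_query)
+*   key_output   = ConfusionTransposeD(MatMul(norm, kernel_key)   + bias_key)
+*   value_output = ConfusionTransposeD(MatMul(norm, kernel_value) + bias_value)
+* with mean and variance being the LayerNorm statistics.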
\n +*/ +REG_OP(AttentionLnQKV) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(kernel_query, TensorType({DT_FLOAT16})) + .INPUT(kernel_key, TensorType({DT_FLOAT16})) + .INPUT(kernel_value, TensorType({DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT16})) + .INPUT(beta, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias_query, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias_key, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias_value, TensorType({DT_FLOAT16})) + .OUTPUT(norm, TensorType({DT_FLOAT16})) + .OUTPUT(query_output, TensorType({DT_FLOAT16})) + .OUTPUT(key_output, TensorType({DT_FLOAT16})) + .OUTPUT(value_output, TensorType({DT_FLOAT16})) + .OUTPUT(mean, TensorType({DT_FLOAT16})) + .OUTPUT(variance, TensorType({DT_FLOAT16})) + .ATTR(epsilon, Float, 0.0000001) + .ATTR(trans_a, Bool, false) + .ATTR(trans_b, Bool, false) + .OP_END_FACTORY_REG(AttentionLnQKV) +/** +* @brief + A structure specific to the swin_transformer model. This operator only supports swin_transformer. \n +* @par Inputs: +* Five inputs, including: +* @li x: A Tensor. Must be one of the following types: float16. +* @li gamma: A Tensor. Must be one of the following types: float16. +* @li beta: A Tensor. Must be one of the following types: float16. +* @li weight: A Tensor. Must be one of the following types: float16. +* @li bias: A Tensor. Must be one of the following types: float16. \n + +* @par Attributes: +* @li head_num: A required attribute of type int. +* @li head_dim: A required attribute of type int. +* @li seq_length: A required attribute of type int. +* @li shifts: An optional attribute of type list int. Defaults to (). +* @li epsilon: An optional attribute of type float. Defaults to 1e-7. \n + +* @par Outputs: +* Three outputs, including: +* @li query_output: A Tensor. Must be one of the following types: float16. +* @li key_output: A Tensor. Must be one of the following types: float16. +* @li value_output: A Tensor. Must be one of the following types: float16. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n +*/ +REG_OP(SwinTransformerLnQKV) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT16})) + .INPUT(beta, TensorType({DT_FLOAT16})) + .INPUT(weight, TensorType({DT_FLOAT16})) + .INPUT(bias, TensorType({DT_FLOAT16})) + .OUTPUT(query_output, TensorType({DT_FLOAT16})) + .OUTPUT(key_output, TensorType({DT_FLOAT16})) + .OUTPUT(value_output, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(head_num, Int) + .REQUIRED_ATTR(head_dim, Int) + .REQUIRED_ATTR(seq_length, Int) + .ATTR(shifts, ListInt, {}) + .ATTR(epsilon, Float, 0.0000001) + .OP_END_FACTORY_REG(SwinTransformerLnQKV) + +/** +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Three inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. +* float32, int32, bfloat16. Has format [ND, NHWC]. * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. +* float32, int32, bfloat16. Has format [ND, NHWC]. * @li bias: An optional 1D Tensor. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC] . \n +* float32, int32, bfloat16. Has format [ND, NHWC]. \n *@par Attributes: -*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool.
If True, changes the shape of "x1" from [M, K] to +* [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to +* [K, M]. \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n +* float32, int32, bfloat16. Has format [ND, NHWC]. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMul) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .ATTR(transpose_x1, Bool, false) .ATTR(transpose_x2, Bool, false) .OP_END_FACTORY_REG(MatMul) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: -*Two inputs, including: -* @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. -* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. -* @li bias: A 1D Tensor. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC] . \n +*Four inputs, including: +* @li x1: A matrix Tensor. 2D. Must be one of the following types: float32, +* float16, int32, int8, int4, bfloat16. Has format [ND, NHWC]. +* @li x2: A matrix Tensor. 2D. Must be one of the following types: float32, +* float16, int32, int8, int4, bfloat16. Has format [ND, NHWC]. +* @li bias: A 1D Tensor. Must be one of the following types: float32, +* float16, int32, bfloat16. Has format [ND, NHWC]. +* @li offset_w: An optional 1D Tensor for quantized inference. Type is int8. +* Reserved. \n *@par Attributes: -*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to +* [M, K]. +* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to +* [K, N]. +* @li offset_x: An optional integer for quantized MatMulV2. +* The negative offset added to the input x1 for int8 type. Ensure offset_x is +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n +*y: The result matrix Tensor. 2D. Must be one of the following types: float32, +* float16, int32, bfloat16. Has format [ND, NHWC]. \n + +*@attention Constraints: +* If performance is better in format NZ, disable +* "MatmulTransdataFusionPass" in the fusion configuration. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul.
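+* +*@par Example: +* Editor's illustrative sketch, not part of the original patch: constructing this op through the IR class that REG_OP generates, assuming the usual set_input_*/set_attr_* accessors: +* @code +* ge::op::MatMulV2 mm("mm"); +* mm.set_input_x1(x1).set_input_x2(x2); +* mm.set_attr_transpose_x1(false).set_attr_transpose_x2(true); +* @endcode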
*/ REG_OP(MatMulV2) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) .ATTR(transpose_x1, Bool, false) .ATTR(transpose_x2, Bool, false) .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(MatMulV2) - /** -*@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n +*@par Inputs: +*Five inputs, including: +* @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. +* @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. +* @li compress_index: A compress index matrix of type int8. +* @li bias: An optional Tensor. 1D. Must be one of the following types: int32, +* float16. +* @li offset_w: An optional matrix Tensor. 2D. Must be one of the following +* types: int8. \n + +*@par Attributes: +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to +* [M, K]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to +* [K, N]. +*@li offset_x: An optional integer for quantized MatMulV2Compress. +*The negative offset added to the input x1 for int8 type. Ensure offset_x is +* within the effective range of int8 [-128, 127]. Defaults to "0". \n + +*@par Outputs: +*y: The result matrix Tensor. 2D. Must be one of the following types: int32, +* float16. \n *@attention Constraints: +* If performance is better in format NZ, disable +* "MatmulTransdataFusionPass" in the fusion configuration. + +*/ +REG_OP(MatMulV2Compress) + .INPUT(x1, TensorType({DT_INT8})) + .INPUT(x2, TensorType({DT_INT8})) + .INPUT(compress_index, TensorType({DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_INT32, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_INT32, DT_FLOAT16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .ATTR(transpose_x1, Bool, false) + .ATTR(transpose_x2, Bool, false) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(MatMulV2Compress) + +/** +*@brief Performs Matrix-to-matrix Multiply, +* producing y=alpha[0]*a*b+beta[0]*c. \n +*@attention Constraints: * For better performance, the k-axis must be aligned to 16 (input type * is float16) or 32 (input type is int8). \n *@par Inputs: *Five inputs, including: -*@li a: A matrix Tensor. Must be one of the following types: float16, int8. -* Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). -*@li b: A matrix Tensor. Must be one of the following types: float16, int8. -* Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z). -*@li c: A matrix Tensor. Must be one of the following types: float16, int32, -* float32. has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). -*@li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the following -* types: float16, int32, float32. Has format [ND]. +* @li a: A matrix Tensor. Must be one of the following types: float32, float16, +* int8, int32. Has format ND. +* @li b: A matrix Tensor.
Must be one of the following types: float32, float16, +* int8, int32. Has format ND. +*@li c: A matrix Tensor. Must be one of the following types: float32, float16, +* int8, int32. Has format ND. +* @li alpha: A 1D Tensor. The shape of alpha is [1]. Must be one of the +* following types: float16, int32, float32, int8. Has format ND. *@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following -* types: float16, int32, float32. Has format [ND]. +* types: float16, int32, float32, int8. Has format ND.\n * The format of a, b, c has restrictions:\n * When type of a is int8 and type of c is int32, the format of a, b, c should -* all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n -* When type of a is int8 and type of c is float32, the format of a, b, c should -* all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n +* all be ND.\n +* When type of a is int8 and type of c is float32, the format of a, b, c +* should all be ND.\n * When type of a is float16 and type of c is float16, the format of a, b, c -* should all be ND or FRACTAL_NZ.\n +* should all be ND.\n * When type of a is float16 and type of c is float32, the format of a, b, c -* should all be ND or FRACTAL_NZ . \n +* should all be ND. \n *@par Attributes: *Two attributes, including: *@li transpose_a: Optional. A bool. If True, changes the shape of "a" from * [M, K] to [K, M]. *@li transpose_b: Optional. A bool. If True, changes the shape of "b" from -* [K, N] to [N, K] . \n +* [K, N] to [N, K]. \n *@par Outputs: *y: The result matrix Tensor. Must be one of the following types: float16, -* float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to a. -* 2D(ND) or 4D(FRACTAL_NZ). +* float32, int32. Has format [ND], the format should be equal to a. */ REG_OP(GEMM) - .INPUT(a, TensorType({DT_FLOAT16, DT_INT8})) - .INPUT(b, TensorType({DT_FLOAT16, DT_INT8})) - .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(a, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) + .INPUT(b, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) + .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) + .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) + .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) .ATTR(transpose_a, Bool, false) .ATTR(transpose_b, Bool, false) .OP_END_FACTORY_REG(GEMM) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: -*Three inputs, including: +*Two inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. +* float32, int32, bfloat16. 2D or higher. Has format [ND, NHWC]. * @li x2: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n +* float32, int32, bfloat16. 2D or higher. Has format [ND, NHWC]. \n *@par Attributes: -*@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n +*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] +* to [B, K, M].
+*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] +* to [B, K, M]. \n *@par Outputs: -*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n +* y: The result matrix Tensor. 2D or higher. Must be one of the following +* types: float16, bfloat16, +* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape +* length as "x1" and "x2". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(BatchMatMul) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .ATTR(adj_x1, Bool, false) .ATTR(adj_x2, Bool, false) .OP_END_FACTORY_REG(BatchMatMul) + +/** +* @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n +* @par Inputs: +* Four inputs, including: +* @li x1: A matrix Tensor. Must be one of the following types: float16, +* float32, int32, int8, int4, bfloat16. 2D or higher. Has format [ND, NHWC]. +* @li x2: A matrix Tensor. Must be one of the following types: float16, +* float32, int32, int8, int4, bfloat16. 2D or higher. Has format [ND, NHWC]. +* @li bias: An optional Tensor. Must be one of the following types: +* float16, +* float32, int32, int8, int4, bfloat16. Has format [ND, NHWC]. +* @li offset_w: An optional Tensor. Must be one of the following types: +* int8, int4. Has format [ND, NHWC]. \n + +* @par Attributes: +* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to +* [B, K, M]. +* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to +* [B, K, M]. \n + +* @par Outputs: +* y: The result matrix Tensor. 2D or higher. Must be one of the following +* types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape +* length as "x1" and "x2". \n + +*@attention Constraints: +* If performance is better in format NZ, disable +* "MatmulTransdataFusionPass" in the fusion configuration. \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. +*/ + +REG_OP(BatchMatMulV2) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .ATTR(adj_x1, Bool, false) + .ATTR(adj_x2, Bool, false) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(BatchMatMulV2) + /** *@brief Computes half the L2 norm of a tensor without the sqrt . \n @@ -184,7 +457,12 @@ REG_OP(BatchMatMul) * TensorType::FloatingDataType() . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n + +*@attention Constraints: +* If performance is better in format NZ, disable +* "MatmulTransdataFusionPass" in the fusion configuration. \n + *@par Third-party framework compatibility *Compatible with the TensorFlow operator L2Loss.
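+* +*@par Example: +* Editor's note, not part of the original patch: "half the L2 norm without the sqrt" means +* @code +* // y = (x[0]^2 + x[1]^2 + ... + x[n-1]^2) / 2, e.g. x = {1, 2, 2} gives y = 4.5 +* @endcode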
*/ @@ -325,6 +603,56 @@ REG_OP(MatrixSetDiagD) .OP_END_FACTORY_REG(MatrixSetDiagD) /** +* @brief Function AttentionScore. \n + +* @par Inputs: +* Six inputs, including: +* @li query: A matrix Tensor. The type only supports float16. +* @li key: A matrix Tensor. The type only supports float16. +* @li value: A matrix Tensor. The type only supports float16. +* @li padding_mask: A matrix Tensor. The type only supports float16. +* @li scale: A scalar. The type only supports float16. +* @li drop_mask: A matrix Tensor. The type only supports uint8. \n + +* @par Attributes: +* @li keep_prob: A mutable Tensor. Must meet all of the following rules: + the shape of "keep_prob" should be (1,) or [1,]. +* @li query_transpose: A bool. If True, changes the shape of "query" from [K, M] to + [M, K]. +* @li key_transpose: A bool. If True, changes the shape of "key" from [N, K] to + [K, N]. +* @li bmm_score_transpose_a: A bool. If True, changes the shape of "mid_data" from [K, M] to + [M, K]. +* @li bmm_score_transpose_b: A bool. If True, changes the shape of "value" from [N, K] to + [K, N]. +* @li softmax_axes: A list of ints. The dimensions softmax would be performed on. Defaults + to "[-1]" . \n + +* @par Outputs: +* attention_score: The result matrix Tensor. The type only supports float16. +* softmax_output: The result matrix Tensor. The type only supports float16. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AttentionScore) + .INPUT(query, TensorType({DT_FLOAT16})) + .INPUT(key, TensorType({DT_FLOAT16})) + .INPUT(value, TensorType({DT_FLOAT16})) + .INPUT(padding_mask, TensorType({DT_FLOAT16})) + .INPUT(scale, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(drop_mask, TensorType({DT_INT8})) + .OUTPUT(attention_score, TensorType({DT_FLOAT16})) + .OUTPUT(softmax_output, TensorType({DT_FLOAT16})) + .ATTR(keep_prob, Float, 1.0) + .ATTR(query_transpose, Bool, false) + .ATTR(key_transpose, Bool, false) + .ATTR(bmm_score_transpose_a, Bool, false) + .ATTR(bmm_score_transpose_b, Bool, false) + .ATTR(softmax_axes, ListInt, {-1}) + .OP_END_FACTORY_REG(AttentionScore) + +/** *@brief Applies sparse "updates" to individual values or slices in a Variable . \n *@par Inputs: @@ -334,7 +662,7 @@ REG_OP(MatrixSetDiagD) * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, * uint64 *@li indices: An ND Tensor. -*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float32, int8, uint8, double, * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, @@ -378,6 +706,9 @@ REG_OP(ScatterNdUpdate) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterUpdate. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterUpdate) .INPUT(x, TensorType::BasicType()) @@ -387,23 +718,51 @@ REG_OP(TensorScatterUpdate) .OP_END_FACTORY_REG(TensorScatterUpdate) /** -*@brief Adds sparse "updates" to a variable reference . \n +*@brief Uses "updates" to update tensor "data" by "indices". \n *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor . \n - +*@li data: An ND Tensor . \n +*Must be one of the following types: float16, float32, int32, int8, uint8 +*@li indices: An ND Tensor of type int32 or int64 +*@li updates: A Tensor. Same shape as indices. Format: NCHW, NHWC .
\n *Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor of type int32 or int64. +*@par Attributes: +*@li axis: An optional attribute. Defaults to 0. -*@li updates: An Tensor. format:NCHW, NHWC . \n +*@par Outputs: +*y: A Tensor. Has the same type and format as input "data" . \n -*Must be one of the following types: float16, float32, int32, int8, uint8 +*@par Third-party framework compatibility +* Compatible with the ONNX operator ScatterElements. +*/ +REG_OP(ScatterElements) + .INPUT(data, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(axis, Int, 0) + .OP_END_FACTORY_REG(ScatterElements) + +/** +*@brief Adds sparse "updates" to a variable reference . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor . + +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor . \n + +*Must be one of the following types: int32 or int64 +*@li updates: An ND Tensor . + +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -* use_locking: An optional bool. Defaults to "False". If "True", the operation -* will be protected by a lock . \n +*use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -412,14 +771,44 @@ REG_OP(TensorScatterUpdate) * Compatible with the TensorFlow operator ScatterAdd. */ REG_OP(ScatterAdd) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterAdd) /** +*@brief Adds sparse "updates" to a variable reference . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor . +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@li indices: An ND Tensor of type int32 or int64 + +*@li updates: An ND Tensor . +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Attributes: +* axis: A required int. The axis along which to index. \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the PyTorch operator ScatterAdd. +*/ +REG_OP(ScatterAddWithAxis) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(ScatterAddWithAxis) + +/** *@brief Divides a variable reference by sparse updates . \n *@par Inputs: @@ -428,12 +817,12 @@ REG_OP(ScatterAdd) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor.
-*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -*@li use_locking: An optional bool. Defaults to "False". If "True", +*use_locking: An optional bool. Defaults to "False". If "True", * the operation will be protected by a lock . \n *@par Outputs: @@ -443,10 +832,10 @@ REG_OP(ScatterAdd) * Compatible with the TensorFlow operator ScatterDiv. */ REG_OP(ScatterDiv) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterDiv) @@ -458,7 +847,7 @@ REG_OP(ScatterDiv) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: @@ -472,10 +861,10 @@ REG_OP(ScatterDiv) * Compatible with the TensorFlow operator ScatterNdAdd. */ REG_OP(ScatterNdAdd) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterNdAdd) @@ -499,6 +888,9 @@ REG_OP(ScatterNdAdd) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterAdd. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterAdd) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -515,7 +907,7 @@ REG_OP(TensorScatterAdd) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 @@ -530,10 +922,10 @@ REG_OP(TensorScatterAdd) * Compatible with the TensorFlow operator ScatterNdSub. 
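+* +*@par Example: +* Editor's illustrative note, not part of the original patch, showing the subtraction semantics with 1-D index tuples: +* @code +* // var = {10, 10, 10, 10}, indices = {{0}, {2}}, updates = {1, 2} +* // after ScatterNdSub: var = {9, 10, 8, 10} +* @endcode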
*/ REG_OP(ScatterNdSub) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterNdSub) @@ -557,6 +949,9 @@ REG_OP(ScatterNdSub) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterSub. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterSub) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -573,7 +968,7 @@ REG_OP(TensorScatterSub) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: @@ -587,10 +982,10 @@ REG_OP(TensorScatterSub) * Compatible with the TensorFlow operator ScatterSub. */ REG_OP(ScatterSub) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterSub) @@ -638,37 +1033,43 @@ REG_OP(DiagPart) .OP_END_FACTORY_REG(DiagPart) /** -*@brief Also known as a "fully-connected" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n - +*@brief Also known as a "fully-connected" layer, computes an inner product +* with a set of learned weights, and (optionally) adds biases. \n *@par Inputs: * Four inputs, including: -*@li x: A Tensor of type float16, int8. -*@li w: A weight matrix of type float16, int8. -*@li b: A Tensor of type float16, int32, float32. -*@li offset_w: A Tensor of type int8 . \n +*@li x: A Tensor of type float16, int8, int4, float32, bfloat16. +*@li w: A weight matrix of type float16, int8, int4, float32, bfloat16. +*@li b: An optional Tensor of type float16, int8, int4, float32, bfloat16. +*@li offset_w: An optional Tensor of type int8, int4. +* Reserved. Only None supported. \n *@par Attributes: -*@li num_output: Reserved. -*@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false". -*@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. -* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". -*@li offset_x: Reserved . \n +*@li num_output: Required. An int, output neuron number. Reserved. +*@li transpose: A bool specifying whether to transpose the input w, +* either "true" or "false". Defaults to "false". +*@li axis: Optional.
An int, 1 or 2, specifying which dimension the input +* "K" starts from. Defaults to 1. +* The product of the subsequent dimensions starting from the first dimension +* or the second dimension is "K". +*@li offset_x: An optional integer for quantized FullyConnection. +*The negative offset added to the input image for int8 type. Ensure offset_x is +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result tensor of type float16, int32, float32 . \n +*y: The result tensor of type float16, int32, float32, bfloat16. \n *@par Third-party framework compatibility -* Compatible with the Caffe operator InnerProduct . \n +* Compatible with the Caffe operator InnerProduct. \n *@par Quantization supported or not * Yes */ REG_OP(FullyConnection) - .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) - .INPUT(w, TensorType({DT_FLOAT16, DT_INT8})) - .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) - .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) + .INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) .REQUIRED_ATTR(num_output, Int) .ATTR(transpose, Bool, false) .ATTR(axis, Int, 1) @@ -676,27 +1077,33 @@ REG_OP(FullyConnection) .OP_END_FACTORY_REG(FullyConnection) /** -*@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n - +*@brief Also known as a "fully-connected-compress" layer, computes an inner +* product with a set of learned weights, and (optionally) adds biases. \n *@par Inputs: -* Four inputs, including: +* Five inputs, including: *@li x: A Tensor of type uint8, int8. -*@li w: A weight matrix of type int8, int8. -*@li w: A compress index matrix of type int8, int8. -*@li b: A Tensor of type float16, int32, int32. -*@li offset_w: A Tensor of type int8.i +*@li w: A weight matrix of type int8. +*@li compress_index: A compress index matrix of type int8. +*@li b: An optional Tensor of type int32. +*@li offset_w: An optional Tensor of type int8. *@par Attributes: -*@li num_output: Reserved. -*@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false". -*@li axis: Reserved. -*@li offset_x: Reserved . \n +*@li num_output: An int, specifying the number of outputs. +*@li transpose: A bool, specifying whether to transpose the input w, either "true" +* or "false". Defaults to "false". +*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" +* starts from. Defaults to "1". +*The product of the subsequent dimensions starting from the first dimension or the +* second dimension is "K". +*@li offset_x: An optional integer for quantized FullyConnectionCompress. +*The negative offset added to the input image for int8 type. Ensure offset_x is +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result tensor of type int32 . \n +*y: The result tensor of type int32. \n *@par Third-party framework compatibility -* Compatible with the Caffe operator InnerProduct . \n +* Compatible with the Caffe operator InnerProduct. \n *@par Quantization supported or not * Yes @@ -761,7 +1168,7 @@ REG_OP(ConfusionMatrix) *@li var: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor . \n *Must be one of the following types: float16, float, int32, int8, uint8 @@ -778,7 +1185,7 @@ REG_OP(ConfusionMatrix) */ REG_OP(ScatterMul) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) + .INPUT(indices, TensorType::IndexNumberType()) .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) @@ -791,13 +1198,13 @@ REG_OP(ScatterMul) *@par Inputs: * Three inputs, including: *@li var: An ND Tensor. -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation @@ -810,10 +1217,10 @@ REG_OP(ScatterMul) * Compatible with the TensorFlow operator ScatterMin. */ REG_OP(ScatterMin) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterMin) @@ -822,15 +1229,15 @@ REG_OP(ScatterMin) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor . \n +*@li var: An ND Tensor . -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An NCHW, NHWC, or ND Tensor . \n -*Must be one of the following types: int32 -*@li updates: An NCHW, NHWC, or ND Tensor . \n +*Must be one of the following types: int32 or int64 +*@li updates: An NCHW, NHWC, or ND Tensor . -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". @@ -843,10 +1250,10 @@ REG_OP(ScatterMin) * Compatible with the TensorFlow operator ScatterMax. */ REG_OP(ScatterMax) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterMax) @@ -855,13 +1262,13 @@ REG_OP(ScatterMax) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor . \n +*@li var: An ND Tensor . 
*Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor . \n -*Must be one of the following types: int32 -*@li updates: An ND Tensor . \n +*Must be one of the following types: int32 or int64 +*@li updates: An ND Tensor . *Must be one of the following types: float16, float, int32, int8, uint8 @@ -876,10 +1283,10 @@ REG_OP(ScatterMax) * Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(ScatterUpdate) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterUpdate) @@ -940,6 +1347,40 @@ REG_OP(MatrixSetDiagV2) .OP_END_FACTORY_REG(MatrixSetDiagV2) /** +*@brief Returns a batched matrix tensor with new batched diagonal values . \n + +*@par Inputs: +* Three inputs, including: +*@li input: Rank `r+1`, where `r >= 1`. \n + +*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n + +*@li k: +*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n +*diagonal, and negative value means subdiagonals. `k` can be a single integer \n +*(for a single diagonal) or a pair of integers specifying the low and high ends \n +*of a matrix band. `k[0]` must not be larger than `k[1]`. \n + +*@par Attributes: +*@li align: An optional string. Defaults to RIGHT_LEFT. It is a string specifying \n +*how superdiagonals and subdiagonals should be aligned, respectively. \n +*Other options: LEFT_RIGHT, LEFT_LEFT, and RIGHT_RIGHT.\n + +*@par Outputs: +*output: Rank `r+1`, with `output.shape = input.shape` . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixSetDiagV3. +*/ +REG_OP(MatrixSetDiagV3) + .INPUT(input, TensorType::BasicType()) + .INPUT(diagonal, TensorType::BasicType()) + .INPUT(k, TensorType({DT_INT32})) + .OUTPUT(output, TensorType::BasicType()) + .ATTR(align, String, "RIGHT_LEFT") + .OP_END_FACTORY_REG(MatrixSetDiagV3) + +/** *@brief Returns a batched diagonal tensor with given batched diagonal values . \n *@par Inputs: @@ -979,6 +1420,455 @@ REG_OP(MatrixDiagV2) .OUTPUT(output, TensorType::BasicType()) .OP_END_FACTORY_REG(MatrixDiagV2) +/** +* @brief Add updates to var_out according to axis and indices. + +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8. +* @li indices: A Tensor of the indices, type should be int32. +* @li updates: A Tensor of the same type as "var". + +* @par Attributes: +* @li axis: An optional int specifying the axis along which to perform the index add. Defaults to 0. + +* @par Outputs: +* @li var_out: A Tensor. Same as input "var". + +* @par Third-party framework compatibility +* Compatible with the PyTorch operator index_add. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
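+* +* @par Example: +* Editor's illustrative note, not part of the original patch, matching the PyTorch index_add semantics along "axis": +* @code +* // var = {{1, 1}, {2, 2}, {3, 3}}, axis = 0, indices = {0, 2}, updates = {{5, 5}, {7, 7}} +* // var_out = {{6, 6}, {2, 2}, {10, 10}} +* @endcode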
+*/ +REG_OP(IndexAdd) + .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .ATTR(axis, Int, 0) + .OP_END_FACTORY_REG(IndexAdd) + +/** +* @brief According to the indices, replaces the value +*corresponding to x1 with the value in x2. + +* @par Inputs: +* Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: +*float16, float32, double, int32, uint8, int16, int8, complex64, int64, +*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n + +* @li x2: A Tensor of the same type as "x1". + +* @par Attributes: +* @li indices: A required list of ints, the indices at which to replace. +* @li accumulate: An optional int specifying whether to accumulate instead of replace. Defaults to 0. + +* @par Outputs: +* @li y: A Tensor. Same as input "x1". + +* @par Third-party framework compatibility +* Compatible with the PyTorch operator index_put. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(IndexPut) + .INPUT(x1, TensorType::BasicType()) + .INPUT(x2, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(indices, ListInt) + .ATTR(accumulate, Int, 0) + .OP_END_FACTORY_REG(IndexPut) + +/** +*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +*float16, float32, double, int32, uint8, int16, int8, complex64, int64, +*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n + +*@par Attributes: +*diagonal: An optional attribute indicating the diagonal to consider. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the PyTorch operator Triu. +*/ +REG_OP(Triu) + .INPUT(x, TensorType::BasicType()) + .ATTR(diagonal, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Triu) + +/** +*@brief: Returns the lower triangular part of a matrix (2-D tensor) or batch of matrices input \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +*float16, float32, double, int32, uint8, int16, int8, complex64, int64, +*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n + +*@par Attributes: +*diagonal: An optional attribute indicating the diagonal to consider. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the PyTorch operator Tril. +*/ +REG_OP(Tril) + .INPUT(x, TensorType::BasicType()) + .ATTR(diagonal, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Tril) +/** +*@brief Computes a tensor contraction over the input operands following the Einstein summation convention. +*@par Inputs: +* @li x: A list of Tensors. Must be one of the following types: int32, +* float16, float32. The operands of the Einstein summation. +* It's a dynamic input. \n + +*@par Attributes: +* @li equation: The subscripts for the Einstein summation. \n +* @li N: The number of input tensors. \n + +*@par Outputs: +*@li y: Sums the product of the elements of the input operands along +* dimensions specified +* using a notation based on the Einstein summation convention. \n + +*@attention Constraints: +*N must be an int. \n + +*@par Third-party framework compatibility +*Compatible with the PyTorch einsum operator.
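+* +*@par Example: +* Editor's illustrative sketch, not part of the original patch: a plain matrix multiply expressed through this op, assuming the create_dynamic_input_*/set_dynamic_input_* accessors generated for dynamic inputs: +* @code +* ge::op::Einsum es("es"); +* es.create_dynamic_input_x(2); +* es.set_dynamic_input_x(0, a); +* es.set_dynamic_input_x(1, b); +* es.set_attr_equation("ij,jk->ik"); +* es.set_attr_N(2); +* @endcode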
+*/ +REG_OP(Einsum) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(equation, String) + .REQUIRED_ATTR(N, Int) + .OP_END_FACTORY_REG(Einsum) + +/** +*@brief Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. \n + +*@par Inputs: +*No inputs + +*@par Attributes: +*@li num_rows: A required int. \n +*@li num_columns: An optional int. Defaults to 0. \n +*@li batch_shape: An optional ListInt. Defaults to []. \n +*@li dtype: An optional int. Defaults to 0. \n + +*@par Outputs: +*y: A Tensor with targeted type and shape. \n + +*@par Third-party framework compatibility +*Compatible with the PyTorch operator Eye. \n +*/ +REG_OP(Eye) + .OUTPUT(y, TensorType::BasicType()) /* "Result, has targeted element type" */ + .REQUIRED_ATTR(num_rows, Int) + .ATTR(num_columns, Int, 0) + .ATTR(batch_shape, ListInt, {}) + .ATTR(dtype, Int, 0) + .OP_END_FACTORY_REG(Eye) + +/** +*@brief: Fills the diagonal of tensors with at least 2 dimensions with a value . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float32, int32, int64 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Attributes: +*fill_value: The value to fill with. +*wrap: An optional bool. Defaults to "False". If "True", use recursive fill. \n + +*@par Third-party framework compatibility +* Compatible with the PyTorch operator FillDiagonal. +*/ +REG_OP(FillDiagonal) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT64})) + .REQUIRED_ATTR(fill_value, Float) + .ATTR(wrap, Bool, false) + .OP_END_FACTORY_REG(FillDiagonal) + +/** +*@brief: Returns the sum of the elements of the diagonal of the input 2-D matrix. \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float16, float. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the PyTorch operator Trace. +*/ + +REG_OP(Trace) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Trace) + +/** +*@brief Computes the generalized inverse of any matrix. \n + +*@par Inputs: +* @li x: input matrix. Must be one of the following types: +* double, float. \n + +*@par Attributes: +* @li rcond: An optional float >= 0 or inf. Defaults to 1e-15. \n + +*@par Outputs: +* y: A Tensor with the same type and shape of x's transpose. \n + +*/ +REG_OP(Pinverse) + .INPUT(x, TensorType({ DT_FLOAT, DT_DOUBLE })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_DOUBLE })) + .ATTR(rcond, Float, 1e-15) + .OP_END_FACTORY_REG(Pinverse) + +/** +* @brief From the input tensor and updates tensor, selects the maximum value according to indices to output. \n + +* @par Inputs: +* Three inputs, including: +* @li input: Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +* @li indices: Must be one of the following types: +* int32, int64. +* @li updates: Must have the same type as input. \n + +* @par Outputs: +* output: A Tensor with the same type as input.
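+* +* @par Example: +* Editor's illustrative note, not part of the original patch: the output starts as a copy of "input" and keeps the element-wise maximum at the scattered positions: +* @code +* // input = {1, 5, 1, 5}, indices = {{0}, {1}}, updates = {9, 2} +* // output = {max(1, 9), max(5, 2), 1, 5} = {9, 5, 1, 5} +* @endcode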
\n +*/ +REG_OP(TensorScatterMax) + .INPUT(input, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::BasicType()) + .OUTPUT(output, TensorType::BasicType()) + .OP_END_FACTORY_REG(TensorScatterMax) + +/** +* @brief From the input tensor and updates tensor, selects the minimum value according to indices to output. \n + +* @par Inputs: +* Three inputs, including: +* @li input: Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +* @li indices: Must be one of the following types: +* int32, int64. +* @li updates: Must have the same type as input. \n + +* @par Outputs: +* output: A Tensor with the same type as input. \n +*/ +REG_OP(TensorScatterMin) + .INPUT(input, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::BasicType()) + .OUTPUT(output, TensorType::BasicType()) + .OP_END_FACTORY_REG(TensorScatterMin) + +/** +* @brief: Returns the batched diagonal part of a batched tensor. \n + +* @par Inputs: +* @li x: A Tensor. Rank r tensor where r >= 2. +* @li k: A Tensor of type int32. Diagonal offset(s). Positive value means superdiagonal, + 0 refers to the main diagonal, and negative value means subdiagonals. k can be a + single integer (for a single diagonal) or a pair of integers specifying the low and + high ends of a matrix band. k[0] must not be larger than k[1]. +* @li padding_value: A Tensor. Must have the same type as input. The value to fill the area + outside the specified diagonal band with. Default is 0. \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". \n + +* @par Attributes: +* @li align: An optional string from: "LEFT_RIGHT", "RIGHT_LEFT", "LEFT_LEFT", "RIGHT_RIGHT". Defaults to "RIGHT_LEFT". + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiagPartV3. +*/ +REG_OP(MatrixDiagPartV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(k, TensorType({DT_INT32})) + .INPUT(padding_value, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(align, String, "RIGHT_LEFT") + .OP_END_FACTORY_REG(MatrixDiagPartV3) + +/** +* @brief Returns a batched diagonal tensor with given batched diagonal values . \n + +* @par Inputs: +* Five inputs, including: +* @li x: Rank `r`, where `r >= 1` \n + +* @li k: +* Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main +* diagonal, and negative value means subdiagonals. `k` can be a single integer +* (for a single diagonal) or a pair of integers specifying the low and high ends +* of a matrix band. `k[0]` must not be larger than `k[1]`. \n + +* @li num_rows: +* The number of rows of the output matrix. If it is not provided, the op assumes +* the output matrix is a square matrix and infers the matrix size from k and the +* innermost dimension of `diagonal`. \n + +* @li num_cols: An NCHW, NHWC, or ND Tensor. +* The number of columns of the output matrix. If it is not provided, the op +* assumes the output matrix is a square matrix and infers the matrix size from +* k and the innermost dimension of `diagonal`. \n + +* @li padding_value: The number to fill the area outside the specified diagonal band with. \n + +* @par Attributes: +* @li align: An optional string from: "LEFT_RIGHT", "RIGHT_LEFT", "LEFT_LEFT", "RIGHT_RIGHT".
+* Defaults to "RIGHT_LEFT" \n + +* @par Outputs: +* @li y: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterUpdate. +*/ +REG_OP(MatrixDiagV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(k, TensorType({DT_INT32})) + .INPUT(num_rows, TensorType({DT_INT32})) + .INPUT(num_cols, TensorType({DT_INT32})) + .INPUT(padding_value, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(align, String, "RIGHT_LEFT") + .OP_END_FACTORY_REG(MatrixDiagV3) + +/** +* @brief Function SwinAttentionScore. \n + +* @par Inputs: +* six inputs, including: +* @li query: A matrix Tensor. The type only support float16. +* @li key: A matrix Tensor. The type only support float16. +* @li value: A matrix Tensor. The type only support float16. +* @li padding_mask1: A matrix Tensor. The type only support float16. +* @li padding_mask2: A matrix Tensor. The type only support float16. +* @li scale: A scalar. The type only support float16. +* @li drop_mask: A matrix Tensor. The type only support uint8. \n + +* @par Attributes: +* @li keep_prob: A mutable Tensor. Must met all of the following rules: + shape of "keep_prob" should be (1,) or [1,]. +* @li query_transpose: A bool. If True, changes the shape of "query" from [K, M] to + [M, K]. +* @li key_transpose: A bool. If True, changes the shape of "key" from [N, K] to + [K, N]. +* @li bmm_score_transpose_a: A bool. If True, changes the shape of "mid_data" from [K, M] to + [M, K]. +* @li bmm_score_transpose_b: A bool. If True, changes the shape of "value" from [N, K] to + [K, N]. +* @li axes: A list of int. The dimension softmax would be performed on. Defaults + to "[]" . \n + +* @par Outputs: +* attention_score: The result matrix Tensor. The type only support float16. +* softmax: The result matrix Tensor. The type only support float16. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(SwinAttentionScore) + .INPUT(query, TensorType({DT_FLOAT16})) + .INPUT(key, TensorType({DT_FLOAT16})) + .INPUT(value, TensorType({DT_FLOAT16})) + .INPUT(padding_mask1, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(padding_mask2, TensorType({DT_FLOAT16})) + .INPUT(scale, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(drop_mask, TensorType({DT_INT8})) + .OUTPUT(attention_score, TensorType({DT_FLOAT16})) + .OUTPUT(softmax, TensorType({DT_FLOAT16})) + .ATTR(keep_prob, Float, 1.0) + .ATTR(query_transpose, Bool, false) + .ATTR(key_transpose, Bool, false) + .ATTR(bmm_score_transpose_a, Bool, false) + .ATTR(bmm_score_transpose_b, Bool, false) + .ATTR(softmax_axes, ListInt, {}) + .OP_END_FACTORY_REG(SwinAttentionScore) + +/** +* @brief Uses "updates" to update tensor "data" by "indices". \n + +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor of type BasicType. +* @li indices: An ND Tensor of type int32 or int64. +* @li updates: An Tensor with the same dtype as 'var'. Same shape as indices. \n + +* @par Attributes: +* @li use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdMax. 
+*/ +REG_OP(ScatterNdMax) + .INPUT(var, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::BasicType()) + .OUTPUT(var, TensorType::BasicType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterNdMax) + +/** +* @brief Uses "updates" to update tensor "data" by "indices". \n + +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor of type BasicType. +* @li indices: An ND Tensor of type int32 or int64. +* @li updates: A Tensor with the same dtype as 'var'. Same shape as indices. \n + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdMin. +*/ +REG_OP(ScatterNdMin) + .INPUT(var, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::BasicType()) + .OUTPUT(var, TensorType::BasicType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterNdMin) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 0c6a5dff..96213764 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,35 +86,37 @@ REG_OP(L2NormalizeGrad) *@brief Performs batch normalization . \n *@par Inputs: -* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) -*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. -*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D -if input "x" is with format NC1HWC0. Specifies the scaling factor. -*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D -if input "x" is with format NC1HWC0. Specifies the offset. -*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D -if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the +* Five inputs, including: (NHWC, NCHW) +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Specifies the mean used for inference. Must be "None" if the operation is used for training. -*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be -5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" +*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Specifies the variance used for inference. Must be "None" if the operation is used for training .
 
 *@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
+*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero.
+Defaults to "0.0001".
 *@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
-*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n
+*@li is_training: An optional bool, specifying if the operation is used for training or inference.
+Defaults to "True" . \n
 
 *@par Outputs:
-* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
-*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
-*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
-if input "x" is with format NC1HWC0. Specifies the mean of "x".
+* Five outputs, including: (NHWC, NCHW)
+*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW.
+*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
+Specifies the mean of "x".
 *@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
-Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
+Specifies the variance of "x".
 *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
-Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
+Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
 *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
-Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
+*@li reserve_space_3: An optional Tensor of type float32. For compatibility with TensorFlow,
+it has only one meaningless element. \n
 
 *@attention Constraints:
 *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
@@ -136,12 +138,189 @@ REG_OP(BatchNorm)
     .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
     .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
     .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_3, TensorType({DT_FLOAT}))
     .ATTR(epsilon, Float, 0.0001)
     .ATTR(data_format, String, "NHWC")
     .ATTR(is_training, Bool, true)
     .OP_END_FACTORY_REG(BatchNorm)
 
 /**
+* @brief After the mean and the reciprocal of the standard deviation (invert_std) are separately calculated
+* on each device, the per-device mean and invert_std are normalized, an overall mean and
+* invert_std are returned, and running_var is updated.
+
+* @par Inputs:
+* include:
+* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
+* @li invert_std_all: A Tensor. Reciprocal of the standard deviations of each device. Must be one of the following types: float16, float32.
+* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32.
+* @li mean_broadcast: A Tensor. The overall mean, broadcast to each device. Must be one of the following types: float16, float32.
+* @li count_sum: A Tensor. The total count across all devices. Must be one of the following types: float16, float32.
+* @li running_var: A Tensor. The running variance. Must be one of the following types: float16, float32. \n
+
+* @par Attributes:
+* Two attributes, including:
+* @li momentum: An optional float. Defaults to 0.1. \n
+* @li epsilon: An optional float. Defaults to 0.001. \n
+
+* @par Outputs:
+* include:
+* @li invert_std: A Tensor. The reciprocal of the overall standard deviation.
+* @li running_var_update: A Tensor. The moving variance of each device after the update. \n
+
+* @par Third-party framework compatibility
+* ReduceMeanWithCount, SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate together are
+* compatible with the PyTorch operator BatchNormGatherStatsWithCounts.
+*/
+REG_OP(SyncBatchNormGatherStatsWithCounts)
+    .INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(momentum, Float, 0.1)
+    .ATTR(epsilon, Float, 0.001)
+    .OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts)
+
+/**
+* @brief Updates running_mean.
+
+* @par Inputs:
+* include:
+* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
+* @li running_mean: A Tensor. The running mean. Must be one of the following types: float16, float32. \n
+
+* @par Attributes:
+* One attribute, including:
+* @li momentum: An optional float. Defaults to 0.1. \n
+
+* @par Outputs:
+* include:
+* @li running_mean_update: A Tensor. The moving mean of each device after the update. \n
+
+* @par Third-party framework compatibility
+* ReduceMeanWithCount, SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate together are
+* compatible with the PyTorch operator BatchNormGatherStatsWithCounts.
+*/
+REG_OP(SyncBNTrainingUpdate)
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(momentum, Float, 0.1)
+    .OP_END_FACTORY_REG(SyncBNTrainingUpdate)
+
+/**
+*@brief part of SyncBatchNormBackward . \n
+
+*@par Inputs:
+* Four inputs, including:
+*@li sum_dy: A Tensor. Must be one of the following types: float16, float32 .
+*@li sum_dy_dx_pad: A Tensor. Must be one of the following types: float16, float32 .
+*@li mean: A Tensor. Must be one of the following types: float16, float32 .
+*@li invert_std: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Outputs:
+*@li sum_dy_xmu: A Tensor. Has the same type and format as input "sum_dy"
+*@li y: A Tensor. Has the same type and format as input "sum_dy" . \n
+*/
+REG_OP(SyncBatchNormBackwardReduce)
+    .INPUT(sum_dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(sum_dy_dx_pad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(invert_std, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(sum_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(SyncBatchNormBackwardReduce)
+
+/**
+*@brief part of SyncBatchNormBackward . \n
+
+*@par Inputs:
+* Seven inputs, including:
+*@li grad_output: A Tensor. Must be one of the following types: float16, float32 .
+*@li save_input: A Tensor. Must be one of the following types: float16, float32 .
+*@li mean: A Tensor. Must be one of the following types: float16, float32 .
+*@li invstd: A Tensor. Must be one of the following types: float16, float32 .
+*@li weight: A Tensor. Must be one of the following types: float16, float32 .
+*@li mean_dy: A Tensor. Must be one of the following types: float16, float32 .
+*@li mean_dy_xmu: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Outputs:
+*@li grad_input: A Tensor. Has the same type and format as input "grad_output" . \n
+*/
+REG_OP(SyncBatchNormBackwardElemt)
+    .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(save_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(invstd, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mean_dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mean_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(grad_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(SyncBatchNormBackwardElemt)
+
+/**
+*@brief Performs batch normalization . \n
+
+*@par Inputs:
+* Five inputs, including: (NDHWC, NCDHW)
+*@li x: A 5D Tensor of type float16 or float32, with format NDHWC or NCDHW.
+*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Specifies the scaling factor.
+*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Specifies the offset.
+*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Specifies the mean used for inference. Must be "None" if the
+operation is used for training.
+*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Specifies the variance used for inference. Must be "None"
+if the operation is used for training . \n
+
+*@par Attributes:
+*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
+*@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW".
+*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n
+
+*@par Outputs:
+* Five outputs, including: (NDHWC, NCDHW)
+*@li y: A 5D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW.
+*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Specifies the mean of "x".
+*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Specifies the variance of "x".
+*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
+*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
+
+*@attention Constraints:
+*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
+then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
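+
+* (Hypothetical usage sketch, not part of this change; it assumes the standard
+* set_input_* / set_attr_* helpers that REG_OP generates, and "x_node",
+* "scale_node" and "offset_node" are placeholder upstream operators.)
+* @code
+*   ge::op::BatchNorm3D bn("batch_norm_3d_0");
+*   bn.set_input_x(x_node)            // 5D feature map, e.g. NCDHW
+*     .set_input_scale(scale_node)    // 1D, length C
+*     .set_input_offset(offset_node)  // 1D, length C
+*     .set_attr_epsilon(0.0001f)
+*     .set_attr_data_format("NCDHW")
+*     .set_attr_is_training(true);    // mean/variance omitted for training
+* @endcode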
 
+*@par Third-party framework compatibility
+*@li Compatible with the TensorFlow operator fused_batch_norm.
+*@li Compatible with the TensorFlow operator fused_batch_norm_v2.
+*/
+REG_OP(BatchNorm3D)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(offset, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .ATTR(data_format, String, "NCDHW")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(BatchNorm3D)
+/**
 *@brief Performs batch normalization . \n
 
 *@par Inputs:
@@ -198,11 +377,12 @@ REG_OP(BatchNormExt2)
 
 *@par Inputs:
 * Five inputs, including:
-*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient.
-*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0.
-*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0.
-*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm.
-*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n
+*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient.
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW.
+*@li scale: A 4D or 5D Tensor of type float32, with format NHWC or NCHW.
+*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNorm.
+*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNorm .
+*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n
 
 *@par Attributes:
 *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
@@ -210,11 +390,11 @@ REG_OP(BatchNormExt2)
 *@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n
 
 *@par Outputs:
-*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x".
-*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale".
-*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset".
-*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output.
-*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n
+*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, for the offset of "x".
+*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "scale".
+*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "offset".
+*@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW. Pass "None" to skip this output.
+*@li reserve_space_5: A Tensor of type float32, with format NHWC or NCHW. Pass "None" to skip this output . \n
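+
+* (Illustrative note, not part of this change.) In training mode the outputs
+* follow the textbook batch-norm gradients, with the reductions and means taken
+* over the N, H and W dimensions:
+* @code
+*   // x_hat           = (x - mean) * rsqrt(variance + epsilon)
+*   // offset_backprop = sum(y_backprop)
+*   // scale_backprop  = sum(y_backprop * x_hat)
+*   // x_backprop      = scale * rsqrt(variance + epsilon) *
+*   //     (y_backprop - mean(y_backprop) - x_hat * mean(y_backprop * x_hat))
+* @endcode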
 
 *@attention Constraints:
 * The preceding layer of this operator must be operator BatchNorm . \n
@@ -229,6 +409,7 @@ REG_OP(BatchNormGrad)
     .INPUT(scale, TensorType({DT_FLOAT}))
     .INPUT(reserve_space_1, TensorType({DT_FLOAT}))
     .INPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(reserve_space_3, TensorType({DT_FLOAT}))
     .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
     .OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
     .OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
@@ -244,6 +425,52 @@ REG_OP(BatchNormGrad)
 
 *@par Inputs:
 * Five inputs, including:
+*@li y_backprop: A 5D Tensor of type float16 or float32, with format NDHWC or NCDHW, for the gradient.
+*@li x: A 5D Tensor of type float16 or float32, with format NDHWC or NCDHW.
+*@li scale: A 5D Tensor of type float32, with format NDHWC or NCDHW.
+*@li reserve_space_1: A 5D Tensor of type float32, with format NDHWC or NCDHW. It is an output of BatchNorm.
+*@li reserve_space_2: A 5D Tensor of type float32, with format NDHWC or NCDHW. It is an output of BatchNorm . \n
+
+*@par Attributes:
+*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
+*@li data_format: An optional string. Defaults to "NCDHW".
+*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n
+
+*@par Outputs:
+*@li x_backprop: A Tensor of type float16 or float32, with format NDHWC or NCDHW, for the offset of "x".
+*@li scale_backprop: A Tensor of type float32, with format NDHWC or NCDHW, for the offset of "scale".
+*@li offset_backprop: A Tensor of type float32, with format NDHWC or NCDHW, for the offset of "offset".
+*@li reserve_space_4: A Tensor of type float32, with format NDHWC or NCDHW. Pass "None" to skip this output.
+*@li reserve_space_5: A Tensor of type float32, with format NDHWC or NCDHW. Pass "None" to skip this output . \n
+
+*@attention Constraints:
+* The preceding layer of this operator must be operator BatchNorm . \n
+
+*@see BatchNorm
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad.
+*/
+REG_OP(BatchNorm3DGrad)
+    .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(reserve_space_1, TensorType({DT_FLOAT}))
+    .INPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
+    .OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_4, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_5, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .ATTR(data_format, String, "NCDHW")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(BatchNorm3DGrad)
+
+/**
+*@brief Performs the backpropagation of BatchNorm . \n
+
+*@par Inputs:
+* Five inputs, including:
 *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient.
 *@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW.
 *@li scale: A 4D Tensor of type float32, with format NHWC or NCHW.
@@ -290,7 +517,7 @@ REG_OP(BatchNormGradExt2)
 *@brief Performs batch normalization . \n
 
 *@par Inputs:
-*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW.
 *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x". Specifies the mean used for inference.
 *@li variance: A Tensor of type float32 or float16. Must be 1D if input "x". Specifies the variance used for inference.
 *@li momentum: A Tensor, represents the mean and the variance's scale factor
@@ -315,53 +542,32 @@ REG_OP(BNInference)
     .ATTR(use_global_stats, Bool,true)
     .ATTR(mode, Int,1)
     .OP_END_FACTORY_REG(BNInference)
+
 /**
-*@brief aicpu batch normalization host . \n
+*@brief Performs batch normalization .
 
 *@par Inputs:
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW.
+*@li mean: A Tensor of type float32 or float16. Must be 1D.
+* Specifies the mean used for inference.
+*@li variance: A Tensor of type float32 or float16. Must be 1D.
+* Specifies the variance used for inference.
+*@li scale: An optional tensor of type float16 or float32, not used.
+*@li offset: An optional tensor of type float16 or float32, not used. \n
 
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
-*@li momentum: An optional float, mean and variance's Scale factor
 *@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
+*@li momentum: An optional float32, representing the mean and
+* the variance's scale factor.
+*@li epsilon: An optional float32, specifying the small value
+* added to variance to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: means inference mode; can only be "True".
-*@li mode: An optional attr, not use
-*@par Outputs:
-*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean
-*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance
-*/
-REG_OP(BnHost)
-    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .ATTR(epsilon, Float, 0.00001)
-    .ATTR(mode, Int, 1)
-    .ATTR(use_global_stats, Bool, true)
-    .OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OP_END_FACTORY_REG(BnHost)
-/**
-*@brief Performs batch normalization . \n
+*@li mode: An optional attr, not used. \n
 
-*@par Inputs:
-*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
-*@li scale: An optional tensor of type float16 or float32, no use
-*@li offset: An optional tensor of type float16 or float32, no use
-*@par Attributes:
-*@li momentum: An optional float32 num, represents the mean and the variance's scale factor
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
-*@li use_global_stats: mean inference mode , only can be "True".
-*@li mode: An optional attr, not use *@par Outputs: -*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x". \n + *@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use BNInference instead. +* Warning: THIS FUNCTION IS DEPRECATED. Please use BNInference instead. */ REG_OP(BNInferenceD) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 35296870..4c55eac0 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,15 +26,14 @@ namespace ge { /** * @brief Computes the gradients of depthwise convolution with respect to -* the filter . \n - +* the filter. \n * @par Inputs: -* Three inputs include: \n +* Three inputs include: * @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], -* support float16, float32, double -* @li filter_size: A 4D tensor of type int32, with shape [H, W, C, K] +* support float16. +* @li filter_size: A 4D tensor of type int32, int64, with shape [H, W, C, K] * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. -* Must be one of the following types: float16, float32, double . \n +* Must be one of the following types: float16. \n * @par Attributes: * @li strides: A required list or tuple. The stride of the sliding window @@ -49,7 +48,7 @@ namespace ge { * @li pads: A required list or tuple. Padding added to each dimension of the * input. * @li data_format: An optional string. Input data format, either "NHWC" or -* "NCHW" . \n +* "NCHW". \n * @par Outputs: * filter_grad: Gradient of the deep convolution relative to the filter with @@ -65,8 +64,9 @@ namespace ge { * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * -* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512 . \n +* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + +* (480 * stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf +* <= l0b_size/512. \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. @@ -134,9 +134,9 @@ REG_OP(DepthwiseConv2DBackpropFilter) * instead. */ REG_OP(DepthwiseConv2DBackpropFilterD) - .INPUT(input, TensorType({float16})) - .INPUT(out_backprop, TensorType({float16})) - .OUTPUT(filter_grad, TensorType({float32})) + .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) + .OUTPUT(filter_grad, TensorType({DT_FLOAT32})) .REQUIRED_ATTR(filter_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -146,34 +146,34 @@ REG_OP(DepthwiseConv2DBackpropFilterD) /** * @brief Computes the gradients of depthwise convolution with respect to the -* input . \n - +* input. 
\n
 * @par Inputs:
 * Three inputs include: \n
 * @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C],
-* support int32, int64
+* support int32, int64.
 * @li filter: 4D filter tensor with shape of [H, W, C, K], support float16.
 * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C].
 * Must be one of the following types: float16 . \n
 
 * @par Attributes:
-* @li strides: A required list or tuple of int32. The stride of the sliding window for
-* height and width of input "x" of the convolution.
+* @li strides: A required list or tuple of int32. The stride of the sliding
+* window for height and width of input "x" of the convolution.
 * Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height,
 * stride_width, 1].
-* @li dilations: An optional list or tuple of int32. The dilation factor for each
-* dimension of input "x". Defaults to "[1, 1, 1, 1]".
+* @li dilations: An optional list or tuple of int32. The dilation factor for
+* each dimension of input "x". Defaults to "[1, 1, 1, 1]".
 * If set to k > 1, there will be k-1 skipped cells between each filter element
 * on that dimension. Must be with shape [1, 1, dilation_height, dilation_width]
 * or [1, dilation_height, dilation_width, 1].
-* @li pads: A required list or tuple of int32. Padding added to each dimension of the
-* input.
+* @li pads: A required list or tuple of int32. Padding added to each dimension
+* of the input.
 * @li data_format: An optional string. Input data format, either "NHWC" or
 * "NCHW". Defaults to "NHWC" . \n
 
 * @par Outputs:
 * input_grad: Gradient of the deep convolution relative to the input with shape
-* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16 . \n
+* [N, C, H, W] or [N, H, W, C]. Must be one of the following types:
+* float16, float32. \n
 
 * @attention Constraints:\n
 * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but
@@ -195,7 +195,7 @@ REG_OP(DepthwiseConv2DBackpropInput)
     .INPUT(input_size, TensorType({DT_INT32, DT_INT64}))
     .INPUT(filter, TensorType({DT_FLOAT16}))
     .INPUT(out_backprop, TensorType({DT_FLOAT16}))
-    .OUTPUT(input_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32}))
     .REQUIRED_ATTR(strides, ListInt)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
     .REQUIRED_ATTR(pads, ListInt)
@@ -255,7 +255,7 @@ REG_OP(DepthwiseConv2DBackpropInput)
 REG_OP(DepthwiseConv2DBackpropInputD)
     .INPUT(filter, TensorType({DT_FLOAT16}))
     .INPUT(out_backprop, TensorType({DT_FLOAT16}))
-    .OUTPUT(input_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32}))
     .REQUIRED_ATTR(input_size, ListInt)
     .REQUIRED_ATTR(strides, ListInt)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
@@ -269,10 +269,10 @@ REG_OP(DepthwiseConv2DBackpropInputD)
 
 *@par Inputs:
 *Two required inputs and two optional inputs, including: \n
-* @li x: A 4D tensor of type float16 or int8, with shape [N, C, H, W] or [N, H, W, C]
-* @li filter: A 4D tensor of type float16 or int8, with shape [H, W, C, K]
+* @li x: A 4D tensor of type float16 or int8 or int4, with shape [N, C, H, W] or [N, H, W, C]
+* @li filter: A 4D tensor of type float16 or int8 or int4, with shape [H, W, C, K]
 * @li bias: An optional tensor of type float16 or int32
-* @li offset_w: An optional float16 or int8, used for quantized inference
+* @li offset_w: An optional float16 or int8 or int4, used for quantized inference
 
 * @par Attributes:
 * @li strides: A required list or tuple. The stride of the sliding window for
@@ -312,11 +312,11 @@ REG_OP(DepthwiseConv2DBackpropInputD)
 * @li Compatible with the Caffe operator DepthwiseConv2D.
 */
 REG_OP(DepthwiseConv2D)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
-    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32}))
-    .OPTIONAL_INPUT(offset_w, TensorType({DT_FLOAT16, DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
+    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT}))
+    .OPTIONAL_INPUT(offset_w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT}))
     .REQUIRED_ATTR(strides, ListInt)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
     .REQUIRED_ATTR(pads, ListInt)
@@ -331,13 +331,13 @@ REG_OP(DepthwiseConv2D)
 * For NCHW data format, the feature dimension is the third-to-last . \n
 
 *@par Inputs:
-*x: A Tensor of type NumberType . \n
+* x: A Tensor of type NumberType . \n
 
 *@par Attributes:
-*data_format: Data format. Defaults to "NHWC" . \n
+* data_format: Data format. Defaults to "NHWC" . \n
 
 *@par Outputs:
-*y: A Tensor.Has the same type as "x" . \n
+* y: A Tensor. Has the same type as "x" . \n
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator BiasAddGrad.
@@ -365,6 +365,23 @@ REG_OP(BiasAddGrad)
 * 4-D with shape [batch, out_height, out_width, out_channels]
 * or [batch, out_channels, out_height, out_width].
 * Gradients with respect to the output of the convolution.
+ *\n
+ *\n
+ * The following are the supported data types and data formats:\n
+ *\n
+ *\n
+ | Tensor    | out_backprop | filter  | y      |\n
+ |-----------|--------------|---------|--------|\n
+ | Data Type | float16      | float16 | float16|\n
+ |           | float32      | float32 | float32|\n
+ |           | float64      | float64 | float64|\n
+ | Format    | NCHW         | NCHW    | NCHW   |\n
+ |           | NHWC         | HWCN    | NHWC   |\n
+ *\n
+ * For float32 and float64 type, the actual calculation on the chip is based
+ * on float16.
+ *\n
+ *
 *@par Attributes:
 * Five attributes:
 * @li strides: A tuple/list of 4 integers. The stride of the sliding window
@@ -377,8 +394,49 @@ REG_OP(BiasAddGrad)
 * channels.
 * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to
 * "NHWC". Specify the data format of the input and output data.
+ *\n
+ *\n
+ * The following value range restrictions must be met:\n
+ *\n
+ *\n
+ | Name             | Field    | Scope        |\n
+ |------------------|----------|--------------|\n
+ | input_size       | H        | [1, 200000]  |\n
+ |                  | W        | [1, 4096]    |\n
+ | Filter           | H        | [1, 255]     |\n
+ |                  | W        | [1, 255]     |\n
+ | out_backprop     | H*strideH| [1, 200000]  |\n
+ |                  | W*strideW| [1, 4096]    |\n
+ | y(fmap)          | H        | [1, 200000]  |\n
+ |                  | W        | [1, 4096]    |\n
+ | Stride           | H        | [1, 63]      |\n
+ |                  | W        | [1, 63]      |\n
+ | Padding          | Top      | [0, 255]     |\n
+ |                  | Bottom   | [0, 255]     |\n
+ |                  | Left     | [0, 255]     |\n
+ |                  | Right    | [0, 255]     |\n
+ | Dilation         | H        | [1, 255]     |\n
+ |                  | W        | [1, 255]     |\n
+ *\n
+
+ * In Ascend910, fmap or out_backprop's H and W do not support 1 when\n
+ * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
+ * and filter_width > fmap_width.
+ * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h *
+ * stride_w < 4096. \n
+ *
 *@par Outputs:
 * y: A Tensor. Has the same type as filter, and has the same format as input_size.
+ *\n
+ * out_backprop_height = (fmap_height + pad_top + pad_bottom -
+ *                        (dilation_h * (filter_height - 1) + 1))
+ *                       / stride_h + 1
+ *\n
+ * out_backprop_width = (fmap_width + pad_left + pad_right -
+ *                       (dilation_w * (filter_width - 1) + 1))
+ *                      / stride_w + 1
+ *\n
+ *
 *@par Third-party framework compatibility
 * Compatible with Tensorflow's conv2d_backprop_input
 */
@@ -424,15 +482,15 @@ REG_OP(Conv2DBackpropInput)
 
 *@par Outputs:
 * y: A Tensor. Has the same type as filter, 4-D tensor [batch, height, width,
 * channels] or [batch, channels, height, width].
-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
 * Compatible with Tensorflow's conv2d_backprop_input
 
 *@par Restrictions:
 * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropInput instead.
 */
 REG_OP(Conv2DBackpropInputD)
-    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
-    .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
+    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_BF16}))
+    .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8, DT_BF16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32, DT_BF16}))
     .REQUIRED_ATTR(input_size, ListInt)
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(pads, ListInt)
@@ -444,8 +502,8 @@ REG_OP(Conv2DBackpropInputD)
 
 /**
 *@brief Computes the Deconvolution with respect to the input.
 *@par Inputs:
- * Three inputs:
- * @li x: A Tensor of type float16 or int8. 4D with shape
+ * Two required inputs:
+ * @li x: A Tensor of type float16 or int8. 4D with shape
 * [batch, out_channels, out_height, out_width]. Gradients with respect
 * to the output of the convolution.
 * @li filter: A Tensor. Must have the same type as "x".
@@ -453,7 +511,21 @@ REG_OP(Conv2DBackpropInputD)
 * Two optional inputs:
 * @li bias: An optional tensor. Must have the same type as "y".
 * @li offset_w: An optional 1D tensor for quantized deconvolution.
- * Type is int8. Reserved.\n
+ * Type is int8. Reserved.
+ *\n
+ *\n
+ * The following are the supported data types and data formats:\n
+ *\n
+ *\n
+ | Tensor    | x       | filter  | bias    | y      |\n
+ |-----------|---------|---------|---------|--------|\n
+ | Data Type | float16 | float16 | float16 | float16|\n
+ |           | int8    | int8    | int32   | int32  |\n
+ | Format    | NCHW    | NCHW    | ND      | NCHW   |\n
+ *\n
+ * For int8, a dequant or requant operator must follow.
+ *\n
+ *
 *@par Attributes:
 * Six attributes:
 * @li strides: A tuple or list of 2 integers. The stride of the sliding window
@@ -463,13 +535,54 @@ REG_OP(Conv2DBackpropInputD)
 * @li dilations: A tuple or list of 4 integers. The dilation factor for each
 * dimension of input, defaults to [1,1,1,1].
 * @li groups: Number of blocked connections from input channels to
-    output channels. Defaults to "1".
+ * output channels. Defaults to "1".
 * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n
-    Specify the data format of the input and output data.
+ * Specify the data format of the input and output data.
 * @li offset_x: An optional integer for quantized deconvolution.
- * Defaults to "0".
+ * The negative offset added to the input image for int8 type. Ensure that
+ * offset_x is within the effective range of int8 [-128, 127]. Defaults to "0".
+ *\n
+ *\n
+ * The following value range restrictions must be met:\n
+ *\n
+ *\n
+ | Name             | Field    | Scope        |\n
+ |------------------|----------|--------------|\n
+ | x (out_backprop) | H*strideH| [1, 200000]  |\n
+ |                  | W*strideW| [1, 4096]    |\n
+ | Filter           | H        | [1, 255]     |\n
+ |                  | W        | [1, 255]     |\n
+ | y (fmap)         | H        | [1, 200000]  |\n
+ |                  | W        | [1, 4096]    |\n
+ | Stride           | H        | [1, 63]      |\n
+ |                  | W        | [1, 63]      |\n
+ | Padding          | Top      | [0, 255]     |\n
+ |                  | Bottom   | [0, 255]     |\n
+ |                  | Left     | [0, 255]     |\n
+ |                  | Right    | [0, 255]     |\n
+ | Dilation         | H        | [1, 255]     |\n
+ |                  | W        | [1, 255]     |\n
+ | Offset_x         |          | [-128, 127]  |\n
+ *\n
+ * In Ascend910, fmap or out_backprop's H and W do not support 1 when\n
+ * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
+ * and filter_width > fmap_width.
+ * If filter_h = 1 and filter_w = 1,
+ * out_backprop_w * stride_h * stride_w < 4096
+ *\n
+ *
 *@par Outputs:
 * y: A Tensor. 4D tensor with shape [batch, channels, height, width].
+ *\n
+ * out_backprop_height = (fmap_height + pad_top + pad_bottom -
+ *                        (dilation_h * (filter_height - 1) + 1))
+ *                       / stride_h + 1
+ *\n
+ * out_backprop_width = (fmap_width + pad_left + pad_right -
+ *                       (dilation_w * (filter_width - 1) + 1))
+ *                      / stride_w + 1
+ *\n
+ *
 * When type of x is float16, the type of y must be float16.
 * When type of x is int8, the type of y must be int32.
 */
@@ -491,7 +604,7 @@ REG_OP(Deconvolution)
 
 *@par Inputs:
 * Three inputs:
 * @li x: A Tensor. Must be one of the following types: float16, float32,
- * float64.4-D with shape [batch, in_height, in_width, in_channels] or
+ * float64. 4-D with shape [batch, in_height, in_width, in_channels] or
 * [batch, in_channels, in_height, in_width].
 * @li filter_size: A const Tensor of type int32. Currently does not support
 * data tensor. An integer vector representing the tensor shape of filter,
@@ -502,6 +615,23 @@ REG_OP(Deconvolution)
 * [batch, out_height, out_width, out_channels] or [batch, out_channels,
 * out_height, out_width]. Gradients with respect to the output of the
 * convolution.
+ *\n
+ *\n
+ * The following are the supported data types and data formats:\n
+ *\n
+ *\n
+ | Tensor    | x       | out_backprop | y       |\n
+ |-----------|---------|--------------|---------|\n
+ | Data Type | float16 | float16      | float16 |\n
+ |           | float32 | float32      | float32 |\n
+ |           | float64 | float64      | float64 |\n
+ | Format    | NCHW    | NCHW         | NCHW    |\n
+ |           | NHWC    | NHWC         | HWCN    |\n
+ *\n
+ * For float32 and float64 type of x and out_backprop, the actual calculation
+ * on the chip is based on float16.
+ *\n
+ *
 *@par Attributes:
 * Five attributes:
 * @li strides: A tuple/list of 4 integers. The stride of the sliding window
@@ -514,8 +644,42 @@ REG_OP(Deconvolution)
 * channels.
 * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to
 * "NHWC". Specify the data format of the input and output data.
+ *\n
+ *\n
+ * The following value range restrictions must be met:\n
+ *\n
+ *\n
+ | Name             | Field    | Scope        |\n
+ |------------------|----------|--------------|\n
+ | x(fmap)          | H        | [1, 200000]  |\n
+ |                  | W        | [1, 4096]    |\n
+ | Filter Size      | H        | [1, 255]     |\n
+ |                  | W        | [1, 255]     |\n
+ | out_backprop     | H        | [1, 200000]  |\n
+ |                  | W        | [1, 4096]    |\n
+ | y                | H        | [1, 200000]  |\n
+ |                  | W        | [1, 4096]    |\n
+ | Stride           | H        | [1, 63]      |\n
+ |                  | W        | [1, 63]      |\n
+ | Padding          | Top      | [0, 255]     |\n
+ |                  | Bottom   | [0, 255]     |\n
+ |                  | Left     | [0, 255]     |\n
+ |                  | Right    | [0, 255]     |\n
+ | Dilation         | H        | [1, 255]     |\n
+ |                  | W        | [1, 255]     |\n
+ *\n
 *@par Outputs:
 * y: A Tensor.
Has the same type as x, has the same format as filter_size. + *\n + * out_backprop_height = (in_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (in_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * *@par Third-party framework compatibility * Compatible with Tensorflow's conv2d_backprop_filter */ @@ -580,82 +744,76 @@ REG_OP(Conv2DBackpropFilterD) .OP_END_FACTORY_REG(Conv2DBackpropFilterD) /** -*@brief Computes a 2D convolution given 4D "x" and "filter" tensors. -*@par Inputs: -*@li x: A 4D tensor of input image. With the format "NHWC", the data is stored +* @brief Computes a 2D convolution given 4D "x" and "filter" tensors. +* @par Inputs: +* @li x: A 4D tensor of input image. With the format "NHWC", the data is stored * in the order of: [batch, in_height, in_width, in_channels]. -*@li filter: A 4D tensor of learnable filters. Must have the same type as "x". +* @li filter: A 4D tensor of learnable filters. Must have the same type as "x". * With the format "HWCN" , the data is stored in the order of: [filter_height, * filter_width, in_channels / groups, out_channels]. -*@li bias: An optional 1D tensor of additive biases to the filter outputs. +* @li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. -*@li offset_w: Reserved. +* @li offset_w: Reserved. *\n *\n * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | |---------|---------|---------|-------- - | | float32 | float32 | float32 | float32 - | |---------|---------|---------|-------- - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | | NHWC -@endverbatim +*\n +*\n +| Tensor | x | filter | bias | y |\n +| :-------: | :-----: | :-----: | :-----: | :-----: |\n +| Data Type | float16 | float16 | float16 | float16 |\n +| | float32 | float32 | float32 | float32 |\n +| | int8 | int8 | int32 | int32 |\n +| Format | NCHW | NCHW | ND | NCHW |\n +| | NHWC | HWCN | ND | NHWC |\n +*\n * For float32 type, the actual calculation on the chip is based on -* float16. For int8, a dequant or requant operator must be followed. +* float16. *\n * -*@par Attributes: -*@li strides: Required. A list of 4 integers. The stride of the sliding window +* @par Attributes: +* @li strides: Required. A list of 4 integers. The stride of the sliding window * for each dimension of input. The dimension order is determined by the data * format of "x". The N and C dimensions must be set to 1. -*@li pads: Required. A list of 4 integers. The number of pixels to add to each +* @li pads: Required. A list of 4 integers. The number of pixels to add to each * (top, bottom, left, right) side of the input. -*@li dilations: Optional. A list of 4 integers. The dilation factor for each +* @li dilations: Optional. A list of 4 integers. The dilation factor for each * dimension of input. The dimension order is determined by the data format of -* "x". The N and C dimensions must be set to 1. The H and W dimensions must be -* set to 1 for int8 type. Defaults to [1, 1, 1, 1]. -*@li groups: Optional. An integer of type int32. The number of blocked +* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. 
+* @li groups: Optional. An integer of type int32. The number of blocked * connections from input channels to output channels. In_channels and * out_channels must both be divisible by "groups". Defaults to 1. -*@li offset_x: Optional. An integer of type int32. The negative offset added +* @li offset_x: Optional. An integer of type int32. The negative offset added * to the input image for int8 type. Ensure that the output is within the * effective range. Defaults to 0. -*@li data_format: Reserved. +* @li data_format: Reserved. *\n *\n * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | Input Image Size | H | [1, 100000] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter Size | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim +*\n +*\n +| Name | Field | Scope |\n +| :--------------: | :------: | :---------: |\n +| Input Image Size | H | [1, 100000] |\n +| | W | [1, 4096] |\n +| Filter Size | H | [1, 255] |\n +| | W | [1, 255] |\n +| Stride | H | [1, 63] |\n +| | W | [1, 63] |\n +| Padding | Top | [0, 255] |\n +| | Bottom | [0, 255] |\n +| | Left | [0, 255] |\n +| | Right | [0, 255] |\n +| Dilation | H | [1, 255] |\n +| | W | [1, 255] |\n +| Offset_x | - | [-128, 127] |\n +*\n +* The W dimension of the input image supports cases exceeding 4096, but it may +* cause compilation errors. *\n * *@par Outputs: -*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the +* y: A 4D Tensor of output feature map. Has the same type as "x". With the * format "NHWC", the data is stored in the order of: [batch, out_height, * out_width, out_channels]. *\n @@ -666,36 +824,21 @@ REG_OP(Conv2DBackpropFilterD) * out_width = (in_width + pad_left + pad_right - * (dilation_w * (filter_width - 1) + 1)) * / stride_w + 1 -* -*@attention Constraints: -*@li The following restrictions on the output must be met: -*@verbatim - | Output | Restrictions - ----------|-------------------------------- - | H == 1 | H * W(input) == H * W(filter) - | W == 1 | - ----------|-------------------------------- - | H != 1 | W(input) == W(filter) - | W == 1 | Only for Ascend310 Hi3796V300CS -@endverbatim -* "H * W (input)" indicates the image size after padding and "H * W (filter)" -* indicates the filter size after dilation."W(input)" and W(filter) indicate -* the same rule on the W dimension. *\n * -*@par Quantization supported or not -*@li Yes +* @par Quantization supported or not +* Yes * -*@par Third-party framework compatibility +* @par Third-party framework compatibility *@li Compatible with the TensorFlow operator "conv2d". *@li Compatible with the Caffe operator 2D "Convolution". 
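+*
+* (Illustrative arithmetic only, not part of this change.) The output-size rule
+* above can be checked with a small helper:
+* @code
+*   // Requires <cstdint>.
+*   int64_t OutDim(int64_t in, int64_t pad0, int64_t pad1, int64_t k,
+*                  int64_t dilation, int64_t stride) {
+*     return (in + pad0 + pad1 - (dilation * (k - 1) + 1)) / stride + 1;
+*   }
+*   // e.g. a 224x224 image, 3x3 filter, pads of 1, stride 2, dilation 1:
+*   // OutDim(224, 1, 1, 3, 1, 2) == 112
+* @endcode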
 */
 REG_OP(Conv2D)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
-    .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16}))
+    .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16}))
     .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_BF16}))
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(pads, ListInt)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
@@ -705,67 +848,70 @@ REG_OP(Conv2D)
 
 /**
-*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors.
-*@par Inputs:
+* @brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors.
+* @par Inputs:
 * @li x: A 4D tensor of input images.
-* @li filter_compress: A 4D tensor of compressed filters.
-* @li compress_index: A 1D Tensor dtype of int8.
-* @li bias: An optional 1D tensor.
-* @li offset_w: An optional 1D tensor for quantized convolution. Reserved.
+* @li filter_compress: A 4D tensor of compressed filter data blocks.
+* @li compress_index: A 1D tensor of index for decompression.
+* @li bias: An optional 1D tensor of additive biases to the filter outputs.
+* The data is stored in the order of: [out_channels].
+* @li offset_w: Reserved.
+*\n
+*\n
+* The following are the supported data types and data formats:
+*\n
+*\n
+| Tensor    | x       | filter_compress  | compress_index | bias    | y       |\n
+| :-------: | :-----: | :--------------: | :------------: | :-----: | :-----: |\n
+| Data Type | int8    | int8             | int8           | int32   | int32   |\n
+| Format    | NCHW    | NCHW             | ND             | ND      | NCHW    |\n
+|           | NHWC    | HWCN             |                |         | NHWC    |\n
+*\n
+*
+* @par Attributes:
+* @li strides: Required. A list of 4 integers. The stride of the sliding window
+* for each dimension of input. The dimension order is determined by the data
+* format of "x". The N and C dimensions must be set to 1.
+*@li pads: Required. A list of 4 integers. The number of pixels to add to each
+* (top, bottom, left, right) side of the input.
+*@li dilations: Optional. A list of 4 integers. The dilation factor for each
+* dimension of input. The dimension order is determined by the data format of
+* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1].
+*@li groups: Optional. An integer of type int32. The number of blocked
+* connections from input channels to output channels. In_channels and
+* out_channels must both be divisible by "groups". Only 1 is supported.
+*@li offset_x: Optional. An integer of type int32. The negative offset added
+* to the input image for int8 type. Ensure that the output is within the
+* effective range. Defaults to 0.
+*@li data_format: Reserved.
+* @li alg: Optional. The compress algorithm. Defaults to "weight_unzip".
* -* The input and output tensor attributes are listed as follows: -* @verbatim - |Tensor | x | filter_compress | bias | offset_w | y - -----------|---------|---------|---------|----------|-------- - |Data Type | float16 | float16 | float16 | _ | float16 - | |---------|---------|---------|----------|-------- - | | float32 | float32 | float32 | _ | float32 - | |---------|---------|---------|----------|-------- - | | int8 | int8 | int32 | int8 | int32 - -----------|---------|---------|---------|----------|-------- - |Format | NCHW | NCHW | ND | ND | NCHW - | | NHWC | NHWC | | | NHWC - | | | HWCN | | | -@endverbatim -* It should be noted that the data types must correspond to each other, but the -* format does not need to . \n - -*@par Attributes: -* @li strides: A list of 4 integers. Specifying the strides of the -* convolution along the height and width. The dimension order is determined -* by the data format of "x". By default the N and C dimensions are set to 1. -* @li pads: A list of 4 integers. Specifying the top, bottom, left and right -* padding. -* @li dilations: A list of 4 integers. Specifying the dilation rate to use -* for dilated convolution. Has the same dimension order and value as "strides". -* @li groups: Number of blocked connections from input channels to output -* channels. Input channels and output channels must both be divisible by -* "groups".Type is int32. -* @li offset_x: An optional integer for quantized convolution. Type is int32. -* Defaults to "0". -* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the -* data format of the input and output images. Type is string. -* Defaults to "NHWC". Reserved . \n - *@par Outputs: -* @li y: A 4D Tensor of output images . \n - +* y: A 4D Tensor of output feature map. Has the same type as "x". With the +* format "NHWC", the data is stored in the order of: [batch, out_height, +* out_width, out_channels]. +*\n +* *@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. +*Warning: THIS FUNCTION IS EXPERIMENTAL. */ REG_OP(Conv2DCompress) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(x, TensorType({DT_INT8})) + .INPUT(filter_compress, TensorType({DT_INT8})) .INPUT(compress_index, TensorType({DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(groups, Int, 1) .ATTR(data_format, String, "NHWC") .ATTR(offset_x, Int, 0) + .ATTR(alg, String, "weight_unzip") .OP_END_FACTORY_REG(Conv2DCompress) /** @@ -778,23 +924,22 @@ REG_OP(Conv2DCompress) * With the format "HWCN" , the data is stored in the order of: [filter_height, * filter_width, in_channels / groups, out_channels]. *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format -* "NHWC", the data is stored in the order of: [batch, in_height, in_width, +* "NHWC", the data is stored in the order of: [batch, out_height, out_width, * deformable_groups * filter_height * filter_width * 3]. *@li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. 
*\n
*\n
* The following are the supported data types and data formats:
-*@verbatim
-    | Tensor    | x       | filter  | offsets | bias     | y
-    ------------|---------|---------|---------|----------|--------
-    | Data Type | float16 | float16 | float16 | float16  | float16
-    |           |---------|---------|---------|----------|--------
-    |           | float32 | float32 | float32 | float32  | float32
-    ------------|---------|---------|---------|----------|--------
-    | Format    | NCHW    | NCHW    | NCHW    | ND       | NCHW
-    |           | NHWC    | HWCN    | NHWC    |          | NHWC
-@endverbatim
+*\n
+*\n
+| Tensor    | x       | filter  | offsets | bias    | y       |\n
+| :-------: | :-----: | :-----: | :-----: | :-----: | :-----: |\n
+| Data Type | float16 | float16 | float16 | float16 | float16 |\n
+|           | float32 | float32 | float32 | float32 | float32 |\n
+| Format    | NCHW    | NCHW    | NCHW    | ND      | NCHW    |\n
+|           | NHWC    | HWCN    | NHWC    |         | NHWC    |\n
+*\n
 * For float32 type, the actual convolution calculation part on the chip is
 * based on float16.
 *\n
@@ -816,35 +961,23 @@ REG_OP(Conv2DCompress)
 *@li deformable_groups: Optional. An integer of type int32. The number of
 * deformable group partitions. In_channels must be divisible by
 * "deformable_groups". Defaults to 1.
+*@li modulated: Optional. Specifies the version of DeformableConv2D; true
+* means v2, false means v1. Currently only v2 is supported.
 *\n
 *\n
 * The following value range restrictions must be met:
-*@verbatim
-    | Name               | Field  | Scope
-    --------------------|--------|----------------------------
-    | Input Image Size   | H      | [1, 100000]
-    |                    | W      | [1, 4096]
-    --------------------|--------|----------------------------
-    | Filter Size        | H      | [1, 255]
-    |                    | W      | [1, 255]
-    --------------------|--------|----------------------------
-    | Stride             | H      | [1, 63]
-    |                    | W      | [1, 63]
-    --------------------|--------|----------------------------
-    | Padding            | Top    | [0, 255]
-    |                    | Bottom | [0, 255]
-    |                    | Left   | [0, 255]
-    |                    | Right  | [0, 255]
-    ------------ -------|--------|----------------------------
-    | Dilation           | H      | [1, 255]
-    |                    | W      | [1, 255]
-@endverbatim
-* "W(input)" indicate the image width after padding and W(filter) indicates the
-* filter width after dilation.
+*\n
+*\n
+| Name             | Field    | Scope                       |\n
+| :--------------: | :------: | :-------------------------: |\n
+| Input Image Size | H        | [1, 100000 / filter_height] |\n
+|                  | W        | [1, 4096 / filter_width]    |\n
+| Filter Size      | H        | [1, 63]                     |\n
+|                  | W        | [1, 63]                     |\n
 *\n
 *
 *@par Outputs:
-*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the
+* y: A 4D Tensor of output feature map. Has the same type as "x". With the
 * format "NHWC", the data is stored in the order of: [batch, out_height,
 * out_width, out_channels].
 *\n
@@ -855,21 +988,7 @@ REG_OP(Conv2DCompress)
 * out_width = (in_width + pad_left + pad_right -
 *              (dilation_w * (filter_width - 1) + 1))
 *             / stride_w + 1
-*
-*@attention Constraints:
-*@li The following restrictions on the output must be met:
-*@verbatim
-    | Output   | Restrictions
-    ----------|--------------------------------
-    | H == 1   | H * W(input) == H * W(filter)
-    | W == 1   |
-    ----------|--------------------------------
-    | H != 1   | W(input) == W(filter)
-    | W == 1   | Only for Ascend310 Hi3796V300CS
-@endverbatim
-* "H * W(input)" indicates the image size after padding and "H * W(filter)"
-* indicates the filter size after dilation. "W(input)" and W(filter) indicate
-* the same rule on the W dimension.
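+*
+* (Illustrative note, not part of this change.) With "modulated" = true the op
+* follows the DCNv2 formulation: for each output position p and kernel point k,
+*   y(p) = sum_k w[k] * sample(x, p + p_k + delta_p[k]) * m[k]
+* where the offsets delta_p[k] and the mask m[k] come from the "offsets" input
+* and sample() is bilinear interpolation.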
+*\n
 *
 *@par Quantization supported or not
 *@li No
 *
@@ -891,45 +1010,42 @@ REG_OP(DeformableConv2D)
     .ATTR(groups, Int, 1)
     .ATTR(data_format, String, "NHWC")
     .ATTR(deformable_groups, Int, 1)
+    .ATTR(modulated, Bool, true)
     .OP_END_FACTORY_REG(DeformableConv2D)
 
 /**
 *@brief Computes a 3D convolution given 5D "x" and "filter" tensors.
-
 *@par Inputs:
 * @li x: A 5D tensor. Must be one of the following types: float16,
 * (Currently does not support int8). The format of x is NCDHW or NDHWC.
 * @li filter: A 5D tensor of the same type as "x".
 * (Currently does not support int8).
- * The format is NCDHW, NDHWC or DHWCN . \n
+ * The format is NCDHW, NDHWC or DHWCN.
+ * @li bias: Optional. A 1D tensor of the same type as "x".
+ * @li offset_w: Optional. A 1D tensor for quantized deconvolution. Reserved. \n
 
-*@par Optional input:
- * @li bias: An optional 1D tensor of the same type as "x".
- * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n
-
-*@par Required Attributes:
- * @li strides: A list of 5 integers. Specifies the stride of the sliding window
- * for each dimension of "x".
+*@par Attributes:
+ * @li strides: Required. A list of 5 integers. Specifies the stride of the
+ * sliding window for each dimension of "x".
 * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A list of 6 integers.
+ * @li pads: Required. A list of 6 integers.
 * Supports only padding along the D, H and W dimensions in sequence of head,
- * tail, top, bottom, left and right . \n
-
-*@par Attributes:
- * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
+ * tail, top, bottom, left and right.
+ * @li dilations: Optional. A list of 5 integers. Specifies the dilation
+ * factor for each dimension of "x".
+ * The N, C and D dimensions must be 1. Has the same format as "x".
+ * @li groups: Optional. Number of blocked connections from input channels
+ * to output channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
 * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li dilations: A list of 5 integers. Specifies the dilation factor for each
- * dimension of "x", now only support [1,1,1,1,1]
- * The N and C dimensions must be 1. Has the same format as "x".
- * @li offset_x: An optional int. Input offset, used for quantized inference.
- * Defaults to 0. Reserved . \n
+ * @li offset_x: Optional. An int. Input offset, used for quantized inference.
+ * Defaults to 0. Reserved. \n
 
 *@par Outputs:
- *y: A Tensor. Has the same type and data format as "x". \n
+ * y: A Tensor. Has the same type and data format as "x". \n
 
 *@attention Constraints:
- *The image size after padding is greater than the filter size . \n
+ * The image size after padding is greater than the filter size. \n
 
 *@par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator conv3d.
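+ *
+ * (Hypothetical usage sketch, not part of this change; it assumes the standard
+ * set_input_* / set_attr_* helpers that REG_OP generates, and "x_node" and
+ * "filter_node" are placeholder upstream operators.)
+ * @code
+ *   ge::op::Conv3D conv("conv3d_0");
+ *   conv.set_input_x(x_node)
+ *       .set_input_filter(filter_node)
+ *       .set_attr_strides({1, 1, 1, 1, 1})  // N and C strides must be 1
+ *       .set_attr_pads({0, 0, 0, 0, 0, 0})  // head, tail, top, bottom, left, right
+ *       .set_attr_data_format("NDHWC");
+ * @endcode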
@@ -938,9 +1054,9 @@ REG_OP(DeformableConv2D)
REG_OP(Conv3D)
    .INPUT(x, TensorType({DT_FLOAT16}))
    .INPUT(filter, TensorType({DT_FLOAT16}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(dilations, ListInt, {1, 1, 1, 1, 1})
@@ -953,9 +1069,8 @@ REG_OP(Conv3D)
/**
*@brief Computes the gradients of convolution 3d with respect to the input.
*@par Inputs:
- * Three inputs:
- * @li input_size: A Tensor of type int32, int64. An integer vector representing
- * the shape of input, where input is a 5-D tensor
+ * @li input_size: A Tensor of type int32, int64. An integer vector
+ * representing the shape of input, where input is a 5-D tensor
 * [batch, depth, height, width, channels] or
 * [batch, channels, depth, height, width].
 * @li filter: A Tensor. Must be one of the following types: float16, float32.
@@ -963,27 +1078,26 @@ REG_OP(Conv3D)
 * @li out_backprop: A Tensor. Must have the same type as filter.
 * 5-D with shape [batch, depth, out_height, out_width, out_channels]
 * or [batch, out_channels, depth, out_height, out_width]. Gradients with
- * respect to the output of the convolution . \n
-
-*@par Required Attributes:
- * @li strides: A list of 5 integers. Specifies the stride of the sliding window
- * for each dimension of "x".
- * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A list of 6 integers.
- * Supports only padding along the D, H and W dimensions in sequence of head,
- * tail, top, bottom, left and right . \n
+ * respect to the output of the convolution. \n

*@par Attributes:
- * Three attributes:
- * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
- * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of the input, now only support [1,1,1,1,1]
+ * @li strides: Required. A list of 5 integers. Specifies the stride of the
+ * sliding window for each dimension of "out_backprop".
+ * The N and C dimensions must be 1. Has the same format as "out_backprop".
+ * @li pads: Required. A list of 6 integers.
+ * Supports only padding along the D, H and W dimensions in sequence of head,
+ * tail, top, bottom, left and right.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor
+ * for each dimension of the input.
+ * The N, C and D dimensions must be 1. Has the same format as "out_backprop".
+ * @li groups: Optional. Number of blocked connections from input channels
+ * to output channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
+ * Defaults to "NDHWC". Specify the data format of the input and output data. \n

*@par Outputs:
- * y: A Tensor. Has the same type as filter,and has same format as input_size
+ * y: A Tensor. Has the same type as filter, and has the same format as
+ * "input_size". \n

*@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_input
@@ -1002,44 +1116,44 @@ REG_OP(Conv3DBackpropInput)
/**
*@brief Computes the gradients of convolution 3d with respect to the input.
+
*@par Inputs:
- * Two inputs:
* @li filter: A Tensor whose type is float16. The format of filter is NCDHW,
* NDHWC or DHWCN.
* @li out_backprop: A Tensor. Must have the same type as filter. The format is
- * NDHWC or NCDHW. \n
+ * NDHWC or NCDHW. \n

-*@par Required Attributes:
- * @li strides: A list of 5 integers. Specifies the stride of the sliding window
- * for each dimension of "x".
- * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A list of 6 integers. Supports only padding along the D, H and W
- * dimensions in sequence of head, tail, top, bottom, left and right.
- * @li input_size: A tuple/list of type int32, int64. An integer vector
+*@par Attributes:
+ * @li input_size: Required. A tuple/list of type int32, int64. An integer vector
 * representing the shape of input, where input is a 5-D tensor
 * [batch, depth, height, width, channels] or
- * [batch, channels, depth, height, width] . \n
+ * [batch, channels, depth, height, width].
+ * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window
+ * for each dimension of "out_backprop".
+ * The N and C dimensions must be 1. Has the same format as "out_backprop".
+ * @li pads: Required. A list of 6 integers. Supports only padding along the D, H and W
+ * dimensions in sequence of head, tail, top, bottom, left and right.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
+ * dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "out_backprop".
+ * @li groups: Optional. Number of blocked connections from input channels to output
+ * channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
+ * Defaults to "NDHWC". Specify the data format of the input and output data. \n

-*@par Attributes:
- * Three attributes:
- * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
- * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of input, now only support [1,1,1,1,1]
*@par Outputs:
- * y: A Tensor. Has the same type and data format as out_backprop.
+ * y: A Tensor. Has the same type and data format as "out_backprop". \n
+
*@par Third-party framework compatibility
- * Compatible with Tensorflow's conv3d_backprop_input
+ * Compatible with TensorFlow's conv3d_backprop_input. \n

*@par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead.
+ * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead.
*/
REG_OP(Conv3DBackpropInputD)
    .INPUT(filter, TensorType({DT_FLOAT16}))
    .INPUT(out_backprop, TensorType({DT_FLOAT16}))
-    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .REQUIRED_ATTR(input_size, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
@@ -1072,9 +1186,7 @@ REG_OP(Conv3DBackpropInputD)
* @li c_t: A optinal Tensor dtype of float16, float32. The cell state at time t . \n

*@par Third-party framework compatibility:
-* Compatible with the Pytorch operator adds.
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* Compatible with the Caffe operator LSTM.
*/
REG_OP(LSTM)
    .INPUT(x, TensorType({DT_FLOAT16}))
@@ -1096,9 +1208,8 @@ REG_OP(LSTM)
/**
*@brief Computes the gradients of convolution3D with respect to the filter
*@par Inputs:
- * Three inputs:
- * @li x: A Tensor. Must be one of the following types: float16, float32.
- * Currently does not support double.
+ * @li x: A Tensor. Must be one of the following types: float16, float32,
+ * double (currently, double is not supported).
 * 5-D with shape [batch, in_depth, in_height, in_width, in_channels]
 * or [batch, in_channels, in_depth, in_height, in_width].
 * @li filter_size: A Tensor of type int32. An integer vector representing the
@@ -1111,25 +1222,24 @@ REG_OP(LSTM)
 * or [batch, out_channels, out_depth, out_height, out_width].
 * Gradients with respect to the output of the convolution. \n

-*@par Required Attributes:
- * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
- * window for each dimension of "x". The N and C dimensions must be 1.
- * Has the same format as "x".
- * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right]
- * pads on feature map . \n
-
*@par Attributes:
- * Three attributes:
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of input, now only support [1,1,1,1,1].
- * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
- * Defaults to "NDHWC". Specify the data format of the input and output data.
+ * @li strides: Required. A tuple/list of 5 integers. Specifies the stride
+ * of the sliding window for each dimension of "x". The N and C dimensions
+ * must be 1. Has the same format as "x".
+ * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom,
+ * left, right] pads on feature map.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor
+ * for each dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "x".
+ * @li groups: Optional. Number of blocked connections from input channels
+ * to output channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
+ * Defaults to "NDHWC". Specify the data format of the input and output data. \n

*@par Outputs:
- * y: A Tensor that has the same type as x
- * and the format is NDHWC, NCDHW or DHWCN.
+ * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW
+ * or DHWCN. \n
+
*@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_filter
*/
@@ -1147,8 +1257,8 @@ REG_OP(Conv3DBackpropFilter)
/**
*@brief Computes the gradients of convolution with respect to the filter.
+
*@par Inputs:
- * Two inputs:
* @li x: A Tensor of type float16.
* 5-D with shape [batch, in_depth, in_height, in_width, in_channels]
* or [batch, in_channels, in_depth, in_height, in_width].
* @li out_backprop: A Tensor. Must have the same type as x.
* or [batch, out_channels, out_depth, out_height, out_width].
* Gradients with respect to the output of the convolution. \n

-*@par Required Attributes:
- * @li filter_size: A tuple/list of type integers. An integer vector
+*@par Attributes:
+ * @li filter_size: Required. A tuple/list of type integers. An integer vector
 * representing the tensor shape of filter, where filter is a 5-D tensor
 * [filter_depth, filter_height, filter_width, in_channels, out_channels],
 * [out_channels, filter_depth, filter_height, filter_width, in_channels]
 * or [out_channels, in_channels, filter_depth, filter_height, filter_width].
- * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
+ * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
 * window for each dimension of "x".
 * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right]
- * pads on feature map. \n
-
-*@par Attributes:
- * Three attributes:
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of input, now only support [1,1,1,1,1].
- * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
- * Defaults to "NDHWC". Specify the data format of the input and output data.
+ * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right]
+ * pads on feature map.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
+ * dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "x".
+ * @li groups: Optional. Number of blocked connections from input channels to output
+ * channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
+ * Defaults to "NDHWC". Specify the data format of the input and output data. \n

*@par Outputs:
- * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN.
+ * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. \n
+
*@par Third-party framework compatibility
- * Compatible with Tensorflow's conv3d_backprop_filter
+ * Compatible with TensorFlow's conv3d_backprop_filter. \n
+
*@par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead.
+ * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead.
*/
-
-
REG_OP(Conv3DBackpropFilterD)
    .INPUT(x, TensorType({DT_FLOAT16}))
    .INPUT(out_backprop, TensorType({DT_FLOAT16}))
@@ -1201,46 +1309,43 @@ REG_OP(Conv3DBackpropFilterD)
/**
*@brief Computes the transpose of convolution 3d with respect to the input.
+
*@par Inputs:
- * Three inputs:
- * @li input_size: A Tensor of type int32. An integer vector representing the
- * shape of input.
+ * @li input_size: A Tensor of type int32, int64. An integer vector
+ * representing the shape of input.
 * @li x: A Tensor of type float16, currently does not support int8. The format
 * is NDHWC or NCDHW.
 * @li filter: A Tensor of type float16, currently does not support int8.
 * The format is NDHWC, NCDHW or DHWCN.
-
-*@par Optional input:
- * Two optional inputs
- * @li bias: An optional 1D tensor of the same type as "x". Reserved.
- * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n
-
-*@par Required Attributes:
- * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
- * window for each dimension of "x".
- * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A tuple/list of 6 integers
+ * @li bias: Optional. A 1D tensor of the same type as "x". Reserved.
+ * @li offset_w: Optional. A 1D tensor for quantized deconvolution.
+ * Reserved. \n

*@par Attributes:
- * Five attributes:
- * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
- * @li dilations: A tuple/list of 5 integers,
- * The dilation factor for each dimension of input, now only support [1,1,1,1,1]
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
+ * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of
+ * the sliding window for each dimension of "x".
+ * The N and C dimensions must be 1. Has the same format as "x".
+ * @li pads: Required. A tuple/list of 6 integers.
+ * @li dilations: Optional. A tuple/list of 5 integers,
+ * The dilation factor for each dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "x".
+ * @li groups: Optional. Number of blocked connections from input channels to
+ * output channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
 * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li output_padding: The size will be added in the output shape.
- * @li offset_x: Input offset_x value. Reserved.
+ * @li output_padding: Optional. The size will be added in the output shape.
+ * @li offset_x: Optional. Input offset_x value. Reserved. \n
+
*@par Outputs:
- * y: A Tensor. Has the same type and format as x.
+ * y: A Tensor. Has the same type and format as "x".
*/
REG_OP(Conv3DTranspose)
    .INPUT(input_size, TensorType({DT_INT32, DT_INT64}))
    .INPUT(x, TensorType({DT_FLOAT16}))
    .INPUT(filter, TensorType({DT_FLOAT16}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(dilations, ListInt, {1, 1, 1, 1, 1})
@@ -1252,45 +1357,44 @@ REG_OP(Conv3DTranspose)
/**
*@brief Computes the transpose of convolution 3d with respect to the input.
+
*@par Inputs:
* @li x: A Tensor of type float16, currently does not support int8.
* The format is NDHWC or NCDHW.
* @li filter: A Tensor of type float16, currently does not support int8.
* The format is NDHWC, NCDHW or DHWCN.
+ * @li bias: Optional. A 1D tensor of the same type as "x". Reserved.
+ * @li offset_w: Optional. A 1D tensor for quantized deconvolution. Reserved. \n

-*@par Optional inputs:
- * @li bias: An optional 1D tensor of the same type as "x". Reserved.
- * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n
-
-*@par Required Attributes:
- * @li input_size: A tuple/list of type int32.
- * An integer vector representing the shape of input
- * @li strides: A tuple/list of 5 integers.
+*@par Attributes:
+ * @li input_size: Required. A tuple/list of type int32.
+ * An integer vector representing the shape of input.
+ * @li strides: Required. A tuple/list of 5 integers.
 * Specifies the stride of the sliding window for each dimension of "x".
 * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A tuple/list of 6 integers . \n
-
-*@par Attributes:
- * Five attributes:
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of input, now only support [1,1,1,1,1]
- * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
+ * @li pads: Required. A tuple/list of 6 integers.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
+ * dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "x".
+ * @li groups: Optional. Number of blocked connections from input channels to output
+ * channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
 * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li output_padding: The size will be added in the output shape.
- * @li offset_x: Input offset_x value. Reserved.
+ * @li output_padding: Optional. The size will be added in the output shape.
+ * @li offset_x: Optional. Input offset_x value. Reserved. \n
+
*@par Outputs:
- * y: A Tensor. Has the same type and format as x.
+ * y: A Tensor. Has the same type and format as "x". \n
+
*@par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead.
+ * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead.
*/
REG_OP(Conv3DTransposeD)
    .INPUT(x, TensorType({DT_FLOAT16}))
    .INPUT(filter, TensorType({DT_FLOAT16}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .REQUIRED_ATTR(input_size, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
@@ -1314,8 +1418,24 @@ REG_OP(Conv3DTransposeD)
 * 4-D with shape [filter_height, filter_width, in_channels, out_channels]
 * or [out_channels, filter_height, filter_width, in_channels]
 * or [out_channels, in_channel, filter_height, filter_width].
- * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND".
+ * @li bias: An optional 1D tensor of type float16, float32, int32.
+ * Format is "ND".
 * @li offset_w: An optional 1D tensor for quantized inference. Reserved.
+ *\n
+ *\n
+ * The following are the supported data types and data formats:\n
+ *\n
+ *\n
+ | Tensor    | x       | filter  | bias    | y       |\n
+ |-----------|---------|---------|---------|---------|\n
+ | Data Type | float16 | float16 | float16 | float16 |\n
+ |           | int8    | int8    | int32   | int32   |\n
+ | Format    | NCHW    | NCHW    | ND      | NCHW    |\n
+ |           | NHWC    | HWCN    |         | NHWC    |\n
+ *\n
+ * For int8, the output must be followed by a dequant or requant operator.
+ *\n
+ *
*@par Required Attributes:
 * @li strides: A required tuple/list of 4 integers. The stride of the sliding
 * window for H/W dimension. The index of H/W is same as data_format.
 * @li groups: Number of blocked connections from input channels to output channels.
 * Defaults to "1".
 * @li dilations: A tuple/list of 4 integers, The dilation factor for each
 * dimension of input. Must be [1, 1, 1, 1].
- * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC".
- * Specify the data format of the input and output data.
+ * @li data_format: An optional string from: "NHWC", "NCHW".
+ * Defaults to "NHWC". Specify the data format of the input and output data.
 * @li output_padding: The size will be added in the output shape. Defaults
 * to [0, 0, 0, 0].
 * @li offset_x: An optional int. Input offset, used for quantized inference.
- * Defaults to "0".
+ * The negative offset added to the input image for int8 type. Ensure offset_x
+ * is within the effective range of int8 [-128, 127]. Defaults to "0".
+ *\n
+ *\n
+ * The following value range restrictions must be met:\n
+ *\n
+ *\n
+ | Name             | Field      | Scope        |\n
+ |------------------|------------|--------------|\n
+ | input_size       | H          | [1, 200000]  |\n
+ |                  | W          | [1, 4096]    |\n
+ | x (out_backprop) | H*strideH  | [1, 200000]  |\n
+ |                  | W*strideW  | [1, 4096]    |\n
+ | filter           | H          | [1, 255]     |\n
+ |                  | W          | [1, 255]     |\n
+ | y (fmap)         | H          | [1, 200000]  |\n
+ |                  | W          | [1, 4096]    |\n
+ | Stride           | H          | [1, 63]      |\n
+ |                  | W          | [1, 63]      |\n
+ | Padding          | Top        | [0, 255]     |\n
+ |                  | Bottom     | [0, 255]     |\n
+ |                  | Left       | [0, 255]     |\n
+ |                  | Right      | [0, 255]     |\n
+ | Dilation         | H          | [1, 255]     |\n
+ |                  | W          | [1, 255]     |\n
+ | Offset_x         |            | [-128, 127]  |\n
+ *\n
+ * In Ascend910, fmap or out_backprop's H and W do not support 1 when\n
+ * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
+ * and filter_width > fmap_width.
+ * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w
+ * < 4096. \n
+ *
*@par Outputs:
- * y: A Tensor. A Tensor of type float16 or int32, and has same format as
- * input_size.
+ * y: A Tensor of type float16, int32 or float32. Has the same format as
+ * "input_size".
+ *\n
+ * out_backprop_height = (fmap_height + pad_top + pad_bottom -
+ *                        (dilation_h * (filter_height - 1) + 1))
+ *                       / stride_h + 1
+ *\n
+ * out_backprop_width = (fmap_width + pad_left + pad_right -
+ *                       (dilation_w * (filter_width - 1) + 1))
+ *                      / stride_w + 1
+ *\n
+ *
*/
REG_OP(Conv2DTranspose)
    .INPUT(input_size, TensorType({DT_INT32, DT_INT64}))
    .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32}))
    .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32}))
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(dilations, ListInt, {1, 1, 1, 1})
@@ -1389,9 +1551,9 @@ REG_OP(Conv2DTranspose)
REG_OP(Conv2DTransposeD)
    .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32}))
    .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32}))
    .REQUIRED_ATTR(input_size, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
@@ -1405,21 +1567,20 @@ REG_OP(Conv2DTransposeD)
/**
*@brief Computes the deformed convolution output with the expected input
*@par Inputs:
- * Four inputs:
+ * Two inputs:
 * @li x: A Tensor of type float16,float32
 * @li offsets: A Tensor of type float16,float32.Deformation offset parameter.
-*@par Required Attributes:
+*@par Attributes:
 * @li strides: A tuple/list of 4 integers.The stride of the sliding window for
 * height and width for H/W dimension.
- * @li pads: A tuple/list of 4 integers.Padding added to each dimension
+ * @li pads: A tuple/list of 4 integers. Padding added to H/W dimension
 * of the input.
 * @li ksize: A tuple/list of 2 integers.kernel size.
-*@par Attributes:
- * Three attributes:
 * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension
 * of input. Defaults to [1, 1, 1, 1]
 * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x.
* @li deformable_groups: Specify the c-axis grouping number of input x.
+ * @li modulated: Specifies the version of DeformableConv2D: true means v2, false means v1.
*@par Outputs:
* y: A Tensor. A Tensor of type float16, float32.
*/
@@ -1433,7 +1594,122 @@ REG_OP(DeformableOffsets)
    .ATTR(dilations, ListInt, {1, 1, 1, 1})
    .ATTR(data_format, String, "NCHW")
    .ATTR(deformable_groups, Int, 1)
+    .ATTR(modulated, Bool, true)
    .OP_END_FACTORY_REG(DeformableOffsets)

+/**
+*@brief Computes the gradients of DeformableOffsets with respect to input and offsets
+*@par Inputs:
+ * Three inputs:
+ * @li grad: A Tensor of type float16, float32. Gradients with respect to the DeformableOffsets output.
+ * @li x: A Tensor of type float16, float32.
+ * @li offsets: A Tensor of type float16, float32. Deformation offset parameter.
+*@par Attributes:
+ * @li strides: A tuple/list of 4 integers. The stride of the sliding window for
+ * height and width for H/W dimension.
+ * @li pads: A tuple/list of 4 integers. Padding added to H/W dimension
+ * of the input.
+ * @li ksize: A tuple/list of 2 integers. Kernel size.
+ * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension
+ * of input. Defaults to [1, 1, 1, 1]
+ * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x.
+ * @li deformable_groups: Specify the c-axis grouping number of input x.
+ * @li modulated: Specifies the version of DeformableConv2D: true means v2, false means v1.
+*@par Outputs:
+ * @li grad_x: A Tensor of type float16, float32. Gradients with respect to input_x
+ * @li grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets
+*/
+REG_OP(DeformableOffsetsGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .REQUIRED_ATTR(ksize, ListInt)
+    .ATTR(dilations, ListInt, {1, 1, 1, 1})
+    .ATTR(data_format, String, "NCHW")
+    .ATTR(deformable_groups, Int, 1)
+    .ATTR(modulated, Bool, true)
+    .OP_END_FACTORY_REG(DeformableOffsetsGrad)
+
+/**
+*@brief Computes the deformed dilation output with the expected input
+*@par Inputs:
+ * One input:
+ * x: A Tensor of type int8, float16, float32
+*@par Attributes:
+ * @li dilations: A tuple/list of integers.
+ * @li padding_value: The value used to fill blank positions. Defaults to 0.0.
+ * @li pads: A tuple/list of integers.
+*@par Outputs:
+ * y: A Tensor of type int8, float16, float32.
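+*\n
+* For example (illustrative only, and assuming the common definition in
+* which a dilation of d inserts d - 1 positions filled with "padding_value"
+* between adjacent elements along an axis): a 2 x 2 input with a dilation of
+* 2 on both spatial axes yields a 3 x 3 output, i.e.
+* out_extent = (in_extent - 1) * dilation + 1 = (2 - 1) * 2 + 1 = 3.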
+*/
+REG_OP(Dilation)
+    .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(dilations, ListInt)
+    .ATTR(pads, ListInt, {})
+    .ATTR(padding_value, Float, 0.0)
+    .OP_END_FACTORY_REG(Dilation)
+
+/**
+*@brief Computes the post-cube processing output with the expected input
+*@par Inputs:
+ * Ten inputs:
+ * x1: A Tensor of type float16, bfloat16, float32, int32
+ * x2: A Tensor of type float16, int8, int4
+ * quant_scale_0: A Tensor of type uint64
+ * relu_weight_0: A Tensor of type float32
+ * clip_value_0: A Tensor of type float16, int8, int4
+ * quant_scale_1: A Tensor of type uint64
+ * relu_weight_1: A Tensor of type float32
+ * clip_value_1: A Tensor of type float16
+ * anti_quant_scale: A Tensor of type float16
+ * anti_quant_offset: A Tensor of type int8, int4
+*@par Attributes:
+ * @li fusion_op_list: A list of String.
+ * @li unit_list: A list of String.
+ * @li eltwise_mode: An optional string from "ADD", "SUB" and "".
+*@par Outputs:
+ * output: A Tensor of type float16, bfloat16, float32, int32, int8 or int4.
+*/
+REG_OP(FixPipe)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32}))
+    .OPTIONAL_INPUT(x2, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
+    .OPTIONAL_INPUT(quant_scale_0, TensorType({DT_UINT64}))
+    .OPTIONAL_INPUT(relu_weight_0, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(clip_value_0, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
+    .OPTIONAL_INPUT(quant_scale_1, TensorType({DT_UINT64}))
+    .OPTIONAL_INPUT(relu_weight_1, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(clip_value_1, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(anti_quant_scale, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(anti_quant_offset, TensorType({DT_INT8, DT_INT4}))
+    .OUTPUT(output, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32, DT_INT8, DT_INT4}))
+    .REQUIRED_ATTR(fusion_op_list, ListString)
+    .REQUIRED_ATTR(unit_list, ListString)
+    .ATTR(eltwise_mode, String, "")
+    .OP_END_FACTORY_REG(FixPipe)
+
+/**
+* @brief Solves a batch of isotonic regression problems. \n
+
+* @par Inputs:
+* @li input: A Tensor. \n
+
+* @par Attributes:
+* @li output_dtype: The data type of output. \n
+
+* @par Outputs:
+* @li output: A Tensor of type float16, float32, double.
+* @li segments: A Tensor of type int32. \n
+*/
+REG_OP(IsotonicRegression)
+    .INPUT(input, TensorType::RealNumberType())
+    .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(segments, TensorType({DT_INT32}))
+    .ATTR(output_dtype, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(IsotonicRegression)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index a013fb33..c5724f43 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -135,7 +135,8 @@ REG_OP(CheckValid)
* the value "4" refers to "x0", "x1", "y0", and "y1" . \n

*@par Attributes:
-*mode: Computation mode, a character string with the value range of [iou, iof] . \n
+*@li mode: Computation mode, a character string with the value range of [iou, iof].
+*@li eps: An optional float used to prevent division by 0. Defaults to 1.0 . \n
*@par Outputs:
*overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
@@ -150,9 +151,46 @@ REG_OP(Iou)
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(mode, String, "iou")
+    .ATTR(eps, Float, 1.0)
    .OP_END_FACTORY_REG(Iou)

/**
+*@brief First calculates the smallest enclosing (closure) area of the two
+* boxes and the IoU, then the proportion of the closure area that is not
+* covered by either box, and finally subtracts this proportion from the IoU
+* to obtain the GIoU . \n

+*@par Inputs:
+* Two inputs, including:
+*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
+* shape (N, 4). "N" indicates the number of bounding boxes, and the value
+* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
+*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
+* with shape (M, 4). "M" indicates the number of ground truth boxes, and
+* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n

+*@par Attributes:
+*@li trans: An optional bool, true for 'xywh', false for 'xyxy'.
+*@li is_cross: An optional bool, controls whether the output shape is [M, N] or [1, N]
+*@li mode: Computation mode, a character string with the value range of [iou, iof] . \n

+*@par Outputs:
+* overlap: A 2D Tensor of type float16 or float32 with shape [M, N] or [1, N],
+* specifying the IoU or IoF ratio . \n

+*@attention Constraints:
+* Only computation of float16 data is supported. To avoid overflow, the input
+* length and width are scaled by 0.2 internally.
+*/
+REG_OP(GIoU)
+    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(trans, Bool, false)
+    .ATTR(is_cross, Bool, true)
+    .ATTR(mode, String, "iou")
+    .OP_END_FACTORY_REG(GIoU)
+
/**
*@brief Performs the backpropagation of ROIAlign for training scenarios . \n

*@par Inputs:
the value "5" indicates the indexes of images where the ROIs are located, "x0",
*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is
-equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n
+equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" .
+*@li roi_end_mode: An optional attribute of type int, specifying the align mode. Defaults to "1" .\n

*@par Outputs:
*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features".
@@ -184,6 +223,7 @@ REG_OP(ROIAlignGrad)
    .REQUIRED_ATTR(pooled_height, Int)
    .REQUIRED_ATTR(spatial_scale, Float)
    .ATTR(sample_num, Int, 2)
+    .ATTR(roi_end_mode, Int, 1)
    .OP_END_FACTORY_REG(ROIAlignGrad)

/**
@@ -228,7 +268,7 @@ REG_OP(ROIAlign)
*@par Inputs:
* Two inputs, including:
-*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
+*@li x: An NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x" . \n

*@par Attributes:
@@ -254,34 +294,34 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
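+* As a worked illustration of the aspect-ratio rule above (values chosen for
+* illustration only): with min_size = 30.0 and aspect_ratio = 2.0, the
+* generated box height is 30.0 / sqrt(2.0), about 21.2, and the width is
+* 30.0 * sqrt(2.0), about 42.4.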
*/
- REG_OP(PriorBox)
-     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .REQUIRED_ATTR(min_size, ListFloat)
-     .REQUIRED_ATTR(max_size, ListFloat)
-     .REQUIRED_ATTR(aspect_ratio, ListFloat)
-     .ATTR(img_h, Int, 0)
-     .ATTR(img_w, Int, 0)
-     .ATTR(step_h, Float, 0.0)
-     .ATTR(step_w, Float, 0.0)
-     .ATTR(flip, Bool, true)
-     .ATTR(clip, Bool, false)
-     .ATTR(offset, Float, 0.5)
-     .ATTR(variance, ListFloat, {0.1})
-     .OP_END_FACTORY_REG(PriorBox);
+REG_OP(PriorBox)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(min_size, ListFloat)
+    .REQUIRED_ATTR(max_size, ListFloat)
+    .REQUIRED_ATTR(aspect_ratio, ListFloat)
+    .ATTR(img_h, Int, 0)
+    .ATTR(img_w, Int, 0)
+    .ATTR(step_h, Float, 0.0)
+    .ATTR(step_w, Float, 0.0)
+    .ATTR(flip, Bool, true)
+    .ATTR(clip, Bool, false)
+    .ATTR(offset, Float, 0.5)
+    .ATTR(variance, ListFloat, {0.1})
+    .OP_END_FACTORY_REG(PriorBox);

/**
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n

*@par Inputs:
* Six inputs, including:
-*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
+*@li x: An NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x".
-*@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
-*@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
-*@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box.
-*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box . \n
+*@li data_h: An NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
+*@li data_w: An NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
+*@li box_height: An NCHW tensor of type float32 or float16, specifying the height of each prior box.
+*@li box_width: An NCHW tensor of type float32 or float16, specifying the width of each prior box . \n

*@par Attributes:
*@li min_size: A required float32, specifying the minimum edge length of a square prior box.
@@ -306,32 +346,32 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
*/
- REG_OP(PriorBoxD)
-     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .REQUIRED_ATTR(min_size, ListFloat)
-     .REQUIRED_ATTR(max_size, ListFloat)
-     .ATTR(img_h, Int, 0)
-     .ATTR(img_w, Int, 0)
-     .ATTR(step_h, Float, 0.0)
-     .ATTR(step_w, Float, 0.0)
-     .ATTR(flip, Bool, true)
-     .ATTR(clip, Bool, false)
-     .ATTR(offset, Float, 0.5)
-     .ATTR(variance, ListFloat, {0.1})
-     .OP_END_FACTORY_REG(PriorBoxD);
+REG_OP(PriorBoxD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(min_size, ListFloat)
+    .REQUIRED_ATTR(max_size, ListFloat)
+    .ATTR(img_h, Int, 0)
+    .ATTR(img_w, Int, 0)
+    .ATTR(step_h, Float, 0.0)
+    .ATTR(step_w, Float, 0.0)
+    .ATTR(flip, Bool, true)
+    .ATTR(clip, Bool, false)
+    .ATTR(offset, Float, 0.5)
+    .ATTR(variance, ListFloat, {0.1})
+    .OP_END_FACTORY_REG(PriorBoxD);

/**
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n

*@par Inputs:
* Six inputs, including:
-*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
+*@li x: An NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x".
*@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y
@@ -358,29 +398,29 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
*/
- REG_OP(PriorBoxDV2)
-     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .REQUIRED_ATTR(min_size, ListFloat)
-     .REQUIRED_ATTR(max_size, ListFloat)
-     .ATTR(img_h, Int, 0)
-     .ATTR(img_w, Int, 0)
-     .ATTR(step_h, Float, 0.0)
-     .ATTR(step_w, Float, 0.0)
-     .ATTR(flip, Bool, true)
-     .ATTR(clip, Bool, false)
-     .ATTR(offset, Float, 0.5)
-     .ATTR(variance, ListFloat, {0.1})
-     .OP_END_FACTORY_REG(PriorBoxDV2);
+REG_OP(PriorBoxDV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(min_size, ListFloat)
+    .REQUIRED_ATTR(max_size, ListFloat)
+    .ATTR(img_h, Int, 0)
+    .ATTR(img_w, Int, 0)
+    .ATTR(step_h, Float, 0.0)
+    .ATTR(step_w, Float, 0.0)
+    .ATTR(flip, Bool, true)
+    .ATTR(clip, Bool, false)
+    .ATTR(offset, Float, 0.5)
+    .ATTR(variance, ListFloat, {0.1})
+    .OP_END_FACTORY_REG(PriorBoxDV2);

/**
*@brief Performs Position Sensitive ROI Pooling . \n

*@par Inputs:
* Two inputs, including:
-*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
+*@li x: A tensor of type float16 or float32, describing the feature
* map, dimension C1 must be equal to
* (int(output_dim+15)/C0))*group_size*group_size.
*@li rois: A tensor of type float16 or float32, with shape @@ -398,7 +438,7 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul * coordinates to the ROI coordinates . \n *@par Outputs: -*y: An NC1HWC0 tensor of type float16 or float32, describing the result +*y: A tensor of type float16 or float32, describing the result * feature map . \n *@attention Constraints: @@ -417,7 +457,7 @@ REG_OP(PSROIPooling) *@brief Returns detection result . \n *@par Inputs: -* Four inputs, including: +* Five inputs, including: *@li rois: An NCHW tensor of type floa16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput. *@li bbox_delta: An NCHWC0 tensor of type floa16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI. *@li score: An NCHWC0 tensor of type floa16 or float32, specifying the probability of each class. Class 0 is the background class. @@ -459,7 +499,7 @@ REG_OP(FSRDetectionOutput) *@brief Returns detection result . \n *@par Inputs: -* Four inputs, including: +* Three inputs, including: *@li bbox_delta: An ND tensor of type floa16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput. *@li score: An ND tensor of type floa16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput. *@li anchors: An ND tensor of type floa16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput. @@ -474,7 +514,6 @@ REG_OP(FSRDetectionOutput) *@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3 *@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1 *@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold -*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output". *@par Outputs: *@li out_boxnum: A tensor of type int32, specifying the number of output boxes. *@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box. @@ -531,10 +570,10 @@ as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(Yolo) - .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) .ATTR(classes, Int, 80) @@ -545,6 +584,172 @@ REG_OP(Yolo) .OP_END_FACTORY_REG(Yolo) /** +*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n + +*@par Inputs: +*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W), +where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged +as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n + +*@par Attributes: +*@li boxes: A required int32, specifying the number of anchor boxes. 
Defaults to "5" for V2 or "3" for V3. +*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h). +*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024]. +*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".Defaults to "V3" +*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false". +*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false". +*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n + +*@par Outputs: +*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. +*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. +*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n + +*@attention Constraints: +*@li This operator applies to YOLO v2,v3 and v5 networks. +*@li The succeeding layer of the Yolo operator must be operator Yolov5DetectionOutput. +*@par Third-party framework compatibility +* It is a custom operator. +*/ +REG_OP(YoloPreDetection) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(yolo_version, String, "V5") + .ATTR(softmax, Bool, false) + .ATTR(background, Bool, false) + .ATTR(softmaxtree, Bool, false) + .OP_END_FACTORY_REG(YoloPreDetection) + +/** +*@brief Performs YOLO V5 detection . \n + +*@par Inputs: +*Ten inputs, including: +*@li Operator Yolov5DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n +There are three Yolo operators at Yolov5DetectionOutput's preceding layer on Yolo v5. For details, see the description of operator Yolo. +*@li img_info: A float16 or float32, describing the image information including the required image height and width \n +* and the actual image height and width. + +*@par Attributes: +*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. + +*@li post_nms_topn: An optional int32. This attribute is reserved. 
+*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].

+*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n

+*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".

+*@par Outputs:
+*@li box_out: A tensor of type float16 or float32 with shape [batch, 6, post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2).
+* In the output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
+*@li box_out_num: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes.
+* Only the first of the 8 numbers is valid and indicates the number of valid boxes in each batch; the maximum number of valid boxes in each batch is 1024.

+*@attention Constraints:\n
+*@li This operator applies only to the YOLO v5 network.
+*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators.

+*@see Yolo()
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
+*/
+REG_OP(YoloV5DetectionOutput)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(biases, ListFloat)
+    .ATTR(boxes, Int, 3)
+    .ATTR(coords, Int, 4)
+    .ATTR(classes, Int, 80)
+    .ATTR(relative, Bool, true)
+    .ATTR(obj_threshold, Float, 0.5)
+    .ATTR(post_nms_topn, Int, 512)
+    .ATTR(score_threshold, Float, 0.5)
+    .ATTR(iou_threshold, Float, 0.45)
+    .ATTR(pre_nms_topn, Int, 512)
+    .ATTR(N, Int, 10)
+    .ATTR(resize_origin_img_to_net, Bool, false)
+    .ATTR(out_box_dim, Int, 3)
+    .ATTR(alpha, Float, 2.0)
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(box_out_num, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(YoloV5DetectionOutput)

+/**
+*@brief Performs YOLO V5 detection.

+*@par Inputs:
+*16 inputs, including:
+*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v5) are used as the inputs of operator Yolov5DetectionOutput.
+* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
+*@li imginfo: A float16, describing the image information including the required image height and width
+* and the actual image height and width.
+*@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
+* [[0, 1, 2...(width-1)], [0, 1, 2...(width-1)]...[0, 1, 2...(width-1)]], consisting of "height" groups of [0, 1, 2...(width-1)],
+* is formed for the three Yolo outputs, respectively. It's a dynamic input
+* (a host-side construction sketch is given further below). \n

+*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
+*@par Attributes:
+*@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
+*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
+*@li coords: Specifies the number of coordinate parameters. Must be 4.
+*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
+*@li relative: An optional bool. Defaults to and must be "true".
+*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
+*@li post_nms_topn: An optional int32. This attribute is reserved.
+*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
+*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
+*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
+*
+*@par Outputs:
+*@li box_out: A tensor of type float16 or float32 with shape [batch, 6, post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
+* describing the information of each output box.
+* In the output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
+*@li box_out_num: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes.
+* Only the first of the 8 numbers is valid and indicates the number of valid boxes in each batch; the maximum number of valid boxes in each batch is 1024.
+*
+*@attention Constraints:
+*@li This operator applies only to the YOLO v5 network.
+*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators.
+*@see Yolo()
+*@par Third-party framework compatibility
+* It is a custom operator.
+*/
+REG_OP(YoloV5DetectionOutputD)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(biases, ListFloat)
+    .ATTR(boxes, Int, 3)
+    .ATTR(coords, Int, 4)
+    .ATTR(classes, Int, 80)
+    .ATTR(relative, Bool, true)
+    .ATTR(obj_threshold, Float, 0.5)
+    .ATTR(post_nms_topn, Int, 512)
+    .ATTR(score_threshold, Float, 0.5)
+    .ATTR(iou_threshold, Float, 0.45)
+    .ATTR(pre_nms_topn, Int, 512)
+    .ATTR(N, Int, 10)
+    .ATTR(resize_origin_img_to_net, Bool, false)
+    .ATTR(out_box_dim, Int, 3)
+    .ATTR(alpha, Float, 2.0)
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(box_out_num, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(YoloV5DetectionOutputD)

+/**
*@brief Performs YOLO V2 detection . \n

*@par Inputs:
@@ -584,10 +789,10 @@ REG_OP(Yolo)
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV2DetectionOutput)
-    .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 5)
    .ATTR(coords, Int, 4)
@@ -598,7 +803,7 @@ REG_OP(YoloV2DetectionOutput)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
-    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV2DetectionOutput)

@@ -647,12 +852,12 @@ REG_OP(YoloV2DetectionOutput)
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
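+*\n
+* A minimal host-side sketch (illustrative only; the helper below is an
+* assumption, not part of this header) of how the windex/hindex index
+* matrices described above can be constructed:
+*@code
+* #include <vector>
+*
+* // windex: each of the "height" rows is [0, 1, ..., width - 1].
+* // hindex: every element of row h equals h.
+* void MakeIndexMats(int height, int width,
+*                    std::vector<float> &windex, std::vector<float> &hindex) {
+*   windex.resize(static_cast<size_t>(height) * width);
+*   hindex.resize(static_cast<size_t>(height) * width);
+*   for (int h = 0; h < height; ++h) {
+*     for (int w = 0; w < width; ++w) {
+*       windex[static_cast<size_t>(h) * width + w] = static_cast<float>(w);
+*       hindex[static_cast<size_t>(h) * width + w] = static_cast<float>(h);
+*     }
+*   }
+* }
+*@endcode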
*/ REG_OP(YoloV2DetectionOutputD) - .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 5) .ATTR(coords, Int, 4) @@ -663,7 +868,7 @@ REG_OP(YoloV2DetectionOutputD) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV2DetectionOutputD) @@ -707,16 +912,16 @@ REG_OP(YoloV2DetectionOutputD) * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV3DetectionOutput) - .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases_low, ListFloat) .REQUIRED_ATTR(biases_mid, ListFloat) .REQUIRED_ATTR(biases_high, ListFloat) @@ -729,7 +934,7 @@ REG_OP(YoloV3DetectionOutput) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutput) @@ -776,22 +981,22 @@ s *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead. 
*/ REG_OP(YoloV3DetectionOutputD) - .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases_low, ListFloat) .REQUIRED_ATTR(biases_mid, ListFloat) .REQUIRED_ATTR(biases_high, ListFloat) @@ -804,7 +1009,7 @@ REG_OP(YoloV3DetectionOutputD) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputD) @@ -848,7 +1053,7 @@ There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yol * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV3DetectionOutputV2) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) @@ -862,7 +1067,7 @@ REG_OP(YoloV3DetectionOutputV2) .ATTR(N, Int, 10) .ATTR(resize_origin_img_to_net, Bool, false) .ATTR(out_box_dim, Int, 3) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) @@ -910,9 +1115,9 @@ REG_OP(YoloV3DetectionOutputV2) * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead. 
*/ REG_OP(YoloV3DetectionOutputV2D) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) - .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) @@ -926,7 +1131,7 @@ REG_OP(YoloV3DetectionOutputV2D) .ATTR(N, Int, 10) .ATTR(resize_origin_img_to_net, Bool, false) .ATTR(out_box_dim, Int, 3) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D) @@ -966,10 +1171,16 @@ REG_OP(SPP) *@par Inputs: * Three inputs, including: -*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature -* map. -*@li rois: A tensor of type float16 or float32, with shape -* [batch, 5, roi_max_num], describing the RIOs. +*@li x: A tensor of type float16 or float32, describing the feature +* map. The data of x must be greater than or equal to "0.0". +*@li rois: A tensor of type float16 or float32, with 3D shape +* [batch, 5, roi_max_num], describing the ROIs. Each ROI consists of five +* elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates +* the index of the input feature map; "x1", "y1", "x2", or "y2" must be +* greater than or equal to "0.0". +* roi_max_num must be less than or equal to 6000 and must be divisible by 16. +* The input data of rois cannot exceed the width and height range of x; +* otherwise, the accuracy of the output result may not be as expected. *@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying * the number of ROIs per batch . \n @@ -984,30 +1195,30 @@ REG_OP(SPP) * coordinates of width to the ROI coordinates . \n *@par Outputs: -*y: An NC1HWC0 tensor of type float16 or float32, describing the result +*y: A tensor of type float16 or float32, describing the result * feature map . \n *@attention Constraints: -*@li For the feature map input: -(1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50. -(2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60. -(3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70. -(4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70. -(5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80. -(6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80. -(7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80. -(8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70. -(9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70. -(10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70. -(11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70. -(12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70. -(13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70. -(14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70. -(15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70. -(16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50. -(17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
-(18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40. -(19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40. +* For the feature map input: +*@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50. +*@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60. +*@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50. +*@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40. +*@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40. +*@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40. *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. */ @@ -1201,35 +1412,6 @@ REG_OP(RpnProposalsD) .OUTPUT(sorted_box, TensorType({DT_FLOAT16})) .OP_END_FACTORY_REG(RpnProposalsD) -/** -*@brief Computes Score Filte Pre-Sort function. - -*@par Inputs: -*Inputs include: -* @li rois: A Tensor. Must be float16. N-D with shape [N, 4]. -* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1]. - -*@par Attributes: -* @li score_threshold: required, float, threahold of topk process. -* @li k: required, Int, threahold of topk process. -* @li score_filter: bool, mark of score_filter. Defaults to "true" -* @li core_max_num: int, max number of core. Defaults to "8" -*@par Outputs: -* @li sorted_proposal: A Tensor. Must be float16. -* N-D with shape [8*6002, 8]. -* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8]. -*/ - -REG_OP(ScoreFiltePreSort) - .INPUT(rois, TensorType({DT_FLOAT16})) - .INPUT(cls_bg_prob, TensorType({DT_FLOAT16})) - .OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16})) - .OUTPUT(proposal_num, TensorType({ DT_UINT32})) - .REQUIRED_ATTR(score_threshold, Float) - .REQUIRED_ATTR(k, Int) - .ATTR(score_filter, Bool, true) - .ATTR(core_max_num, Int, 8) - .OP_END_FACTORY_REG(ScoreFiltePreSort) /** *@brief Computes Score Filte Pre-Sort function. @@ -1250,9 +1432,7 @@ REG_OP(ScoreFiltePreSort) * @li box_filter: bool, mark of box_filter. Defaults to "true" * @li core_max_num: int, max number of core. Defaults to "8" *@par Outputs: -* @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4]. -* @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1]. -* @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1]. +*sorted_box: A Tensor. Must be float16. N-D with shape [N, 1]. 
*/ REG_OP(RpnProposalPostProcessing) .INPUT(sorted_proposal, TensorType({DT_FLOAT16})) @@ -1370,7 +1550,8 @@ REG_OP(DecodeWheelsTarget) *@li max_size_per_class: A required attribute of type int, specifying the nms output num per class. *@li max_total_size: A required attribute of type int, specifying the the nms output num per batch. *@li change_coordinate_frame: A optional attribute of type bool, whether to normalize coordinates after clipping. -*@li transpose_box: A optional attribute of type bool, whether inserted transpose before this op. must be "false" . \n +* @li transpose_box: An optional attribute of type bool, specifying whether a transpose is inserted before this op. Must be "false". +* @li image_size: An optional attribute of type ListInt, specifying the size of the image. \n *@par Outputs: *@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4), @@ -1383,6 +1564,7 @@ REG_OP(DecodeWheelsTarget) *@attention Constraints: * Only computation of float16 data is supported. +* Note: if the class num per image * max_size_per_class is too large, compilation fails with an insufficient-memory error. */ REG_OP(BatchMultiClassNonMaxSuppression) .INPUT(boxes, TensorType({DT_FLOAT16})) @@ -1399,6 +1581,7 @@ REG_OP(BatchMultiClassNonMaxSuppression) .REQUIRED_ATTR(max_total_size, Int) .ATTR(change_coordinate_frame, Bool, false) .ATTR(transpose_box, Bool, false) + .ATTR(image_size, ListInt, {}) .OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression) /** @@ -1409,7 +1592,7 @@ REG_OP(BatchMultiClassNonMaxSuppression) * @li shape_hw: A 1D Tensor of type int32 . \n * @par Attributes: -* @li reversed_box: An optional bool, specifying the last two dims is "4,num" or +* reversed_box: An optional bool, specifying whether the last two dims are "4,num" or * "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false" . \n * @par Outputs: @@ -1456,45 +1639,765 @@ REG_OP(NormalizeBBox) * @li anchors: A Tensor. Must be int32. * *@par Attributes: -* @li scales: optional, listfloat, . +* @li scales: optional, listfloat. * @li decode_clip: optional, float, threahold of decode process. -* @li reversed_boxes: optional, bool,. +* @li reversed_boxes: optional, bool. * *@par Outputs: * y: A Tensor. Must have the same type as box_predictions. */ REG_OP(DecodeBboxV2) - .INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0}) .ATTR(decode_clip, Float, 0.0) .ATTR(reversed_box, Bool, false) .OP_END_FACTORY_REG(DecodeBboxV2) /** -*@brief Computes sort function. +*@brief Sorts the input tensor and returns the sorted values and their indices. * *@par Inputs: *Inputs include: -* x: A Tensor. Must be float16 or float32. -* +* x: A Tensor. Dtype support: float16, float, int16, int8, + uint8, int32, int64. + *@par Attributes: -* @li axis: optional, int. -* @li descending: optional,bool. +* @li axis: An optional attribute indicating the axis to sort along. +* @li descending: An optional attribute indicating whether to sort in descending order. * *@par Outputs: * @li y1: A Tensor. Must have the same type as x. -* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32. +* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. +* +*@attention Constraints: +* The operator relies on an unstable sorting algorithm.
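+*
+*@par Example:
+* A minimal host-side reference (illustrative only; the std::vector buffers are
+* assumptions) of the value/index semantics described above, for the 1-D,
+* descending case. Note std::sort is itself unstable, matching the constraint:
+* @code
+*   #include <algorithm>
+*   #include <cstdint>
+*   #include <numeric>
+*   #include <vector>
+*
+*   void SortDescending(const std::vector<float> &x,
+*                       std::vector<float> &y1, std::vector<int32_t> &y2) {
+*     y2.resize(x.size());
+*     std::iota(y2.begin(), y2.end(), 0);  // y2 = 0, 1, ..., n-1
+*     std::sort(y2.begin(), y2.end(),
+*               [&x](int32_t a, int32_t b) { return x[a] > x[b]; });
+*     y1.resize(x.size());
+*     for (size_t i = 0U; i < x.size(); ++i) { y1[i] = x[y2[i]]; }
+*   }
+*   // For x = {0.3, 0.9, 0.1}: y1 = {0.9, 0.3, 0.1}, y2 = {1, 0, 2}.
+* @endcode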
*/ REG_OP(Sort) - .INPUT(x, TensorType({ DT_FLOAT16 })) - .OUTPUT(y1, TensorType({ DT_FLOAT16 })) - .OUTPUT(y2, TensorType({ DT_INT32 })) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8, + DT_UINT8, DT_INT32, DT_INT64})) + .OUTPUT(y1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8, + DT_UINT8, DT_INT32, DT_INT64})) + .OUTPUT(y2, TensorType({DT_INT32})) .ATTR(axis, Int, -1) .ATTR(descending, Bool, false) .OP_END_FACTORY_REG(Sort) +/** +*@brief Computes iou for input bboxes and gtboxes. + +*@par Inputs: +* Two inputs, including: +*@li bboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1), +*@li gtboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1).\n + +*@par Attributes: +*@li mode: A optional attribute of type string, whether judge the mode of iou. \n + +*@par Outputs: +*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n + +*@attention Constraints: +* Only computation of float16 data is supported. + +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please use Iou instead. +*/ +REG_OP(PtIou) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(mode, String, "iou") + .OP_END_FACTORY_REG(PtIou) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n + +*@par Inputs: +*Input boxes and scores must be float16 type. Inputs include: +*@li boxes: A input tensor with shape [num_batches,spatial_dimension,4]. +The single box data format is indicated by center_point_box. +*@li scores: A input tensor with shape [num_batches,num_classes,spatial_dimension] +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding +whether boxes overlap too much with respect to IOU. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n + +*@par Attributes: +*center_point_box:Integer indicate the format of the box data. +The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] +where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair +of box corners and the coordinates can be provided as normalized +(i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models. +1 - the box data is supplied as [x_center, y_center, width, height]. + Mostly used for Pytorch models. \n + +*@par Outputs: +*@li selected_indices: A 2-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size. \n + +*@attention Constraints: +*Input boxes and scores must be float16 type . \n + +*@par Third-party framework compatibility +*Compatible with onnx NonMaxSuppression operator. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(NonMaxSuppressionV6) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) + .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .ATTR(center_point_box, Int, 0) + .ATTR(max_boxes_size, Int, 0) + .OP_END_FACTORY_REG(NonMaxSuppressionV6) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . 
\n + +*@par Inputs: +*Input boxes and scores must be float16 type. Inputs include: +*@li boxes: A input tensor with shape [num_batches,spatial_dimension,4]. +The single box data format is indicated by center_point_box. +*@li scores: A input tensor with shape [num_batches,num_classes,spatial_dimension] +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding +whether boxes overlap too much with respect to IOU. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n +*@li index_id: A input tensor with shape [num_batches,num_classes,spatial_dimension,3] +the last dim representing (batch_id,class_id,index_id) . \n + +*@par Attributes: +*@li center_point_box:Integer indicate the format of the box data. +The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] +where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair +of box corners and the coordinates can be provided as normalized +(i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models. +1 - the box data is supplied as [x_center, y_center, width, height]. + Mostly used for Pytorch models. \n +*@li max_boxes_size: An optional attribute integer representing the real maximum +*number of boxes to be selected by non max suppression . \n + +*@par Outputs: +*selected_indices: A 2-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size. \n + +*@attention Constraints: +*Input boxes and scores must be float16 type . \n + +*@par Third-party framework compatibility +*Compatible with onnx NonMaxSuppression operator. +*/ + +REG_OP(NonMaxSuppressionV7) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) + .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .ATTR(center_point_box, Int, 0) + .ATTR(max_boxes_size, Int, 0) + .OP_END_FACTORY_REG(NonMaxSuppressionV7) + +/** +*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n + +*@par Inputs: +* Two inputs, including: +*@li features: A 5HD Tensor list of type float32 or float16. +*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, +* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". + +*@par Attributes: +*@li finest_scale: A optional attribute of type int, specifying the scale of calculate levels of "rois". +*@li roi_scale_factor: A optional attribute of type float32, specifying the rescaling of "rois" coordinates. +*@li spatial_scale: A optional attribute of type list float32, specifying the scaling ratio of "features" +* to the original image. +*@li pooled_height: A optional attribute of type int32, specifying the H dimension. +*@li pooled_width: A optional attribute of type int32, specifying the W dimension. +*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency +* of each output. 
If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", +* which is a floating point number. Defaults to "0". +*@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg" . \n +*@li aligned: An optional attribute of type bool, specifying the align to corner. Defaults to true . \n + +*@par Outputs: +* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. +* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", +* "pooled_width", and "features", respectively. + +*@par Third-party framework compatibility +*Compatible with mmdetection SingleRoIExtractor operator. +*/ +REG_OP(RoiExtractor) + .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(index, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(finest_scale, Int, 56) + .ATTR(roi_scale_factor, Float, 0) + .ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32}) + .ATTR(pooled_height, Int, 7) + .ATTR(pooled_width, Int, 7) + .ATTR(sample_num, Int, 0) + .ATTR(pool_mode, String, "avg") + .ATTR(aligned, Bool, true) + .OP_END_FACTORY_REG(RoiExtractor) + +/** +*@brief Performs Position Sensitive PS ROI Pooling . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A tensor of type float16 or float32, describing the feature +* map, dimension C1 must be equal to +* (int(output_dim+15)/C0))*group_size*group_size. +*@li rois: A tensor of type float16 or float32, with shape +* [batch, 5, rois_num], describing the ROIs, each ROI consists of five +* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates +* the index of the input feature map, "x1", "y1", "x2", or "y2" must be +* greater than or equal to "0.0" . \n + +*@par Attributes: +*@li output_dim: A required int32, specifying the number of output channels, +* must be greater than 0. +*@li group_size: A required int32, specifying the number of groups to encode +* position-sensitive score maps, must be within the range (0, 128). +*@li spatial_scale: A required float32, scaling factor for mapping the input +* coordinates to the ROI coordinates . \n + +*@par Outputs: +*y: A tensor of type float16 or float32, describing the result +* feature map . \n + +*@attention Constraints: +* HC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16 +*/ +REG_OP(PSROIPoolingV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(spatial_scale, Float) + .REQUIRED_ATTR(output_dim, Int) + .REQUIRED_ATTR(group_size, Int) + .OP_END_FACTORY_REG(PSROIPoolingV2) + +/** +*@brief Performs Position Sensitive PS ROI Pooling Grad . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A tensor of type float16 or float32, describing the result +* feature map . \n +*@li rois: A tensor of type float16 or float32, with shape +* [batch, 5, rois_num], describing the ROIs, each ROI consists of five +* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates +* the index of the input feature map, "x1", "y1", "x2", or "y2" must be +* greater than or equal to "0.0" . \n + +*@par Attributes: +*@li output_dim: A required int32, specifying the number of output channels, +* must be greater than 0. 
+*@li group_size: A required int32, specifying the number of groups to encode +* position-sensitive score maps, must be within the range (0, 128). +*@li spatial_scale: A required float32, scaling factor for mapping the input +* coordinates to the ROI coordinates . \n +*@li input_size: A required ListInt, specifying the gradient input size: (H, W). + +*@par Outputs: +*y: A tensor of type float16 or float32, describing the feature +* map, dimension C1 must be equal to +* (int(output_dim+15)/C0))*group_size*group_size. + +*@attention Constraints: +* HC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16 +*/ +REG_OP(PSROIPoolingGradV2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(spatial_scale, Float) + .REQUIRED_ATTR(output_dim, Int) + .REQUIRED_ATTR(group_size, Int) + .REQUIRED_ATTR(input_size, ListInt) + .OP_END_FACTORY_REG(PSROIPoolingGradV2D) + +/** +*@brief Generates the responsible flags of anchors in a single feature map. + +*@par Inputs: +*@li gt_bboxes: Ground truth box, 2-D Tensor with shape `[batch, 4]`. + +*@par Attributes: +*@li featmap_size: A required ListInt, the size of the feature map. +*@li strides: A required ListInt, the stride of the current level. +*@li num_base_anchors: A required int, the number of base anchors. + +*@par Outputs: +*flags: The valid flags of each anchor in a single level. +*/ +REG_OP(AnchorResponseFlags) + .INPUT(gt_bboxes, TensorType({DT_FLOAT})) + .OUTPUT(flags, TensorType({DT_UINT8})) + .REQUIRED_ATTR(featmap_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(num_base_anchors, Int) + .OP_END_FACTORY_REG(AnchorResponseFlags) + +/** +*@brief Generates bounding boxes based on yolo's "anchor" and "ground-truth" boxes. +* It is a customized mmdetection operator . \n + +*@par Inputs: +* Three inputs, including: +*@li anchor_boxes: anchor boxes generated by the yolo training set. +* A 2D Tensor of type float32 or float16 with shape (N, 4). "N" indicates the number +* of ROIs, and the value "4" refers to (tx, ty, tw, th). +*@li gt_bboxes: target of the transformation, e.g., ground-truth boxes. +* A 2D Tensor of type float32 or float16 with shape (N, 4). +* "N" indicates the number of ROIs, and 4 indicates "dx", "dy", "dw", and "dh" . +*@li stride: Scale for each box. +* A 1D Tensor of type int32 shape (N,). +* "N" indicates the number of ROIs. \n + +*@par Attributes: +*performance_mode: selects the performance mode, "high_precision" or "high_performance". +* With "high_precision" and float32 input, the output tensor precision error +* is smaller than 0.0001; with "high_performance" and float32 input, +* the op achieves the best performance, but the precision error is only smaller than 0.005. + +*@par Outputs: +*encoded_bboxes: Bboxes generated based on "anchor_boxes" and "gt_bboxes". They have the +* same format and type as "anchor_boxes". +* +*@attention Constraints: +* The input anchor boxes only support a maximum N of 20480. \n +*/ +REG_OP(YoloBoxesEncode) + .INPUT(anchor_boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gt_bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(stride, TensorType({DT_INT32})) + .ATTR(performance_mode, String, "high_precision") + .OUTPUT(encoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(YoloBoxesEncode) + +/** +*@brief Assigns positive bboxes according to the IOU between bboxes and ground-truth boxes. It is a customized operator for the mmdetection GridAssigner.
+ +*@par Inputs: +* Eight inputs, including: +*@li assigned_gt_inds: Tensor of type float16 or float32, shape (n, ) +*@li overlaps: A Tensor. Datatype is same as assigned_gt_inds. IOU between gt_bboxes and bboxes. shape(k, n) +*@li box_responsible_flags: A Tensor. Support uint8. Flag to indicate whether box is responsible. +*@li max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=0). +*@li argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=0). +*@li gt_max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=1). +*@li gt_argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=1). +*@li num_gts: A Tensor. Support int32. real k. shape (1, ) + +*@par Attributes: +*@li pos_iou_thr: float. IOU threshold for positive bboxes. +*@li min_pos_iou: float. minimum iou for a bbox to be considered as a positive bbox +*@li gt_max_assign_all: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt. + +*@par Outputs: +* assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ). +*/ +REG_OP(GridAssignPositive) + .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(box_responsible_flags, TensorType({ DT_UINT8 })) + .INPUT(max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(argmax_overlaps, TensorType({ DT_INT32 })) + .INPUT(gt_max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(gt_argmax_overlaps, TensorType({ DT_INT32 })) + .INPUT(num_gts, TensorType({ DT_INT32 })) + .OUTPUT(assigned_gt_inds_pos, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(pos_iou_thr, Float) + .REQUIRED_ATTR(min_pos_iou, Float) + .REQUIRED_ATTR(gt_max_assign_all, Bool) + .OP_END_FACTORY_REG(GridAssignPositive) + +/** +* @brief Calculate the inverse gradient of GIoU. \n + +*@par Inputs: +*@li dy : data of grad increment, a 1D Tensor of type float16 or float32 with +* shape (N,). +*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with +* shape (4, N). "N" indicates the number of bounding boxes, and the value +* "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. +*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 +* with shape (4, M). "M" indicates the number of ground truth boxes, and +* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n + +*@par Attributes: +*@li trans: An optional attr, true for 'xywh', false for 'xyxy', only support true now. +*@li is_cross: An optional attr, if false M equals N, only support false now. +*@li mode: An optional attr, a character string with the value range of ['iou', 'iof'], +* only support 'iou' now. \n + +*@par Outputs: +*@li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N]. +*@li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M]. +*/ +REG_OP(GIoUGrad) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(trans, Bool, false) + .ATTR(is_cross, Bool, true) + .ATTR(mode, String, "iou") + .OP_END_FACTORY_REG(GIoUGrad) + +/** +* @brief Calculate the inverse gradient of DIoU. \n + +* @par Inputs: +* @li dy : data of grad increment, a 1D Tensor of type float16 or float32 with +* shape (N,). +* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with +* shape (4, N). 
"N" indicates the number of bounding boxes, and the value +* "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. +* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 +* with shape (4, M). "M" indicates the number of ground truth boxes, and +* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n + +* @par Attributes: +* @li trans: An optional attr, true for 'xywh', false for 'xyxy', only support true now. +* @li is_cross: An optional attr, if false M equals N, only support false now. +* @li mode: An optional attr, a character string with the value range of ['iou', 'iof'], +* only support 'iou' now. \n + +* @par Outputs: +* @li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N]. +* @li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M]. +*/ +REG_OP(DIoUGrad) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(trans, Bool, false) + .ATTR(is_cross, Bool, true) + .ATTR(mode, String, "iou") + .OP_END_FACTORY_REG(DIoUGrad) + +/** +* @brief Calculate the inverse gradient of CIoU. \n + +* @par Inputs: +* @li dy : data of grad increment, a 1D Tensor of type float16 or float32 with +* shape (N,). +* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with +* shape (4, N). "N" indicates the number of bounding boxes, and the value +* "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. +* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 +* with shape (4, M). "M" indicates the number of ground truth boxes, and +* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . +* @li atan_sub: Intermediate result of forward calculation, +* a 1D Tensor of type float16 or float32 with shape (N,). \n + +* @par Attributes: +* @li trans: An optional attr, true for 'xywh', false for 'xyxy', only support true now. +* @li is_cross: An optional attr, if false M equals N, only support false now. +* @li mode: An optional attr, a character string with the value range of ['iou', 'iof'], +* only support 'iou' now. \n + +* @par Outputs: +* @li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N]. +* @li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M]. +*/ +REG_OP(CIoUGrad) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(atan_sub, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(trans, Bool, false) + .ATTR(is_cross, Bool, true) + .ATTR(mode, String, "iou") + .OP_END_FACTORY_REG(CIoUGrad) + +/** +* @brief RotatedOverlaps . \n + +*@par Inputs: +*@li boxes : data of grad increment, a 3D Tensor of type float32 with +* shape (B, 5, N). "N" indicates the number of boxes, and the value +* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. +* @li query_boxes: Bounding boxes, a 3D Tensor of type float32 with +* shape (B, 5, K). "K" indicates the number of boxes, and the value +* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. + +* @par Attributes: +* trans: An optional attr, true for 'xyxyt', false for 'xywht'. + +*@par Outputs: +* overlaps: A 3D Tensor of type float32 with shape [B, N, K]. 
+ +*@attention Constraints: +* In each batch, the invalid box cannot appear before the valid box. +*/ +REG_OP(RotatedOverlaps) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(query_boxes, TensorType({DT_FLOAT})) + .OUTPUT(overlaps, TensorType({DT_FLOAT})) + .ATTR(trans, Bool, false) + .OP_END_FACTORY_REG(RotatedOverlaps) + +/** +*@brief RotatedIou . \n + +* @par Inputs: +*@li boxes : data of grad increment, a 3D Tensor of type float32 with +* shape (B, 5, N). "N" indicates the number of boxes, and the value +* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. +* @li query_boxes: Bounding boxes, a 3D Tensor of type float32 with +* shape (B, 5, K). "K" indicates the number of boxes, and the value +* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. + +* @par Attributes: +*@li trans: An optional attr, true for 'xyxyt', false for 'xywht'. +* @li mode: An optional attr, a character string with the value range of ['iou', 'iof'], +* only support 'iou' now. +*@li is_cross: Cross calculation when it is True, and one-to-one calculation when it is False. +*@li v_threshold: An optional attr, provide condition relaxation for intersection calculation. +* @li e_threshold: An optional attr, provide condition relaxation for intersection calculation. + +*@par Outputs: +* iou: A 3D Tensor of float32 with shape [B, N, K]. + +*@attention Constraints: +* In each batch, the invalid box cannot appear before the valid box. +*/ +REG_OP(RotatedIou) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(query_boxes, TensorType({DT_FLOAT})) + .OUTPUT(iou, TensorType({DT_FLOAT})) + .ATTR(trans, Bool, false) + .ATTR(mode, String, "iou") + .ATTR(is_cross, Bool, true) + .ATTR(v_threshold, Float, 0) + .ATTR(e_threshold, Float, 0) + .OP_END_FACTORY_REG(RotatedIou) + +/** +*@brief RotatedBoxEncode. \n + +*@par Inputs: +* Two inputs, including: +*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). +* "B" indicates the number of batch size +* "N" indicates the number of bounding boxes, and the value "5" refers to +* "x0", "x1", "y0", "y1" and "angle". +*@li gt_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). +* "B" indicates the number of batch size +* "N" indicates the number of bounding boxes, and the value "5" refers to +* "x0", "x1", "y0", "y1" and "angle". \n + +* @par Attributes: +*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle", +* defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. + +*@par Outputs: +*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N), +* specifying the variations between all anchor boxes and ground truth boxes. +*/ +REG_OP(RotatedBoxEncode) + .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gt_box, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0}) + .OP_END_FACTORY_REG(RotatedBoxEncode) + +/** +*@brief RotatedBoxDecode. \n + +*@par Inputs: +* Two inputs, including: +*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). +* "B" indicates the number of batch size +* "N" indicates the number of bounding boxes, and the value "5" refers to +* "x0", "x1", "y0", "y1" and "angle". +*@li deltas: A 3D Tensor of float32 (float16) with shape (B, 5, N). +* "B" indicates the number of batch size +* "N" indicates the number of bounding boxes, and the value "5" refers to +* "x0", "x1", "y0", "y1" and "angle". 
\n + +*@par Attributes: +*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle", +* defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. + +*@par Outputs: +*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N), +* specifying the boxes decoded from "anchor_box" and "deltas". +*/ +REG_OP(RotatedBoxDecode) + .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0}) + .OP_END_FACTORY_REG(RotatedBoxDecode) + +/** +* @brief Sorts rois to balance the load on each core. \n + +* @par Inputs: +* One input, including: +* @li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, +* the value "5" indicates the indexes of images where the ROIs are located, "batch", "x0", "y0", "x1", and "y1". + +* @par Outputs: +* @li balance_rois: A 2D Tensor of float32 or float16 with shape (N, 5), the balanced rois. +* @li index: A 1D Tensor of int32 with shape (N,), the indices of the original rois. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BalanceRois) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(balance_rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(index, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(BalanceRois) + +/** +* @brief First calculates the minimum enclosing area of the two boxes and the IoU; +* the CIoU is then obtained by combining the center distance, the width-to-height ratio, and the IoU. \n + +* @par Inputs: +* Two inputs, including: +* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with +* shape (4, N). "N" indicates the number of bounding boxes, and the value +* "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. +* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 +* with shape (4, M). "M" indicates the number of ground truth boxes, and +* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n + +* @par Attributes: +* @li trans: An optional bool, true for 'xywh', false for 'xyxy'. +* @li is_cross: An optional bool, controlling whether the output shape is [N, M] or [1, N]. +* @li mode: An optional string, computation mode, a character string with the value range of [iou, iof]. +* @li atan_sub_flag: An optional bool, controlling whether to output atan_sub. \n + +* @par Outputs: +* Two outputs, including: +* @li overlap: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], +* specifying the IoU or IoF ratio . +* @li atan_sub: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], +* specifying the intermediate result consumed by the CIoU gradient calculation . \n + +* @attention Constraints: +* "is_cross" only supports false; "atan_sub_flag" only supports true. +*/ +REG_OP(CIoU) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(atan_sub, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(trans, Bool, false) + .ATTR(is_cross, Bool, true) + .ATTR(mode, String, "iou") + .ATTR(atan_sub_flag, Bool, false) + .OP_END_FACTORY_REG(CIoU) + +/** +* @brief First calculates the minimum enclosing area of the two boxes and the IoU; +* the DIoU is then obtained by combining the center distance and the IoU. \n + +* @par Inputs: +* Two inputs, including: +* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with +* shape (4, N).
"N" indicates the number of bounding boxes, and the value +* "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. +* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 +* with shape (4, M). "M" indicates the number of ground truth boxes, and +* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n + +* @par Attributes: +* @li trans: An optional bool, true for 'xywh', false for 'xyxy'. +* @li is_cross: An optional bool, control whether the output shape is [N, M] or [1, N]. +* @li mode: An optional string, computation mode, a character string with the value range of [iou, iof]. \n + +* @par Outputs: +* overlap: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], +* specifying the IoU or IoF ratio . \n + +* @attention Constraints: +* "is_cross" only support false. +*/ +REG_OP(DIoU) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(trans, Bool, false) + .ATTR(is_cross, Bool, true) + .ATTR(mode, String, "iou") + .OP_END_FACTORY_REG(DIoU) + +/** +* @brief Calculate the intersection ratio of two rotated cuboids . \n + +* @par Inputs: +* @li bboxes : data of grad increment, a 3D Tensor of type float32 with +* shape (B, 7, N). "N" indicates the number of boxes, and the value +* "7" refers to [x, y, z, w, h, d, theta]. +* @li gtboxes: Bounding boxes, a 3D Tensor of type float32 with +* shape (B, 7, K). "K" indcates the number of boxes, and the value +* "7" refers to [x, y, z, w, h, d, theta]. + +* @par Outputs: +* iou: A 3D Tensor of float32 with shape [B, N, K]. + +* @attention Constraints: +* In each batch, the invalid box cannot appear before the valid box. +*/ +REG_OP(Iou3D) + .INPUT(bboxes, TensorType({DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT})) + .OUTPUT(iou, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(Iou3D) + +/** +* @brief Generates bounding boxes based on "priors" and "bboxes". +* It is a customized yolox operator . \n + +* @par Inputs: +* Two inputs, including: +* @li priors: prior sample boxes of origin image +* A 2D Tensor of type float32 or float16 with shape (N, 4). +* "N" indicates the number of boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". +* @li bboxes_input: bboxes predicted by the model. A 2D Tensor of type float32 or float16 with shape (B, N, 4). +* "B" indicates the batch_size, N indicates the number of boxes, 4 indicates "dx", "dy", "dw", and "dh" . \n + +* @par Outputs: +* bboxes_output: Bboxes generated based on "priors" and "bboxes_input". Have the same format +* and type as "bboxes_input". +*/ +REG_OP(YoloxBoundingBoxDecode) + .INPUT(priors, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(decoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(YoloxBoundingBoxDecode) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ + diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 35c4c7d4..65411e2a 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -54,13 +54,16 @@ REG_OP(LogSoftmaxGrad) *@par Inputs: *Two inputs, including: * @li features: A Tensor. Must be one of the following types: half, float32, double. -* A "batch_size * num_classes" matrix. -* @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes). +* A "batch_size * num_classes" matrix. +* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'. +* batch_size vector with values in [0, num_classes). +* This is the label for the given minibatch entry. \n *@par Outputs: -*loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". -*backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n +*@li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". +*@li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). +* Has the same type as "features" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SparseSoftmaxCrossEntropyWithLogits. @@ -82,8 +85,8 @@ REG_OP(SparseSoftmaxCrossEntropyWithLogits) * @li labels: A Tensor of the same type as "features". A "batch_size * num_classes" matrix . \n *@par Outputs: -*loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". -*backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n +* @li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". +* @li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SoftmaxCrossEntropyWithLogits. @@ -101,9 +104,11 @@ REG_OP(SoftmaxCrossEntropyWithLogits) *@par Inputs: * Two inputs, including: * @li softmax: Output of the softmax operator. Must be one of the following -* types: float16, float31, int32, int8, uint8. The format is NC1HWC0 or DN. -* @li grad_softmax: A Tensor. Has the same shape and type as "softmax". -* The format is NC1HWC0 or DN . \n +* types: float16, float32, int32, int8, uint8. +* @li grad_softmax: A Tensor. Has the same shape and type as "softmax". \n + +*@par Attributes: +* axes: An optional list of ints. Defaults to "{-1}" . \n *@par Outputs: *grad_x: A Tensor. Has the same shape and type as "softmax" . \n @@ -115,18 +120,20 @@ REG_OP(SoftmaxGrad) .INPUT(softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(grad_softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .OUTPUT(grad_x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(axes, ListInt, {-1}) .OP_END_FACTORY_REG(SoftmaxGrad) /** -*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n +* @brief Computes the sigmoid cross entropy loss of "predict" and "target" . *@par Inputs: -* Two inputs, including: +* Three inputs, including: *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . +*@li dout: A multi-dimensional Tensor of float16 or float32, specifying the gradient transferred from the upper layer.
\n *@par Outputs: -*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n +*gradient: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n *@par Third-party framework compatibility * Compatible with the scenario where "reduction" is set to "none"of PyTorch operator SigmoidCrossEntropyWithLogitsGrad. @@ -139,16 +146,15 @@ REG_OP(SigmoidCrossEntropyWithLogitsGrad) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGrad) /** -*@brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . \n +* @brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . *@par Inputs: -* Three inputs, including: +* Two inputs, including: *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. -*@li dout: A multi-dimensional Tensor of float16 or float32, specifying the gradient transferred from the upper layer . \n +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. \n *@par Outputs: -*gradient: Return gradient. Has the same dimensions and type as "predict" . \n +*loss: Return loss. Has the same dimensions and type as "predict" . \n *@par Third-party framework compatibility * Compatible with the scenario where "reduction" is set to "none"of PyTorch operator SigmoidCrossEntropyWithLogits. @@ -160,20 +166,20 @@ REG_OP(SigmoidCrossEntropyWithLogits) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) /** -*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n +*@brief Computes the sigmoid cross entropy loss of "predict" and "target". *@par Inputs: * four inputs, including: *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n -*@li weight: An multi-dimensional Tensor, specifying the weight value. \n +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. +*@li weight: An multi-dimensional Tensor, specifying the weight value. *@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n *@par Attributes: -*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n +*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean". \n *@par Outputs: -*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n +*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict". \n *@par Third-party framework compatibility * Compatible with PyTorch operator BCEWithLogitsLoss. @@ -188,7 +194,7 @@ REG_OP(SigmoidCrossEntropyWithLogitsV2) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsV2) /** -*@brief Computes the regression box of the RPN. It is a FasterRCNN operator . \n +* @brief Computes the regression box of the RPN. It is a FasterRCNN operator . *@par Inputs: * Two inputs, including: @@ -215,7 +221,7 @@ REG_OP(SmoothL1Loss) .OP_END_FACTORY_REG(SmoothL1Loss) /** -*@brief Performs the backpropagation of SmoothL1Loss for training scenarios . 
\n +* @brief Performs the backpropagation of SmoothL1Loss for training scenarios . *@par Inputs: * Three inputs, including: @@ -331,6 +337,41 @@ REG_OP(SoftmaxV2) .OP_END_FACTORY_REG(SoftmaxV2) /** +*@brief Computes softmax fused with DropOutDoMaskV3D. + +*@par Inputs: +*Two inputs, including: +* @li x: A mutable Tensor. The type only supports float16. +* @li mask: A mutable Tensor. Must meet all of the following rules: +* shape of mask should be 1D. +* dtype of mask should be uint8. +* value of shape should meet the following formula: +* value = (size(x) + 128 - 1) // 128 * 128 + +*@par Attributes: +* @li keep_prob: A required attribute of type float, specifying the probability +* that each element is kept . \n +* @li axes: A list of int. The dimensions softmax is performed on. Defaults +* to "[-1]" . \n + +*@par Outputs: +*@li y1: A mutable Tensor. Has the same type as "x". +*@li y2: A mutable Tensor. Has the same type as "x". \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(SoftmaxV2WithDropOutDoMaskV3D) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y1, TensorType({DT_FLOAT16})) + .OUTPUT(y2, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(keep_prob, Float) + .ATTR(axes, ListInt, {-1}) + .OP_END_FACTORY_REG(SoftmaxV2WithDropOutDoMaskV3D) + +/** *@brief Computes log softmax activations . \n *@par Inputs: @@ -385,7 +426,10 @@ REG_OP(ConfusionSoftmaxGrad) *@li keepdims: A bool Scalar. If true, retains reduced dimensions with length 1 . \n *@par Outputs: -*y: A Tensor dtype of float16, float32. +* y: A Tensor dtype of float16, float32. \n + +*@attention Constraints: +* THIS OPERATOR IS DEPRECATED. It will be removed in a future version. */ REG_OP(SoftmaxGradExt) .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -428,6 +472,33 @@ REG_OP(MVN) .OP_END_FACTORY_REG(MVN) /** +*@brief Performs mean-variance normalization of the input . \n + +*@par Inputs: +* One input: +*x: An NCHW tensor of type float16 or float32 . \n + +*@par Attributes: +*@li eps: An optional float32 epsilon for not dividing by zero. Defaults to "1e-9" . \n +*@li axes: A list of Integers, specifying the axes to reduce along. Defaults to "[0, 2, 3]" . \n + +*@par Outputs: +*y: An NCHW tensor of type float16 or float32 . \n + +*@attention Constraints: +* The input tensor must have the NCHW format, whose shape length must be 4. +*@par Third-party framework compatibility +* Compatible with the ONNX operator MeanVarianceNormalization. +*/ + +REG_OP(MVNV2) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as inputs" */ + .ATTR(eps, Float, 1e-9) + .ATTR(axes, ListInt, {0, 2, 3}) + .OP_END_FACTORY_REG(MVNV2) + +/** +*@brief Normalizes the input "x1" . \n *@par Inputs: @@ -499,6 +570,31 @@ REG_OP(LayerNorm) .OP_END_FACTORY_REG(LayerNorm) /** +*@brief Returns a tensor where each sub-tensor of input along dimension +* dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n + +*@par Inputs: +*One input, including: +* x: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Attributes: +* @li p: A required float, specifying the L_p norm. +* @li dim: A required int, the dimension along which sub-tensors are taken. +* @li maxnorm: A required float, the threshold the norm is compared against. \n + +*@par Outputs: +*One output, including: +* y: A Tensor with the same shape and dtype as "x".
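+*
+*@par Example:
+* A reference sketch of the renorm rule stated above (the rows-as-sub-tensors
+* layout and function name are assumptions for illustration): any sub-tensor
+* whose L_p norm exceeds "maxnorm" is rescaled by maxnorm / norm; others pass
+* through unchanged.
+* @code
+*   #include <cmath>
+*   #include <cstdint>
+*   #include <vector>
+*
+*   // Renormalizes each row of an n x m matrix so its L_p norm is <= maxnorm.
+*   void RenormRows(std::vector<float> &x, int32_t n, int32_t m,
+*                   float p, float maxnorm) {
+*     for (int32_t i = 0; i < n; ++i) {
+*       float norm = 0.0f;
+*       for (int32_t j = 0; j < m; ++j) {
+*         norm += std::pow(std::fabs(x[i * m + j]), p);
+*       }
+*       norm = std::pow(norm, 1.0f / p);
+*       if (norm > maxnorm) {
+*         const float scale = maxnorm / norm;
+*         for (int32_t j = 0; j < m; ++j) { x[i * m + j] *= scale; }
+*       }
+*     }
+*   }
+* @endcode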
+*/ +REG_OP(Renorm) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(p, Float) + .REQUIRED_ATTR(dim, Int) + .REQUIRED_ATTR(maxnorm, Float) + .OP_END_FACTORY_REG(Renorm) + +/** *@brief LayerNormGrad operator interface implementation * calculating: dy, x, variance, mean, gamma * pd_xl = data_dy*data_gamma @@ -587,6 +683,48 @@ REG_OP(LayerNormXBackprop) .OP_END_FACTORY_REG(LayerNormXBackprop) /** +*@brief LayerNormXBackpropV2 operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSLON), (-0.5)) + +*@par Inputs: +*Five inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*Two outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li res_for_gamma: A Tensor. Must be one of the following types: float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LayerNormXBackpropV2) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(LayerNormXBackpropV2) + +/** *@brief LayerNormBetaGammaBackprop operator interface implementation * calculating: dy, x, variance, mean * pd_xl = data_dy*data_gamma @@ -630,6 +768,81 @@ REG_OP(LayerNormBetaGammaBackprop) .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) /** +*@brief LayerNormBetaGammaBackpropV2 operator interface implementation +* calculating: dy, res_for_gamma +* pd_gamma = np.sum((data_dy*res_for_gamma), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Two inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li res_for_gamma: A Tensor of type float32, the intermediate result produced by LayerNormXBackpropV2 . \n + +*@par Attributes: +*shape_gamma: A required ListInt, specifying the shape of "gamma". + +*@par Outputs: +*Two outputs, including: +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
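+*
+*@par Example:
+* A scalar-loop restatement of the two reductions given above; flattening the
+* tensors to reduced_elems x param_elems is an assumption for illustration:
+* @code
+*   #include <cstdint>
+*   #include <vector>
+*
+*   // pd_gamma = sum(dy * res_for_gamma) and pd_beta = sum(dy), reduced over
+*   // every axis except the parameter (gamma/beta) axes.
+*   void BetaGammaBackprop(const std::vector<float> &dy,
+*                          const std::vector<float> &res_for_gamma,
+*                          int32_t reduced_elems, int32_t param_elems,
+*                          std::vector<float> &pd_gamma,
+*                          std::vector<float> &pd_beta) {
+*     pd_gamma.assign(param_elems, 0.0f);
+*     pd_beta.assign(param_elems, 0.0f);
+*     for (int32_t i = 0; i < reduced_elems; ++i) {
+*       for (int32_t j = 0; j < param_elems; ++j) {
+*         pd_gamma[j] += dy[i * param_elems + j] * res_for_gamma[i * param_elems + j];
+*         pd_beta[j] += dy[i * param_elems + j];
+*       }
+*     }
+*   }
+* @endcode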
+*/ +REG_OP(LayerNormBetaGammaBackpropV2) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(res_for_gamma, TensorType({DT_FLOAT})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(shape_gamma, ListInt) + .OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2) + +/** +* @brief LNDropoutGrad operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = dy*gamma +* sub_x_mean = x - mean +* var_elta_2 = np.power((variance + EPSLON), (-0.5)) +* pd_var = sum(pd_xl * sub_x_mean, reduce_axis, keepdims=True) * var_elta_2 * var_elta_2 * var_elta_2 * (-0.5) +* pd_mean = sum(pd_xl, reduce_axis, keepdims=True) * var_elta_2 * (-1.0) +* pd_x = pd_xl * var_elta_2 + pd_var * (2.0 / m) * sub_x_mean + pd_mean * (1.0 / m) +* pd_x_dropout = pd_x * mask * (1 / keep_prob) +* pd_gamma = sum(dy * sub_x_mean * var_elta_2, param_axis, keepdims=True) +* pd_beta = sum(dy, param_axis, keepdims=True) + +* @par Inputs: +* Six inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. +* @li mask: A Tensor. Must be one of the following types: uint8.\n + +* @par Outputs: +* Four outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li pd_x_dropout: A Tensor. Must be one of the following types: float16, float32. +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LNDropoutGrad) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x_dropout, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(keep_prob, Float) + .OP_END_FACTORY_REG(LNDropoutGrad) + +/** *@brief Return "output" according to the algorithm of dropout_do_mask: * scale_x = x *(1 / keep_prob) * output = select(mask == 1, scale_x, 0) @@ -647,7 +860,7 @@ REG_OP(LayerNormBetaGammaBackprop) * shape of "keep_prob" should be (1,) or [1,]. * Has the same type as "x" . \n -*@par Output: +*@par Outputs: *y: A mutable Tensor. Has the same type as "x". */ REG_OP(DropOutDoMask) @@ -656,7 +869,68 @@ REG_OP(DropOutDoMask) .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) .OP_END_FACTORY_REG(DropOutDoMask) - + +/** +*@brief Return "output" according to the algorithm of dropout_do_mask: +* scale_x = x *(1 / keep_prob) +* output = select(mask == 1, scale_x, 0) + +*@par Inputs: +*Three inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32 +* @li mask: A mutable Tensor. Must met all of the following rules: +* shape of mask should be 1D. +* dtype of mask should be uint8. 
+* value of shape should meet the following algorithm:
+* value = (size(x) + 128 - 1) // 128 * 128
+* @li keep_prob: A mutable Tensor. Must meet all of the following rules:
+* shape of "keep_prob" should be (1,) or [1,].
+* Has the same type as "x" . \n

+*@par Outputs:
+*y: A mutable Tensor. Has the same type as "x".
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DropOutDoMaskV3)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(DropOutDoMaskV3)
+
+/**
+*@brief Return "output" according to the algorithm of dropout_do_mask:
+*  scale_x = x *(1 / keep_prob)
+*  output = select(mask == 1, scale_x, 0)

+*@par Inputs:
+*Two inputs, including:
+* @li x: A mutable Tensor. Must be one of the following types:
+* float16, float32
+* @li mask: A mutable Tensor. Must meet all of the following rules:
+* shape of mask should be 1D.
+* dtype of mask should be uint8.
+* value of shape should meet the following algorithm:
+* value = (size(x) + 128 - 1) // 128 * 128
+*@par Attributes:
+* @li keep_prob: A mutable Tensor. Must meet all of the following rules:
+* shape of "keep_prob" should be (1,) or [1,].
+* Has the same type as "x" . \n

+*@par Outputs:
+*y: A mutable Tensor. Has the same type as "x".
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DropOutDoMaskV3D)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(keep_prob, Float)
+    .OP_END_FACTORY_REG(DropOutDoMaskV3D)
+
 /**
 *@brief Scales the input . \n
@@ -703,7 +977,7 @@ REG_OP(Scale)
 *@par Inputs:
 *One input, including:
-*@li x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32 . \n
+*x: A Tensor. Must be of 4-D shape, and only supports the following types: float16, float32 . \n
 *@par Attributes:
 *@li depth_radius: An optional int32, specifying the half-width of the normalization window. Defaults to "5".
@@ -785,7 +1059,7 @@ REG_OP(LRNGrad)
 *@li grads: A Tensor. Has the same type as acts.
 *@par Attributes:
- *@li blank_label: An optional attribute. Defaults to 0.
+ *blank_label: An optional attribute. Defaults to 0.
 *@par Third-party framework compatibility
 * Compatible with TensorFlow RNNTLoss operator.
@@ -801,82 +1075,56 @@ REG_OP(RNNTLoss)
    .OP_END_FACTORY_REG(RNNTLoss)

 /**
-*@brief Performs group normalization . \n
+* @brief Performs group normalization . \n

-*@par Inputs:
-* Five inputs, including: (NHWC, NCHW supported)
-*@li x: A 4D Tensor of type float16 or float32, with format NHWC or
-NCHW for 4D.
-*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format
-NHWC or NCHW. Specifies the scaling factor.
-*@li offset: A Tensor of type float32. Must be 1D if input "x" is with
-format NHWC or NCHW. Specifies the offset.
-*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format
-NHWC or NCHW. Reserved. Mu
-st be "None" if the operation is used for training.
-*@li variance: A Tensor of type float32. Must be 1D if input "x" is with
-format NHWC or NCHW. Specifies the variance used for inference. Reserved . \n
+* @par Inputs:
+* Three inputs, including:
+* @li x: An ND Tensor of type float16 or float32, with format NCHW for 4D.
+* @li gamma: A Tensor of type float16 or float32. Must be 1D. Specifies the scaling factor.
+* @li beta: A Tensor of type float16 or float32. Must be 1D. Specifies the offset. \n

-*@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to
+* @par Attributes:
+* @li num_groups: A required int32, specifying the number of groups.
+* @li eps: An optional float32, specifying the small value added to
 variance to avoid dividing by zero. Defaults to "0.0001".
-*@li data_format: An optional string, specifying the format of "x".
+* @li data_format: An optional string, specifying the format of "x".
 Defaults to "NHWC".
-*@li is_training: An optional bool, specifying if the operation is used for
+* @li is_training: An optional bool, specifying if the operation is used for
 training or inference. Defaults to "True" . \n

-*@par Outputs:
-* Five outputs, including: (NHWC, NCHW supported)
-*@li y: A 4D Tensor of type float16 or float32 for the normalized "x",
-with format NHWC or NCHW for 4D.
-*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with
-format NHWC or NCHW. Specifies the mean of "x".
-*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is
-with format NHWC or NCHW. Specifies the variance of "x".
-*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if
-input "x" is with format NHWC or NCHW. Specifies the mean o
-f "x" for gradient computation. Pass "None" to skip this output.
-*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if
-input "x" is with format NHWC or NCHW. Specifies the varian
-ce of "x" for gradient computation. Pass "None" to skip this output . \n
+* @par Outputs:
+* Three outputs, including:
+* @li y: An ND Tensor of type float16 or float32 for the normalized "x",
+with format NCHW for 4D.
+* @li mean: A Tensor of type float16 or float32. Must be 1D. Specifies the mean of "x".
+* @li variance: A Tensor of type float16 or float32. Must be 1D. Specifies the variance of "x". \n

-*@attention Constraints:
-*@li If the operation is used for inference and outputs "reserve_space_1"
-and "reserve_space_2" are available, then "reserve_space_1" has the same
-value as "mean" and "reserve_spa
-ce_2" has the same value as "variance".
-*@li For Ascend 310, the result accuracy fails due to the square root
-instruction . \n
+* @attention Constraints:
+* @li For Ascend 310, only the NCHW format, which can be transformed to 5HD, is supported. \n

-*@par Third-party framework compatibility
-*@li Compatible with the PyTorch operator GroupNorm.
+* @par Third-party framework compatibility
+* @li Compatible with the PyTorch operator GroupNorm.

-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
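+*
+* For illustration only, an assumed NumPy-style sketch of the computation,
+* with "x" viewed as (N, num_groups, C // num_groups, H, W):
+*   mean     = np.mean(x_g, axis=(2, 3, 4), keepdims=True)
+*   variance = np.var(x_g, axis=(2, 3, 4), keepdims=True)
+*   y        = gamma * (x_g - mean) / np.sqrt(variance + eps) + beta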
*/ REG_OP(GroupNorm) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(scale, TensorType({DT_FLOAT,})) - .INPUT(offset, TensorType({DT_FLOAT,})) - .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) - .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(batch_mean, TensorType({DT_FLOAT})) - .OUTPUT(batch_variance, TensorType({DT_FLOAT})) - .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) - .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) - .ATTR(epsilon, Float, 0.0001) + .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(num_groups, Int) .ATTR(data_format, String, "NHWC") + .ATTR(eps, Float, 0.0001) .ATTR(is_training, Bool, true) - .ATTR(num_groups, Int, 2) .OP_END_FACTORY_REG(GroupNorm) /** *@brief Performs instance normalization . \n *@par Inputs: -* Five inputs, including: (NC1HWC0, supported) -*@li x: A 5D Tensor of type float16 or float32, NC1HWC0. +* Five inputs, including: +*@li x: A 5D Tensor of type float16 or float32. *@li gamma: A Tensor of type float32. A 5D Tensor for scaling factor, to scale the normalized x. *@li beta: A Tensor of type float32. @@ -895,7 +1143,7 @@ the value used for the running_mean and running_var computation. Default: "0.1". variance to avoid dividing by zero. Defaults to "0.00001" . \n *@par Outputs: -* Three outputs, including: (NHWC, NCHW NC1HWC0 supported) +* Three outputs, including: (NHWC, NCHW supported) *@li y: A 5D tensor of type float16 or float32 for the normalized "x", *@li batch_mean: A Tensor of type float32. Specifies the mean of "x". @@ -928,7 +1176,7 @@ REG_OP(InstanceNormV2) *@brief Performs instance normalization for inference. *@par Inputs:\n -* Five inputs, including: (NC1HWC0 supported) +* Five inputs, including: *@li x: A Tensor of type float16 or float32. *@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. *@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. @@ -960,24 +1208,631 @@ REG_OP(INInferV2D) .OP_END_FACTORY_REG(INInferV2D) /** -*@brief Performs instance normalization for inference of InHost part. +* @brief InstanceNorm operator interface implementation. -*@par Inputs:\n -* One input, including: (NC1HWC0 supported) -* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. +* @li beta: A Tensor. Must be one of the following types: float16, float32. + +* @par Attributes: +* @li data_format: An attribute of type String \n +* @li epsilon: An attribute of type Float. \n + +* @par Outputs: +* Three outputs, including: +* @li y: A Tensor. Has the same type as "x". \n +* @li mean: A Tensor. Has the same type as "x". \n +* @li variance: A Tensor. Has the same type as "x". 
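+*
+* An illustrative NumPy-style sketch of the assumed computation for an NCHW
+* input (statistics over the spatial axes of each sample and channel):
+*   mean = np.mean(x, axis=(2, 3), keepdims=True)
+*   variance = np.var(x, axis=(2, 3), keepdims=True)
+*   y = gamma * (x - mean) / np.sqrt(variance + epsilon) + beta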
\n + +*/ +REG_OP(InstanceNorm) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(data_format, String, "NDHWC") + .ATTR(epsilon, Float, 1e-6) + .OP_END_FACTORY_REG(InstanceNorm) + +/** +* @brief InstanceNormGrad operator interface implementation. + +* @par Inputs: +* Five inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n + +* @par Outputs: +* Three outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*/ +REG_OP(InstanceNormGrad) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(InstanceNormGrad) + +/** +* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n + +* @par Inputs: +* Three inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32. +* Required. +* @li input: A Tensor. Has the same type as "grad". Required. +* @li target: A Tensor. Has the same type as "grad". Required. \n + +* @par Attributes: +* @li reduction: An optional attribute of type String. Defaults to "mean". \n +* @li log_target: An optional attribute of type Bool. Defaults to false. \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "grad". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator KlDivLossGrad. +*/ +REG_OP(KlDivLossGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .ATTR(log_target, Bool, false) + .OP_END_FACTORY_REG(KlDivLossGrad) + +/** +* @brief Computes l1_loss_grad or l1_loss_backward. \n + +* @par Inputs: +* Three inputs, including: +* @li grads: A Tensor. Must be one of the following types: float16, float32. +* Required. +* @li predict: A Tensor. Has the same type as "grads". Required. +* @li label: A Tensor. Has the same type as "grads". Required. \n + +* @par Attributes: +* reduction: An optional attribute of type String. Defaults to "mean". \n + +* @par Outputs: +* y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator L1LossGrad. 
+*/
+REG_OP(L1LossGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(L1LossGrad)
+
+/**
+* @brief Computes loss of lp, p=1,2,3....

+* @par Inputs:
+* @li predict: An ND tensor of type float16, float32.
+* @li label: An ND tensor of type float16, float32. \n

+* @par Attributes:
+* @li p: A required int attribute that decides which loss to compute; currently "p" can only be 1, to compute l1_loss.
+* @li reduction: An optional string. Defaults to "mean". \n

+* @par Outputs:
+* y: An ND tensor with the same shape and type as "predict". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator LpLoss.
+*/
+REG_OP(LpLoss)
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(p, Int)
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(LpLoss)
+
+/**
+* @brief Computes gradients of mse loss.

+* @par Inputs:
+* @li predict: An ND tensor of type float16, float32.
+* @li label: An ND tensor of type float16, float32.
+* @li dout: An ND tensor of type float16, float32. \n

+* @par Attributes:
+* reduction: An optional string. Defaults to "mean". \n

+* @par Outputs:
+* y: An ND tensor with the same shape and type as "predict". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator MseLossGrad.
+*/
+REG_OP(MseLossGrad)
+    .INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(MseLossGrad)
+
+/**
+* @brief Computes mse loss.
+* @par Inputs:
+* Two inputs, including:
+* @li predict: An ND Tensor of dtype float16 or float32.
+* @li label: An ND Tensor of dtype float16 or float32.\n
+*
+* @par Attributes:
+* reduction: An optional string from "sum", "none", and "mean". Defaults to "mean".\n
+*
+* @par Outputs:
+* y: When reduction is "sum" or "mean", y is a scalar. When reduction is "none", y has the
+* same type and shape as "predict".\n
+*/
+REG_OP(MseLoss)
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(MseLoss)
+
+/**
+* @brief Calculates the reversed outputs of the function "smooth_l1_loss_v2". \n

+* @par Inputs:
+* Three Inputs, including:
+* @li predict: A Tensor. Must be one of the following types:
+* float16, float32.
+* @li label: A Tensor. Has the same type as "predict".
+* @li dout: A Tensor. Has the same type as "predict". \n

+* @par Attributes:
+* Two Attributes, including:
+* @li sigma: An optional float. Defaults to 1.0. \n

+* @li reduction: An optional string. Defaults to "mean",
+* Must be one of the following: "none", "mean", "sum". \n

+* @par Outputs:
+* gradient: A Tensor. Has the same type as "predict". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator SmoothL1LossBackward.
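+*
+* An illustrative sketch of one common parameterization (an assumption, not
+* part of the op spec), with diff = predict - label:
+*   gradient = dout * diff * sigma^2     if |diff| < 1 / sigma^2
+*   gradient = dout * sign(diff)         otherwise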
+*/
+REG_OP(SmoothL1LossGradV2)
+    .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(sigma, Float, 1.0)
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SmoothL1LossGradV2)
+
+/**
+* @brief Creates a criterion that uses a squared term if the absolute
+* element-wise error falls below beta and an L1 term otherwise. It is
+* less sensitive to outliers than the MSELoss and in some cases prevents
+* exploding gradients.

+* @par Inputs:
+* @li predict: A multi-dimensional Tensor of type float16 or float32,
+* specifying the predictive value. \n
+* @li label: A multi-dimensional Tensor of type float16 or float32,
+* specifying the target value. \n

+* @par Attributes:
+* @li sigma: An optional float. Specifies the threshold of loss. Defaults
+* to "1.0". \n
+* @li reduction: An optional str. Specifies the reduction to apply to
+* the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
+* 'mean': the sum of the output will be divided by the number of elements in
+* the output, 'sum': the output will be summed. Default: 'mean'. \n

+* @par Outputs:
+* loss: Indicates the loss between the predictive value and target value.
+* Has the same dimensions as "predict". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator smooth_l1_loss. \n
+*/
+REG_OP(SmoothL1LossV2)
+    .INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(sigma, Float, 1.0)
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SmoothL1LossV2)
+
+/**
+* @brief Computes Centralization. result = x - mean(x, axes)

+* @par Inputs:
+* x: An ND tensor of type float16, float32.
+* @par Attributes:
+* axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
+* Must be in the range [-rank(x), rank(x)).
+* @par Outputs:
+* y: A Tensor. Has the same type as "x". \n

+* @par Third-party framework compatibility
+* custom operator \n
+*/
+REG_OP(Centralization)
+    .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(axes, ListInt, {-1})
+    .OP_END_FACTORY_REG(Centralization)
+
+/**
+*@brief Roll the tensor along the given dimension(s).
+* Elements that are shifted beyond the last position are re-introduced at the first position.
+* If a dimension is not specified, the tensor will be flattened before rolling and then restored to the original shape. \n

+*@par Inputs:
+*One input, including:
+* x: A tensor. Must be one of the following types:
+* float16, float32, int32, uint32, int8, uint8. \n

*@par Attributes:
-* epsilon: An optional float32, specifying the small value added to
-variance to avoid dividing by zero. Defaults to "0.00001" . \n
+* @li shifts: The number of places by which the elements of the tensor are shifted. \n
+* @li dims: Axis along which to roll. \n

-*@par Outputs:\n
-* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
+*@par Outputs:
+* y: A Tensor with the same type and shape as "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Roll.
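+*
+*@par Example (illustrative):
+* With x = [0, 1, 2, 3, 4], shifts = [2] and dims = [0], the result is
+* y = [3, 4, 0, 1, 2].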
\n */
-REG_OP(InHost)
-    .INPUT(variance, TensorType({DT_FLOAT}))
-    .OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
-    .ATTR(epsilon, Float, 0.00001)
-    .OP_END_FACTORY_REG(InHost)
-}  // namespace ge
+REG_OP(Roll)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8}))
+    .REQUIRED_ATTR(shifts, ListInt)
+    .ATTR(dims, ListInt, {})
+    .OP_END_FACTORY_REG(Roll)
+
+/**
+* @brief Roll the tensor along the given dimension(s).

+* @par Inputs:
+* Three inputs, including:
+* @li input: A tensor to roll.
+* @li shift: The number of places by which the elements of the tensor are shifted. \n
+* @li axes: Axis along which to roll. \n

+* @par Outputs:
+* output: A Tensor with the same type and shape as "input". \n
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Roll. \n
+*/
+REG_OP(RollV2)
+    .INPUT(input, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \
+        DT_FLOAT,DT_DOUBLE}))
+    .INPUT(shift, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(axes, TensorType({DT_INT32,DT_INT64}))
+    .OUTPUT(output, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \
+        DT_FLOAT,DT_DOUBLE}))
+    .OP_END_FACTORY_REG(RollV2)
+
+/**
+ * @brief Calculates the loss. Creates a criterion that optimizes a two-class classification
+ * logistic loss between input_x and input_y (containing 1 or -1). \n

+ * @par Inputs:
+ * Two inputs, including:
+ * @li input_x: A tensor. Must be one of the following types:
+ * float16, float32. \n
+ * @li input_y: A tensor. Must be one of the following types:
+ * float16, float32. \n

+ * @par Attributes:
+ * reduction: An optional string. Defaults to "mean". \n

+ * @par Outputs:
+ * output_z: While reduction == "none", a Tensor with the same type and shape as "input_x". \n
+ * While reduction == "sum" or "mean", a Tensor with the same type as "input_x", whose shape is (1,).

+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator SoftMarginLoss. \n
+ */
+REG_OP(SoftMarginLoss)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(reduction, String, "mean")
+    .OUTPUT(output_z, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(SoftMarginLoss)
+
+/**
+* @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2.

+* @par Inputs:
+* @li predict: An ND tensor of type float16, float32.
+* @li target: An ND tensor of type float16, float32.
+* @li dout: An ND tensor of type float16, float32.
+* @li weight: An optional ND tensor of type float16, float32.
+* @li pos_weight: An optional ND tensor of type float16, float32. \n

+* @par Attributes:
+* reduction: An optional string. Defaults to "mean". \n

+* @par Outputs:
+* gradient: An ND tensor with the same shape and type as "predict". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad.
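+*
+* An illustrative sketch of the assumed element-wise computation, ignoring the
+* optional "weight" and "pos_weight" rescaling:
+*   gradient = dout * (sigmoid(predict) - target)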
+*/
+REG_OP(SigmoidCrossEntropyWithLogitsGradV2)
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2)
+/**
+ * @brief Calculates the PoissonNllLoss function.
+ * target ~ Poisson(input); loss(input, target) = input - target * log(input) + log(target!) \n

+ * @par Inputs:
+ * Two inputs, including:
+ * @li input_x: A tensor. Must be one of the following types: float16, float32.
+ * @li target: A tensor. Must be one of the following types: float16, float32. \n

+ * @par Attributes:
+ * Four attributes, including:
+ * @li log_input: An optional bool. Defaults to "True".
+ * @li full: An optional bool. Defaults to "False".
+ * @li eps: An optional float. Defaults to "1e-8".
+ * @li reduction: An optional string. Defaults to "mean". \n

+ * @par Outputs:
+ * loss: A Tensor with the same element type as the two inputs. \n

+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator PoissonNllLoss. \n
+ */
+REG_OP(PoissonNllLoss)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(log_input, Bool, true)
+    .ATTR(full, Bool, false)
+    .ATTR(eps, Float, 1e-8)
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(PoissonNllLoss)
+/**
+ *@brief rnn_gen_mask
+ * @par Inputs:
+ * seq_length: An ND Tensor of type int32. Records the current length of each batch.\n
+ *
+ * @par Attributes:
+ * @li num_step: A required int.\n
+ * @li hidden_size: A required int. \n
+ *
+ * @par Outputs:
+ * seq_mask: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
+ *
+ */
+REG_OP(RnnGenMask)
+    .INPUT(seq_length, TensorType({DT_INT32}))
+    .OUTPUT(seq_mask, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(num_step, Int)
+    .REQUIRED_ATTR(hidden_size, Int)
+    .OP_END_FACTORY_REG(RnnGenMask)
+
+/**
+* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss)
+* between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n

+* @par Inputs:
+* Two inputs, including:
+* @li x: A tensor. Must be one of the following types:
+* float16, float32.
+* @li target: A tensor. Must be of the following type:
+* int32. \n

+* @par Attributes:
+* reduction: An optional string. Defaults to "mean". \n

+* @par Outputs:
+* @li y: A Tensor with the same element type as input "x". \n
+* @li is_target: A Tensor with the same element type as input "target". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator MultiLabelMarginLoss. \n
+*/
+REG_OP(MultilabelMarginLoss)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(target, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(is_target, TensorType({DT_INT32}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(MultilabelMarginLoss)
+
+/**
+* @brief Performs batch normalization . \n
+* @par Inputs:
+* Two inputs, including:
+* @li input_x: A Tensor. Supports float32. Shape (n, c, d).
+* @li seq_len: A Tensor. The number of elements to normalize in each batch. Supports int32. Shape (n, ). \n
+* @par Attributes:
+* @li normalize_type: A required string.
Support "per_feature" or "all_features". +* @li epsilon: An optional float32, specifying the small value added to +* variance to avoid dividing by zero. Defaults to "0.00001" . \n +* @par Outputs: +* One outputs +* @li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n +*/ +REG_OP(NormalizeBatch) + .INPUT(input_x, TensorType({ DT_FLOAT })) + .INPUT(seq_len, TensorType({ DT_INT32 })) + .OUTPUT(output_y, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(normalize_type, String) + .ATTR(epsilon, Float, 0.00001) + .OP_END_FACTORY_REG(NormalizeBatch) + +/** +*@brief GroupNorm and Reul operator +* calculating: x, gamma, beta +* y = relu(gamma*((x - mean) / np.sqrt(variance + 0.001)) + beta) + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. +* @li beta: A Tensor. Must be one of the following types: float16, float32 . \n + +* @par Attributes: +* @li num_groups: A require attribute, the type is int32. +* @li eps: A optional attribute, the type is float32. Defaults to 0.00001. \n + +* @par Outputs: +* One outputs, including: +* @li y: A Tensor. Must be one of the following types: float16, float32. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use/ +*/ +REG_OP(GroupNormRelu) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(num_groups, Int) + .ATTR(eps, Float, 0.00001) + .OP_END_FACTORY_REG(GroupNormRelu) + +/** +* @brief Function dropout with softmaxgrad and muls + +* @par Inputs: +* Two inputs, including: +* @li y_grad: A mutable Tensor. The type only support float16. +* @li mask: A mutable Tensor. Must met all of the following rules: +* shape of mask should be 1D. +* dtype of mask should be uint8. +* value of shape should met the following algorithm: +* value = (size(x) + 128 - 1) // 128 * 128 +* @li softmax_output: A mutable Tensor. Must met all of the following rules: +* shape of softmax_output should be NZ. +* dtype of softmax_output should be float16. +* it is the output of softmax + +* @par Attributes: +* @li input_keep_prob:A attribute used to judge which units should be keep. +* Has the same type as "x" . \n +* @li alpha: A attribute used to scale tensor. +* Has the same type as "x" . \n +* @li axes: A list of int. The dimension softmax would be performed on. Defaults +* to "[-1]" . \n + +* @par Outputs: +* x_grad: A mutable Tensor. Has the same type as "x". \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DropoutWithMulsAndSoftmaxGrad) + .INPUT(y_grad, TensorType({ DT_FLOAT16 })) + .INPUT(mask, TensorType({ DT_UINT8 })) + .INPUT(softmax_output, TensorType({ DT_FLOAT16 })) + .OUTPUT(x_grad, TensorType({ DT_FLOAT16 })) + .REQUIRED_ATTR(input_keep_prob, Float) + .REQUIRED_ATTR(alpha, Float) + .ATTR(axes, ListInt, { -1 }) + .OP_END_FACTORY_REG(DropoutWithMulsAndSoftmaxGrad) + +/** +* @brief Loss function that measures the softmax cross entropy. \n + +* @par Inputs: +* Three inputs, including: +* @li scores: A Tensor. Must be one of the following types: half, float32, double. +* A "batch_size * num_classes" matrix. +* @li labels: A Tensor. Must be one of the following types: "int32", "int64". +* @li weights: A manual rescaling weight given to each class. 
+* If given, it has to be a 1D Tensor assigning weight to each of the classes.
+* Otherwise, it is treated as if having all ones. \n

+* @par Attributes:
+* @li ignore_index: Specifies a target value that is ignored and does not contribute to the input gradient.
+* It's an optional value.
+* @li reduction: A character string from "none", "mean", and "sum", specifying the gradient output mode. Defaults to "mean" . \n

+* @par Outputs:
+* @li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "scores".
+* @li log_prop: A Tensor. Has the same type as "scores" . \n

+* @par Third-party framework compatibility
+* Compatible with the ONNX operator SoftmaxCrossEntropyLoss.
+*/
+REG_OP(SoftmaxCrossEntropyLoss)
+    .INPUT(scores, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .INPUT(labels, TensorType({DT_INT32, DT_INT64}))
+    .OPTIONAL_INPUT(weights, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .ATTR(ignore_index, Int, 0)
+    .ATTR(reduction, String, "mean")
+    .OUTPUT(loss, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .OUTPUT(log_prop, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .OP_END_FACTORY_REG(SoftmaxCrossEntropyLoss)
+
+/**
+* @brief Function axpy with softmax and dropoutdomask . \n

+* @par Inputs:
+* Three inputs, including:
+* @li x1: A mutable Tensor. The type only supports float16.
+* @li x2: A mutable Tensor. The type only supports float16.
+* @li mask: A mutable Tensor. Must meet all of the following rules:
+* shape of mask should be 1D.
+* dtype of mask should be uint8.
+* value of shape should meet the following algorithm:
+* value = (size(x) + 128 - 1) // 128 * 128 . \n

+* @par Attributes:
+* @li alpha: An attribute used to scale the tensor. The type is float . \n
+* @li input_keep_prob: An attribute used to judge which units should be kept.
+* The type is float . \n
+* @li axis: A list of int. The dimension softmax would be performed on. Defaults
+* to "[-1]" . \n

+* @par Outputs:
+* y1: A mutable Tensor. Has the same type as "x1". \n
+* y2: A mutable Tensor. Has the same type as "x1". \n

+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(AxpyWithSoftmaxAndDropOutDoMask)
+    .INPUT(x1, TensorType({DT_FLOAT16}))
+    .INPUT(x2, TensorType({DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y1, TensorType({DT_FLOAT16}))
+    .OUTPUT(y2, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(alpha, Float)
+    .REQUIRED_ATTR(input_keep_prob, Float)
+    .ATTR(axis, ListInt, {-1})
+    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropOutDoMask)
+}  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h
index 9edc469a..8c6987ca 100644
--- a/third_party/fwkacllib/inc/ops/nn_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -20,7 +20,318 @@
 */
 #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
 #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
-
+#include "graph/operator_reg.h"
 #include "nn_pooling_ops.h"
+namespace ge {
+/**
+* @brief Says whether the targets are in the top "k" predictions . \n

+* @par Inputs:
+* Three inputs, including:
+* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor.
+* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
+* @li k: A 1D Tensor of the same type as "targets".
+* Specifies the number of top elements to look at for computing precision . \n

+* @par Outputs:
+* precision: A Tensor of type bool . \n

+* @attention Constraints:
+* @li "targets" must be a non-negative tensor.

+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator InTopKV2.
+*/
+REG_OP(InTopKV2)
+    .INPUT(predictions, TensorType({DT_FLOAT}))
+    .INPUT(targets, TensorType(IndexNumberType))
+    .INPUT(k, TensorType({IndexNumberType}))
+    .OUTPUT(precision, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(InTopKV2)
+
+/**
+*@brief Performs batch normalization . \n

+*@par Inputs:
+* Five inputs, including: (NHWC, NCHW supported)
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D.
+*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the scaling factor.
+*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the offset.
+*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the mean used for inference. Must be "None" if the
operation is used for training.
+*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the variance used for inference. Must be "None"
if the operation is used for training . \n

+*@par Attributes:
+*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
+*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
+*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n

+*@par Outputs:
+* Five outputs, including: (NHWC, NCHW supported)
+*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D.
+*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the mean of "x".
+*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the variance of "x".
+*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
+*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n

+*@attention Constraints:
+*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction .
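+*
+* An illustrative NumPy-style sketch of the assumed training-mode computation
+* (reduction over the N, H and W axes for an NHWC input):
+*   batch_mean = np.mean(x, axis=(0, 1, 2))
+*   batch_variance = np.var(x, axis=(0, 1, 2))
+*   y = scale * (x - batch_mean) / np.sqrt(batch_variance + epsilon) + offset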
\n +*/
+REG_OP(FusedBatchNormV2)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(offset, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .ATTR(data_format, String, "NHWC")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(FusedBatchNormV2)
+
+/**
+ * @brief Sorts large amounts of data. The first operator of TopK.
+ * @par Inputs:
+ * Two inputs, including:
+ * @li input_data: A Tensor. Data to be sorted. Supports float16 or float32.
+ * @li input_index: A Tensor. Range(0, 2048). Supports float16 or int32.
+ * @par Attributes:
+ * @li k_num: Int. The number of elements to be sorted.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
+ * @par Outputs:
+ * One output, including:
+ * output_proposal: A Tensor. Datatype and format are the same as input_data. Proposal sorted for each channel.
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(SegmentSort)
+    .INPUT(input_data, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(input_index, TensorType({DT_FLOAT16,DT_INT32}))
+    .OUTPUT(output_proposal, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .REQUIRED_ATTR(k_num, Int)
+    .ATTR(largest, Bool, true)
+    .OP_END_FACTORY_REG(SegmentSort)
+
+/**
+ * @brief Sorts large amounts of data. The second operator of TopK.
+ * @par Inputs:
+ * One input, including:
+ * input_proposal: A Tensor. Proposal sorted for each channel. Supports float16 or float32.
+ * @par Attributes:
+ * @li k_num: Int. The number of elements to be sorted.
+ * @li include_index: Bool. If include_index is false, only the proposal is output; if true, data and index are output.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
+ * @par Outputs:
+ * Two outputs, including:
+ * output_proposal: A Tensor. Datatype and format are the same as input_data. Proposal sorted for each channel.
+ * output_index: A Tensor. If include_index is true, the index is output.
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(MultiMerge)
+    .INPUT(input_proposal, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(output_proposal, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(output_index, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(k_num, Int)
+    .ATTR(include_index, Bool, false)
+    .ATTR(largest, Bool, true)
+    .OP_END_FACTORY_REG(MultiMerge)
+
+/**
+ * @brief Sorts large amounts of data. The third operator of TopK.
+ * @par Inputs:
+ * One input, including:
+ * input_proposal: A Tensor. Proposal sorted for each channel. Supports float16.
+ * @par Attributes:
+ * @li k_num: Int. The number of elements to be sorted.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
+ * @par Outputs:
+ * Two outputs, including:
+ * @li output_data: A Tensor. Datatype and format are the same as input_data. Sorted data.
+ * @li output_index: A Tensor. int32. Data index.
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(SingleMerge)
+    .INPUT(input_proposal, TensorType({ DT_FLOAT16 }))
+    .OUTPUT(output_data, TensorType({ DT_FLOAT16 }))
+    .OUTPUT(output_index, TensorType({ DT_INT32 }))
+    .REQUIRED_ATTR(k_num, Int)
+    .ATTR(largest, Bool, true)
+    .OP_END_FACTORY_REG(SingleMerge)
+
+/**
+ * @brief MultiHeadAttention.
+ * @par Inputs:
+ * Thirteen inputs, including:
+ * @li query: A Tensor. Query of Attention. Support float16
+ * @li key: A Tensor. Key of Attention. Support float16
+ * @li value: A Tensor. Value of Attention. Support float16
+ * @li query_weight: A Tensor. QueryWeight of Attention. Support float16
+ * @li key_weight: A Tensor. KeyWeight of Attention. Support float16
+ * @li value_weight: A Tensor. ValueWeight of Attention. Support float16
+ * @li attn_mask: A Tensor. AttentionMask of Attention. Support float16
+ * @li out_proj_weight: A Tensor. OutProjWeight of Attention. Support float16
+ * @li query_bias: Optional Tensor. QueryBias of Attention. Support float16
+ * @li key_bias: Optional Tensor. KeyBias of Attention. Support float16
+ * @li value_bias: Optional Tensor. ValueBias of Attention. Support float16
+ * @li out_proj_bias: Optional Tensor. OutProjBias of Attention. Support float16
+ * @li dropout_mask_input: Optional Tensor. DropOutMask of Attention. Support uint8 \n

+ * @par Attributes:
+ * @li attn_head_num: Attention head number, Support int
+ * @li attn_dim_per_head: Attention dim of a head, Support int
+ * @li src_len: Source length, Support int
+ * @li tgt_len: Target length, Support int
+ * @li keep_prob: Dropout keep probability, Support float
+ * @li softmax_use_float: SoftMax uses float32 to keep precision, Support bool \n

+ * @par Outputs:
+ * Eight outputs, including:
+ * @li y: A Tensor. Result of Attention. Support float16
+ * @li dropout_mask: DropOutMask of Attention. Support uint8
+ * @li query_res: Query Result of Attention. Support float16
+ * @li key_res: Key Result of Attention. Support float16
+ * @li value_res: Value Result of Attention. Support float16
+ * @li attn_scores: Attention Scores of SoftMax. Support float16, float
+ * @li attn_res: Attention Result of SoftMax. Support float16
+ * @li context: Context of Attention. Support float16

+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
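+ *
+ * An illustrative sketch of the assumed attention core (per head, with
+ * d = attn_dim_per_head):
+ *   attn_scores = softmax(query @ key.T / sqrt(d) + attn_mask)
+ *   context     = dropout(attn_scores, keep_prob) @ value
+ *   y           = concat_heads(context) @ out_proj_weight + out_proj_bias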
+ */
+REG_OP(MultiHeadAttention)
+    .INPUT(query, TensorType({DT_FLOAT16}))
+    .INPUT(key, TensorType({DT_FLOAT16}))
+    .INPUT(value, TensorType({DT_FLOAT16}))
+    .INPUT(query_weight, TensorType({DT_FLOAT16}))
+    .INPUT(key_weight, TensorType({DT_FLOAT16}))
+    .INPUT(value_weight, TensorType({DT_FLOAT16}))
+    .INPUT(attn_mask, TensorType({DT_FLOAT16}))
+    .INPUT(out_proj_weight, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(query_bias, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(key_bias, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(value_bias, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(out_proj_bias, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(dropout_mask_input, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .OUTPUT(dropout_mask, TensorType({DT_UINT8}))
+    .OUTPUT(query_res, TensorType({DT_FLOAT16}))
+    .OUTPUT(key_res, TensorType({DT_FLOAT16}))
+    .OUTPUT(value_res, TensorType({DT_FLOAT16}))
+    .OUTPUT(attn_scores, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(attn_res, TensorType({DT_FLOAT16}))
+    .OUTPUT(context, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(attn_head_num, Int)
+    .REQUIRED_ATTR(attn_dim_per_head, Int)
+    .REQUIRED_ATTR(src_len, Int)
+    .REQUIRED_ATTR(tgt_len, Int)
+    .REQUIRED_ATTR(keep_prob, Float)
+    .REQUIRED_ATTR(softmax_use_float, Bool)
+    .OP_END_FACTORY_REG(MultiHeadAttention)
+
+/**
+ * @brief MultiHeadAttentionGrad.
+ * @par Inputs:
+ * Fifteen inputs, including:
+ * @li query: A Tensor. Query of Attention. Support float16
+ * @li key: A Tensor. Key of Attention. Support float16
+ * @li value: A Tensor. Value of Attention. Support float16
+ * @li query_weight: A Tensor. QueryWeight of Attention. Support float16
+ * @li key_weight: A Tensor. KeyWeight of Attention. Support float16
+ * @li value_weight: A Tensor. ValueWeight of Attention. Support float16
+ * @li out_proj_weight: A Tensor. OutProjWeight of Attention. Support float16
+ * @li query_res: A Tensor. Query Result of Attention. Support float16
+ * @li key_res: A Tensor. Key Result of Attention. Support float16
+ * @li value_res: A Tensor. Value Result of Attention. Support float16
+ * @li attn_scores: A Tensor. Attention Scores of Attention. Support float16, float
+ * @li attn_res: A Tensor. Attention Result of Attention. Support float16
+ * @li context: A Tensor. Context of Attention. Support float16
+ * @li y_grad: A Tensor. Grad of Attention. Support float16
+ * @li dropout_mask: A Tensor. DropOutMask of Attention. Support uint8 \n

+ * @par Attributes:
+ * @li attn_head_num: Attention head number, Support int
+ * @li attn_dim_per_head: Attention dim of a head, Support int
+ * @li src_len: Source length, Support int
+ * @li tgt_len: Target length, Support int
+ * @li keep_prob: Dropout keep probability, Support float
+ * @li softmax_use_float: SoftMax uses float32 to keep precision, Support bool
+ * @li bias_grad_mask: Mask indicating which attention biases have grads, Support list bool \n

+ * @par Outputs:
+ * Eleven outputs, including:
+ * @li query_weight_grad: QueryWeight Grad of Attention. Support float16
+ * @li key_weight_grad: KeyWeight Grad of Attention. Support float16
+ * @li value_weight_grad: ValueWeight Grad of Attention. Support float16
+ * @li out_proj_weight_grad: OutProjWeight Grad of Attention. Support float16
+ * @li query_grad: Query Grad of Attention. Support float16
+ * @li key_grad: Key Grad of Attention. Support float16
+ * @li value_grad: Value Grad of Attention. Support float16
+ * @li query_bias_grad: QueryBias Grad of Attention.
Support float16
+ * @li key_bias_grad: KeyBias Grad of Attention. Support float16
+ * @li value_bias_grad: ValueBias Grad of Attention. Support float16
+ * @li out_proj_bias_grad: OutProjBias Grad of Attention. Support float16

+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(MultiHeadAttentionGrad)
+    .INPUT(query, TensorType({DT_FLOAT16}))
+    .INPUT(key, TensorType({DT_FLOAT16}))
+    .INPUT(value, TensorType({DT_FLOAT16}))
+    .INPUT(query_weight, TensorType({DT_FLOAT16}))
+    .INPUT(key_weight, TensorType({DT_FLOAT16}))
+    .INPUT(value_weight, TensorType({DT_FLOAT16}))
+    .INPUT(out_proj_weight, TensorType({DT_FLOAT16}))
+    .INPUT(query_res, TensorType({DT_FLOAT16}))
+    .INPUT(key_res, TensorType({DT_FLOAT16}))
+    .INPUT(value_res, TensorType({DT_FLOAT16}))
+    .INPUT(attn_scores, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(attn_res, TensorType({DT_FLOAT16}))
+    .INPUT(context, TensorType({DT_FLOAT16}))
+    .INPUT(y_grad, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(dropout_mask, TensorType({DT_UINT8}))
+    .OUTPUT(query_weight_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(key_weight_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(value_weight_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(out_proj_weight_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(query_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(key_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(value_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(query_bias_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(key_bias_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(value_bias_grad, TensorType({DT_FLOAT16}))
+    .OUTPUT(out_proj_bias_grad, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(attn_head_num, Int)
+    .REQUIRED_ATTR(attn_dim_per_head, Int)
+    .REQUIRED_ATTR(src_len, Int)
+    .REQUIRED_ATTR(tgt_len, Int)
+    .REQUIRED_ATTR(keep_prob, Float)
+    .REQUIRED_ATTR(softmax_use_float, Bool)
+    .REQUIRED_ATTR(bias_grad_mask, ListBool)
+    .OP_END_FACTORY_REG(MultiHeadAttentionGrad)
+}// namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index ab35ba47..f34de163 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@ namespace ge {
 /**
 *@brief Performs pooling on the input.
 *@par Inputs:
-*@li x: An NCHW tensor of type float16, float32, int8.
+* x: An NCHW tensor of type float16, float32, int8.
 *@par Attributes:
 *@li mode: An optional int32, specifying the pooling algorithm, either "0" (max pooling) or "1" (avg pooling). Defaults to "0".
 *@li global_pooling: An optional bool. Defaults to "false".
@@ -50,6 +50,7 @@ namespace ge {
 *dilation[2]: An optional int32, specifying the left dilation. Defaults to "1".
 *dilation[3]: An optional int32, specifying the right dilation. Defaults to "1".
 *@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0".
+*@li data_format: An optional string, specifying the data format of the input and output data. Defaults to "NCHW".
 *@par Outputs:
 *y: An NCHW tensor of type float16, float32, int32.
 *@attention Constraints:
@@ -74,26 +75,36 @@ REG_OP(Pooling)
    .OP_END_FACTORY_REG(Pooling)

 /**
-*@brief Performs average pooling on the input .
\n - +*@brief Performs average pooling on the input. \n *@par Inputs: -*x: A tensor of type float16, float32, double . \n +*x: A tensor of type float16, float32, double. \n *@par Attributes: -*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 255]. -*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. -*@li padding: A required string, specifying the padding algorithm, either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. -*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default) . \n +*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) +* of the sliding window, where N = C = 1, and H and W are positive integers +* within the range [1, 255]. +* @li strides: A required list of 4 ints, specifying the stride of the +* sliding window. The strides of the N and C dimensions are 1. The strides of +* the H and W dimensions are positive integers within the range [1, 63]. +*@li padding: A required string, specifying the padding algorithm, + * either "VALID" or "SAME". With "SAME" means that the outputs will have the + * same spatial dimensions as its inputs. With "VALID" means no padding. +*@li data_format: An optional string, specifying the data format of "ksize" +* and "strides", either "NCHW", or "NHWC" (default). \n *@par Outputs: -*y: The average pooled output tensor. Has the same type and format as input "x" . \n +* y: The average pooled output tensor. Has the same type and format +* as input "x". \n -*@attention Constraints: -*@li This operator applies only to a TensorFlow network. -*@li Only single input and single output are supported. +* @attention Constraints: +* @li This operator applies only to a TensorFlow network. +* @li Only single input and single output are supported. *@li Global pooling is supported. -*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 -*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. +* @li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. +* ksize_H * ksize_W < 256 +*@li Due to instruction restrictions, + * the values of "strides_h" and "strides_w" are positive integers within + * the range [1, 63]. *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool. */ @@ -108,28 +119,35 @@ REG_OP(AvgPool) /** *@brief Performs average pooling on the input. - *@par Inputs: *x: A tensor of type float16, float32, double. *@par Attributes: -*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 255]. -*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. -*@li padding_mode: A required string, specifying the padding algorithm, either "VALID", "SAME" and "CALCULATED". 
With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. -*@li pads: Pad value when padding_mode is "CALCULATED". -*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default). -*@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] -*@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". -*@li exclusive: Ignore padding area or not when calculating average. +*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, + * where N = C = 1, and H and W are positive integers within the range [1, 255]. +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. + * The strides of the N and C dimensions are 1. + * The strides of the H and W dimensions are positive integers within the range [1, 63]. +*@li padding_mode: A required string, specifying the padding algorithm, + * either "VALID", "SAME" and "CALCULATED". + * With "SAME" means that the outputs will have the same spatial dimensions as its inputs. + * With "VALID" means no padding. +* @li pads: Pad value when padding_mode is "CALCULATED". +* @li data_format: An optional string, specifying the data format of "ksize" and "strides", + * either "NCHW", or "NHWC" (default). +* @li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] +* @li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +* @li exclusive: Ignore padding area or not when calculating average. -*@par Outputs: -*y: The average pooled output tensor. Has the same type and format as input "x". +* @par Outputs: +* y: The average pooled output tensor. Has the same type and format as input "x". *@attention Constraints: *@li Only single input and single output are supported. -*@li Global pooling is supported. +* @li Global pooling is supported. *@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 -*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. +*@li Due to instruction restrictions, + * the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPoolV2. */ @@ -147,17 +165,60 @@ REG_OP(AvgPoolV2) .OP_END_FACTORY_REG(AvgPoolV2) /** -*@brief Performs average pooling on the input. +* @brief Performs average pooling on the input. \n +* @par Inputs: +* x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type +* float16, float32, double. \n -*@par Inputs: -*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +* @par Attributes: +* @li ksize: List of ints that has length 1, 3 or 5. The size of the window +* for each dimension of the input tensor. +*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding +* window for each dimension of the input tensor. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to +* compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the +* averaging calculation. 
+* @li divisor_override: if specified, it will be used as divisor, otherwise +* size of the pooling region will be used. +* @li data_format: A string, format of input data. \n -*@par Attributes: +*@par Outputs: +*y: The average pooled output tensor. \n + +*@attention Constraints: +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool3D. +*/ +REG_OP(AvgPool3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3D) + + +/** +* @brief Performs average pooling on the input. +* @par Inputs: +* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +* @li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. +*@li multiplier: An optional tensor of float16, float32, double. + +* @par Attributes: *@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. -*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. +* @li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. *@li pads: List of ints, implicit zero paddings on both sides of the input. *@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. +* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. *@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. *@li data_format: A string, format of input data . \n @@ -165,13 +226,15 @@ REG_OP(AvgPoolV2) *y: The average pooled output tensor . \n *@attention Constraints: -*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] +*"ksize" is in the range [1, 255]. "strides" is in the range [1, 63] *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool3D. */ -REG_OP(AvgPool3D) +REG_OP(AvgPool3DD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) @@ -180,28 +243,121 @@ REG_OP(AvgPool3D) .ATTR(count_include_pad, Bool, true) .ATTR(divisor_override, Int, 0) .ATTR(data_format, String, "NDHWC") - .OP_END_FACTORY_REG(AvgPool3D) + .OP_END_FACTORY_REG(AvgPool3DD) + +/** +* @brief Computes AvgPool3DGrad function. \n +* @par Inputs: +* @li orig_input_shape: An NDHWC tensor of type int32. +* @li grads: An NDHWC tensor of type float16, float32, or double. \n + +* @par Attributes: +* @li ksize: List of ints that has length 5. The size of the window for +* each dimension of the input tensor. +* @li strides:List of ints that has length 5. The stride of the sliding +* window for each dimension of the input tensor. 
+* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to +* compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the +* averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, otherwise +* size of the pooling region will be used. +* @li data_format: A string, format of input data. \n + +* @par Outputs: +* @li output: A mutable tensor with the same shape and type as "grads". + +* @attention Constraints: +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]. \n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator AvgPoolGrad. +*/ + +REG_OP(AvgPool3DGrad) + .INPUT(orig_input_shape, TensorType({DT_INT32})) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DGrad) + +/** +* @brief Performs average pooling on the input. +* @par Inputs: +* @li grads: An NDHWC tensor of type float16. +* @li filter: An optional tensor of type float16, fractal_z_3d layout. +* @li multiplier: An optional tensor of float16. + +* @par Attributes: +* @li orig_input_shape: List of ints that has length 5. +* The size of the window for each dimension of the input tensor. +* @li ksize: List of ints that has length 5. +* The size of the window for each dimension of the input tensor. +* @li strides:List of ints that has length 5. +* The stride of the sliding window for each dimension of the input tensor. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor +* in the formula to compute the output shape. +* @li count_include_pad: When true, will include the zero-padding +* in the averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, +* otherwise size of the pooling region will be used. +* @li data_format: A string, format of input data. \n + +* @par Outputs: +* output: The average pooled output tensor . \n + +* @attention Constraints: +* "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool3DGradD. +*/ +REG_OP(AvgPool3DGradD) + .INPUT(grads, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16})) + .OUTPUT(output, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DGradD) /** *@brief Performs max_pool_ext2 on the input . \n *@par Inputs: * One input: -*x: An NC1HWC0 Tensor of type float16. +*x: A Tensor of type float16. *@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. 
-*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +*@li ksize: A required list of int8, int16, int32, or int64 values, + * specifying the size of the window for each dimension of the input tensor. No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, + * specifying the stride of the sliding window for each dimension of the input tensor. No default value. *@li padding: A required string. No default value. -*@li data_format: An optional string. Defaults to "NC1HWC0" . \n +*@li data_format: An optional string . \n *@par Outputs: *y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. -*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, + * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID" . \n *@par Third-party framework compatibility @@ -223,9 +379,9 @@ REG_OP(MaxPoolExt2) /** *@brief Performs max pooling on the input . \n -*@par Inputs: +* @par Inputs: * One input: -*x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16, +* x: A Tensor. Supported type:float16, float32, double, int8, int16, * int32, int64, uint8, uint16, qint8 *@par Attributes: @@ -265,10 +421,10 @@ REG_OP(MaxPool) .OP_END_FACTORY_REG(MaxPool) /** -*@brief Performs max 3d pooling on the input . \n +* @brief Performs max 3d pooling on the input . \n *@par Inputs: -*x: An NC1HWC0 Tensor. Supported type float16, float32, double . \n +* x: A Tensor. Supported type float16, float32, double . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, @@ -278,8 +434,8 @@ No default value. specifying the stride of the sliding window for each dimension of the input tensor. No default value. *@li padding: A required string type of float16. -*@li pads: A list type of int32. Default value {0, 0, 0}. -*@li dilation: A list type of int32. Default value {1, 1, 1}. +*@li pads: A list type of int32. Default value {0,0,0,0,0,0}. +*@li dilation: A list type of int32. Default value {1,1,1,1,1,1}. *@li ceil_mode: A ceil mode number of int32 . Default value 0. *@li data_format: An optional string. Defaults to "NDHWC" . \n @@ -302,12 +458,77 @@ REG_OP(MaxPool3D) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(padding, String) - .ATTR(pads, ListInt, {0,0,0}) - .ATTR(dilation, ListInt, {1,1,1}) + .ATTR(pads, ListInt, {0,0,0,0,0,0}) + .ATTR(dilation, ListInt, {1,1,1,1,1,1}) .ATTR(ceil_mode, Int, 0) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(MaxPool3D) +/** +* @brief Performs max pooling3d on both max values and indices. +* +* @par Inputs: +* One input: +* x: An 6D tensor. Supported type: float16. Format as NDC1HWC0. +* @par Attributes: +* @li ksize: A required list of int32 values, +* specifying the size of the window for each dimension of the input tensor. +* No default value. +* @li strides: A required list of int32 values, +* specifying the stride of the sliding window for each dimension of +* the input tensor. No default value. +* @li pads: A required 3*2-dimension-list of int32 values. 
+* It specifies the pads of the three spatial dimensions of the input, implicitly padded with 0.
+* @li dilation: Dilation of the kernel. Default value is {1,1,1,1,1}.
+* @li ceil_mode: Default value is false.
+* @li data_format: The format of the Torch input. Default value is "NCDHW".
+* @li argmax_type: Determines the type of the output "argmax". "bitmask" (the
+* default) returns an img2col bitmask; "index_int32" and "index_int64"
+* return the Torch-style output indices.
+* @par Outputs:
+* @li y: A 6D tensor, the maxpool3d output (max values), format NDoC1HoWoC0.
+* @li argmax: A 5D uint16 tensor, the indices output.
+*/
+REG_OP(MaxPool3DWithArgmax)
+    .INPUT(x, TensorType::RealNumberType())
+    .OUTPUT(y, TensorType::RealNumberType())
+    .OUTPUT(argmax, TensorType::IndexNumberType())
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(dilation, ListInt, {1, 1, 1, 1, 1})
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(data_format, String, "NCDHW")
+    .ATTR(argmax_type, String, "bitmask")
+    .OP_END_FACTORY_REG(MaxPool3DWithArgmax)
+
+/**
+*@brief Applies a 2D adaptive max pooling over an input signal composed of
+* several input planes. The output is of size H x W, for any input size. \n
+
+* @par Inputs:
+* One input, including:
+* @li x: A Tensor. Must be one of the following data types:
+* float16, float32, float64. \n
+
+* @par Attributes:
+* @li output_size: A required list of 2 ints,
+* specifying the size (H,W) of the output tensor. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same data type as "x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator AdaptiveMaxPool2d.
+*/
+REG_OP(AdaptiveMaxPool2d)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
+    .OUTPUT(argmax, TensorType::IndexNumberType())
+    .REQUIRED_ATTR(output_size, ListInt)
+    .OP_END_FACTORY_REG(AdaptiveMaxPool2d)

 /**
 * @brief Computes second-order gradients of the maxpooling3d function . \n
@@ -357,9 +578,9 @@ REG_OP(MaxPool3DGradGrad)
 * @brief Computes gradients of the maxpooling function . \n

 * @par Inputs:
-* @li x1: A mutable NC1HWC0 tensor of type RealNumberType.
-* @li x2: A mutable NC1HWC0 tensor of type RealNumberTypex.
-* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n
+* @li x1: A mutable tensor of type RealNumberType.
+* @li x2: A mutable tensor of type RealNumberType.
+* @li grad: A mutable tensor of type RealNumberType. \n

 * @par Attributes:
 * @li ksize: A required tuple or list, specifying the size of the window for
@@ -375,8 +596,7 @@ REG_OP(MaxPool3DGradGrad)
 * y: A mutable tensor. Has the same shape and type as "x1" . \n

 * @attention Constraints:
-* @li Computing gradients of global pooling is not supported, which means
-* "ksize < x1".
+* @li "ksize" is limited by the buffer with full tiling.
 * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

 * @par Third-party framework compatibility
@@ -421,7 +641,7 @@ REG_OP(MaxPoolGrad)
 * @li Other dimensions of ksize and strides is 1 . \n

 * @par Outputs:
-* @li y: Has the same type and format as input "x1" . \n
+* y: Has the same type and format as input "x1" . \n

 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator MaxPoolGradGrad.
@@ -441,22 +661,25 @@ REG_OP(MaxPoolGradGrad)
 *@brief Performs max_pool_ext2 on the input . \n

 *@par Inputs:
-* Two inputs:
-*@li x: An NC1HWC0 Tensor of type float16.
-*@li strides: A required type of int32 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li ksize: A required type of int32 values, specifying the size of the window for each dimension of the input tensor. No default value. +* Three inputs: +*@li x: A Tensor of type float16. +*@li strides: A required type of int32 values, + * specifying the stride of the sliding window for each dimension of the input tensor. No default value. +*@li ksize: A required type of int32 values, + * specifying the size of the window for each dimension of the input tensor. No default value. *@par Attributes: *@li padding: A required string. No default value. -*@li data_format: An optional string. Defaults to "NC1HWC0" . \n +*@li data_format: An optional string. \n *@par Outputs: *y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. -*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, + * strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID" . \n *@par Third-party framework compatibility @@ -472,13 +695,14 @@ REG_OP(MaxPoolV2) .OP_END_FACTORY_REG(MaxPoolV2) /** -*@brief Performs max pooling on the input and outputs both max values and +* @brief Performs max pooling on the input and outputs both max values and * indices . \n *@par Inputs: * One input: -*x: An NC1HWC0 Tensor. Supported type: float, double, int32, - * uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n +* x: An 4D Tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. + * Must set the format, supported format list ["NCHW, NHWC"]. \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, @@ -487,17 +711,18 @@ REG_OP(MaxPoolV2) *@li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of * the input tensor. No default value. -*@li padding: A required string. No default value . \n +*@li padding: A required string. No default value . +*@li Targmax:An optional int with default value 7 . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". -*argmax: A Tensor. Has the same type and format as input "x". -*@attention Constraints: +*@li y: A Tensor. Has the same type and format as input "x". +*@li argmax: A Tensor. Has the same type and format as input "x". +* @attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID" . \n +*@li "padding" is either "SAME" or "VALID" . *@par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolWithArgmax. @@ -513,37 +738,39 @@ REG_OP(MaxPoolWithArgmax) .OP_END_FACTORY_REG(MaxPoolWithArgmax) /** -*@brief Performs the backpropagation of MaxPoolWithArgmax . \n +* @brief Performs the backpropagation of MaxPoolWithArgmax . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li x: An NC1HWC0 tensor. 
Supported type: float, double, int32,
+* @li x: A 4-D tensor. Supported type: float, double, int32,
 * uint8, int16, int8, int64, uint16, half, uint32, uint64.
-*@li grad: An NC1HWC0 tensor. Supported type: float, double, int32,
+* Must set the format, supported format list ["NCHW", "NHWC"].
+* @li grad: A 4-D tensor. Supported type: float, double, int32,
 * uint8, int16, int8, int64, uint16, half, uint32, uint64.
-*@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n
+* Must set the format, supported format list ["NCHW", "NHWC"].
+*@li argmax: A tensor of type int32 or int64. \n

-*@par Attributes:
-*@li ksize: A required list of int8, int16, int32, or int64 values,
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values,
 * specifying the size of the window for each dimension of the input tensor.
 * No default value.
-*@li strides: A required list of int8, int16, int32, or int64 values,
+* @li strides: A required list of int8, int16, int32, or int64 values,
 * specifying the stride of the sliding window for each dimension of
 * the input tensor. No default value.
-*@li padding: A required string. No default value . \n
+* @li padding: A required string. No default value. \n

-*@par Outputs:
+* @par Outputs:
 *y: A Tensor. Has the same type and format as input "x" . \n

-*@attention Constraints:
-*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1,
+* @attention Constraints:
+* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1,
 * ksize[1] * ksize[2] <= 255.
-*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1
-*@li "padding" is either "SAME" or "VALID".
+* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1.
+* @li "padding" is either "SAME" or "VALID". \n

-*@see max_pool_with_argmax
-*@par Third-party framework compatibility
+* @see max_pool_with_argmax
+* @par Third-party framework compatibility
 * Compatible with the TensorFlow operator MaxPoolGradWithArgmax.
 */
 REG_OP(MaxPoolGradWithArgmax)
@@ -557,22 +784,23 @@ REG_OP(MaxPoolGradWithArgmax)
     .OP_END_FACTORY_REG(MaxPoolGradWithArgmax)
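The backward rule this operator implements can be read as a scatter-add: every pooled output gradient flows to the input position recorded in "argmax". A minimal host-side sketch over flat indices (illustrative names only, not the device kernel):

#include <cstdint>
#include <vector>

std::vector<float> MaxPoolGradWithArgmaxRef(const std::vector<float> &grad,
                                            const std::vector<int32_t> &argmax,
                                            std::size_t input_size) {
  std::vector<float> dx(input_size, 0.0f);
  for (std::size_t i = 0; i < grad.size(); ++i) {
    // Route each pooled gradient back to the argmax position it came from.
    dx[static_cast<std::size_t>(argmax[i])] += grad[i];
  }
  return dx;
}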
 /**
-*@brief Performs transform mask to argmax . \n
+* @brief Transforms a max-pooling mask to argmax indices. \n

-*@par Inputs:
-* Two input:
-*x: An NC1HWC0 Tensor of type float16.
-*mask: An NC1HWC0 Tensor of type uint16 . \n
+* @par Inputs:
+* Two inputs:
+* @li x: A Tensor of type float16.
+* @li mask: A Tensor of type uint16. \n

-*@par Attributes:
-*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value.
-*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
-*@li padding: A required string. No default value . \n
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li padding: A required string. No default value.
+* @li originshape: A required list of int8, int16, int32, or int64 values. No default value. \n

-*@par Outputs:
-*argmax: An NC1HWC0 Tensor of type int32 . \n
+* @par Outputs:
+* argmax: A Tensor of type int32. \n

-*@attention Constraints:
+* @attention Constraints:
 *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
 *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
 *@li "padding" is either "SAME" or "VALID" . \n
@@ -604,7 +832,7 @@ REG_OP(Mask2Argmax)
 * @li strides: A required list, specifying the stride of the sliding window.
 * @li padding: A required string, window sliding mode. Either SAME or VALID.
 * @par Outputs:
-* @li y:Result tensor. Supported type: float, double, int32,
+* y: Result tensor. Supported type: float, double, int32,
 * uint8, int16, int8, int64, uint16, half, uint32, uint64

 * @attention Constraints:
@@ -617,7 +845,7 @@ REG_OP(Mask2Argmax)
 * (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16), else failed . \n

 * @par Third-party framework compatibility
-* @li Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax.
+* Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax.
 */
 REG_OP(MaxPoolGradGradWithArgmax)
     .INPUT(x, TensorType::RealNumberType())
@@ -630,11 +858,10 @@ REG_OP(MaxPoolGradGradWithArgmax)
     .OP_END_FACTORY_REG(MaxPoolGradGradWithArgmax)

 /**
-* @brief Computes avgpoograd function . \n
-
+* @brief Computes the AvgPoolGrad function. \n
 * @par Inputs:
 * @li orig_input_shape: An NHWC tensor of type int32.
-* @li input_grad: An NHWC tensor of type float16, float32, or double . \n
+* @li input_grad: An NHWC tensor of type float16, float32, or double. \n

 * @par Attributes:
 * @li ksize: A required tuple or list, specifying the size of the window for
@@ -643,10 +870,10 @@ REG_OP(MaxPoolGradGradWithArgmax)
 * window for each dimension of the input tensor.
 * @li padding: A required string, specifying the type of
 * the padding algorithm to use.
-* @li data_format: An optional string. Defaults to "NHWC" . \n
+* @li data_format: An optional string. Defaults to "NHWC". \n

 * @par Outputs:
-* @out_grad: A mutable tensor with the same shape and type as "orig_input" . \n
+* out_grad: A mutable tensor with the same shape and type as "input_grad". \n

 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator AvgPoolGrad.
@@ -663,7 +890,6 @@ REG_OP(AvgPoolGrad)

 /**
 * @brief Computes gradients of average pooling function . \n
-
 * @par Inputs:
 * @input_grad: An NHWC tensor of type float16.
 * @mean_matrix: Assist matrix, an NHWC tensor of type float16.
@@ -698,11 +924,10 @@ REG_OP(AvgPoolGradD)
     .OP_END_FACTORY_REG(AvgPoolGradD)

 /**
-* @brief Computes avgpoolv2grad function.
-
+* @brief Computes the AvgPoolV2Grad function. \n
 * @par Inputs:
 * @li orig_input_shape: An NHWC tensor of type int32.
-* @li input_grad: An NHWC tensor of type float16, float32, or double.
+* @li input_grad: An NHWC tensor of type float16, float32, or double. \n

 * @par Attributes:
 * @li ksize: A required tuple or list, specifying the size of the window for
@@ -711,15 +936,15 @@ REG_OP(AvgPoolGradD)
 * window for each dimension of the input tensor.
 * @li padding_mode: A required string, specifying the type of
 * the padding algorithm to use.
-* @li global_pooling: Whether to use the global pooling. If global_pooling=true,
-* ksize and pads will be ignored. Default False.
-* @li ceil_mode: Whether to use the ceil function to calculate output height and
-* width. Default False.
+* @li global_pooling: Whether to use the global pooling. If global_pooling =
+* true, ksize and pads will be ignored. Default False.
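For the average-pooling gradient ops above, the common rule is that each input cell covered by a window receives an equal share of that window's output gradient; this is the weighting the assist "mean_matrix" of AvgPoolGradD encodes. A 1-D sketch under the exclusive-padding assumption (illustrative only):

#include <algorithm>
#include <vector>

void AvgPoolGradRef1D(const std::vector<float> &dy, std::vector<float> &dx,
                      int k, int stride) {
  const int in = static_cast<int>(dx.size());
  for (int o = 0; o < static_cast<int>(dy.size()); ++o) {
    const int start = o * stride;
    const int end = std::min(start + k, in);
    for (int i = start; i < end; ++i) {
      dx[i] += dy[o] / static_cast<float>(end - start);  // equal share per cell
    }
  }
}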
+* @li ceil_mode: Whether to use the ceil function to calculate output height +* and width. Default False. * @li exclusive: Whether to exclude padding points. default is true. -* @li data_format: An optional string. Defaults to "NHWC". +* @li data_format: An optional string. Defaults to "NHWC". \n * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". +* @li out_grad: A mutable tensor with the same shape and type as "orig_input". \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -739,9 +964,8 @@ REG_OP(AvgPoolV2Grad) .OP_END_FACTORY_REG(AvgPoolV2Grad) /** * @brief Computes gradients of averagev2 pooling function. - * @par Inputs: -* @li input_grad: An NHWC tensor of type float16, float32, or double. +*input_grad: An NHWC tensor of type float16, float32, or double. * @par Attributes: * @li orig_input_shape: A required tuple or list of type int32. @@ -759,10 +983,10 @@ REG_OP(AvgPoolV2Grad) * @li data_format: An optional string. Defaults to "NHWC". * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". +*out_grad: A mutable tensor with the same shape and type as "orig_input". * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator AvgPoolGrad. +*Compatible with the TensorFlow operator AvgPoolGrad. */ REG_OP(AvgPoolV2GradD) .INPUT(input_grad, TensorType({DT_FLOAT16})) @@ -781,11 +1005,11 @@ REG_OP(AvgPoolV2GradD) .OP_END_FACTORY_REG(AvgPoolV2GradD) /** -*@brief :upsample the layer +*@brief upsample the layer, similar to the nearest-neighbor difference scaling algorithm. *@par Inputs: * one input, including: -*@li x: A tensor of type float16 or float32. +* x: A tensor of type float16 or float32. *@par Attributes: *@li scale: A optional float32, scale factor of x. Defaults to "1.0". *@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2". @@ -1037,13 +1261,13 @@ REG_OP(MaxPool3DGrad) .OUTPUT(y, TensorType::RealNumberType()) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding, String, "SAME") .REQUIRED_ATTR(pads, ListInt) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(MaxPool3DGrad) /** *@brief Performs AvgPool1D on the input . \n - *@par Inputs: *x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n @@ -1072,7 +1296,6 @@ REG_OP(AvgPool1D) /** *@brief Performs AvgPool1D on the input . \n - *@par Inputs: *x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n @@ -1103,33 +1326,38 @@ REG_OP(AvgPool1DD) .ATTR(count_include_pad, Bool, false) .OP_END_FACTORY_REG(AvgPool1DD) /** -*@brief Performs max pooling on the input and outputs both max values and indices . \n +* @brief Performs max pooling on the input and outputs both max values and indices . \n -*@par Inputs: +* @par Inputs: * One input: -*x: An NC1HWC0 Tensor of type float16. -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for -* each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for -* each dimension of the input tensor. No default value. -*@li pads: A required string. No default value. -*@li dtype: A optional int. default value is 3. -*@li dilation: A optional list of int8, int16, int32, or int64 values. 
-*@li ceil_mode: A optional bool. default value is false . \n +* x: An 5hd Tensor of type float16. +* Must set the format, supported format list ["NC1HWC0"]. +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, +* specifying the size of the window for each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, +* specifying the stride of the sliding window for each dimension of the input tensor. No default value. +* @li pads: A required list of int8, int16, int32, or int64 values, +* specifying the pad of the input feature map. No default value. \n +* @li dtype: A optional int. default value is 3. +* @li dilation: A optional list of int8, int16, int32, or int64 values. +* @li ceil_mode: A optional bool. default value is false . \n -*@par Outputs: -*y: A Tensor. Has the same type and format as input "x". -*argmax: A Tensor. type:uint16, format:NC1HWC0. -*@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. -*@li "strides is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, -* strides[2] <= 63, strides[2] >= 1. -*@li "dilation" is a list that has length 4. -*@li "ceil_mode" is a bool, default is false . \n +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". +* argmax: A Tensor. type:uint16. +* @attention Constraints: +* @li ksize: a list that has length 4: +* ksize[0] = 1, ksize[1] = 1, ksize[2] * ksize[3] <= (ub_size-8)*1024//6//2//16. +* @li strides: a list that has length 4: +* strides[0] = 1, strides[1] = 1, 1 <= strides[2] <= 2048, 1 <= strides[3] <= 2048. +* @li pads: a list that has length 4: +* pads[0] = 1, pads[1] = 1, 1 <= pads[2] <= (ksize[2]//2), 1 <= pads[3] <= (ksize[3]//2). +* @li dilation: a list that has length 4. +* @li ceil_mode: is a bool, default is false . \n -*@par Third-party framework compatibility -* Compatible with the TensorFlow operator MaxPoolWithArgmax. +* @par Third-party framework compatibility +* Compatible with the PyTorch operator max_pool2d_with_indices. */ REG_OP(MaxPoolWithArgmaxV2) .INPUT(x, TensorType({DT_FLOAT16})) @@ -1144,36 +1372,44 @@ REG_OP(MaxPoolWithArgmaxV2) .OP_END_FACTORY_REG(MaxPoolWithArgmaxV2) /** -*@brief Performs the backpropagation of MaxPoolWithArgmaxV2 . \n +* @brief Performs the backpropagation of MaxPoolWithArgmaxV2. \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li x: An NC1HWC0 tensor of type float16. -*@li grad: An NC1HWC0 tensor of type float16. -*@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n - -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for - * each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for - * each dimension of the input tensor. No default value. -*@li pads: A required string. No default value. -*@li dtype: A optional int. default value is 3. -*@li dilation: A optional list of int8, int16, int32, or int64 values. -*@li ceil_mode: A optional bool. default value is false . \n +* @li x: An 5hd tensor of type float16. +* Must set the format, supported format list ["NC1HWC0"] +* @li grad: An 5hd tensor of type float16. +* Must set the format, supported format list ["NC1HWC0"] +* @li argmax: An 5hd tensor of type uint16 or int64. 
+* Must set the format, supported format list ["NC1HWC0"] \n -*@par Outputs: -*y: A Tensor. Has the same type and format as input "x" . \n +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, +* specifying the size of the window for each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, +* specifying the stride of the sliding window for each dimension of the input tensor. No default value. +* @li pads: A required list of int8, int16, int32, or int64 values, +* specifying the pad of the input feature map. No default value. \n +* @li dtype: A optional int. default value is 3. +* @li dilation: A optional list of int8, int16, int32, or int64 values. +* @li ceil_mode: A optional bool. default value is false. \n -*@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. -*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 -*@li "dilation" is a list that has length 4. -*@li "ceil_mode" is a bool, default is false . \n +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". \n -*@see max_pool_grad_with_argmaxv2 -*@par Third-party framework compatibility -* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV2. +* @attention Constraints: +* @li ksize: a list that has length 4: +* ksize[0] = 1, ksize[1] = 1, ksize[2] * ksize[3] <= (ub_size-8)*1024//7//2//16. +* @li strides: a list that has length 4: +* strides[0] = 1, strides[1] = 1, 1 <= strides[2] <= 2048, 1 <= strides[3] <= 2048. +* @li pads: a list that has length 4: +* pads[0] = 1, pads[1] = 1, 1 <= pads[2] <= (ksize[2]//2), 1 <= pads[3] <= (ksize[3]//2). +* @li dilation: a list that has length 4. +* @li ceil_mode: is a bool, default is false. \n + +* @see max_pool_grad_with_argmaxv2 +* @par Third-party framework compatibility +* Compatible with the PyTorch backward operator of max_pool2d_with_indices. */ REG_OP(MaxPoolGradWithArgmaxV2) @@ -1194,7 +1430,7 @@ REG_OP(MaxPoolGradWithArgmaxV2) * @par Inputs: * One input: -* x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int32, int64, +* x: A Tensor. Supported type:float16, float32, double, int32, int64, * uint8, int16, int8, uint16, qint8 * @par Attributes: @@ -1206,7 +1442,7 @@ REG_OP(MaxPoolGradWithArgmaxV2) * the input tensor. No default value. * @li padding_mode: A required string. Defaults to "CALCULATED". * @li pads:A required list of int8, int16, int32, or int64 values, -* a data to caculate when padding_mode is "CALCULATED". +* a data to calculate when padding_mode is "CALCULATED". * @li data_format: An optional string. Defaults to "NHWC" . * @li global_pooling bool, Whether to use the global pooling. * If global_pooling = true, kernel size and paddings will be ignored. @@ -1223,7 +1459,7 @@ REG_OP(MaxPoolGradWithArgmaxV2) * ksize[1] * ksize[2] <= 255. * @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -* @li "padding" is "SAME" "VALID" or "CACULATE" . +* @li "padding" is "SAME" "VALID" or "CALCULATE" . * @par Third-party framework compatibility @@ -1245,9 +1481,9 @@ REG_OP(MaxPoolV3) * @brief Computes gradients of the maxpooling function . \n * @par Inputs: -* @li orig_input: A mutable NC1HWC0 tensor of type RealNumberType. -* @li orig_output: A mutable NC1HWC0 tensor of type RealNumberTypex. 
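The unified-buffer bound quoted in the MaxPoolWithArgmaxV2/MaxPoolGradWithArgmaxV2 constraints can be checked up front; a small helper, under the assumption that ub_size is the unified-buffer size in KB (the docs leave the unit implicit):

// divisor is 6 for the forward op and 7 for the backward op,
// per the constraints quoted above.
bool KsizeFitsUnifiedBuffer(int ksize_h, int ksize_w, int ub_size, int divisor) {
  return ksize_h * ksize_w <= (ub_size - 8) * 1024 / divisor / 2 / 16;
}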
-* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n
+* @li orig_input: A mutable tensor of type RealNumberType.
+* @li orig_output: A mutable tensor of type RealNumberType.
+* @li grad: A mutable tensor of type RealNumberType. \n

 * @par Attributes:
 * @li ksize: A required list of int8, int16, int32, or int64 values,
@@ -1268,7 +1504,7 @@ REG_OP(MaxPoolV3)
 * the floor function will be used. Default False \n

 * @par Outputs:
-* y: A mutable tensor. Has the same shape and type as "x1" . \n
+* out_grad: A mutable tensor. Has the same shape and type as "orig_input". \n

 * @attention Constraints:
 * @li Computing gradients of global pooling is not supported, which means
@@ -1291,5 +1527,361 @@ REG_OP(MaxPoolV3Grad)
     .ATTR(global_pooling, Bool, false)
     .ATTR(ceil_mode, Bool, false)
     .OP_END_FACTORY_REG(MaxPoolV3Grad)
+
+/**
+*@brief Performs Dilation2D on the input . \n
+
+*@par Inputs:
+*@li x: A 4-D tensor; the supported format is NHWC.
+*@li filter: A 3-D tensor with the same type as "x"; its C dimension is the same as that of "x". \n
+
+*@par Attributes:
+*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
+*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1.
+*@li padding_mode: An optional string. Defaults to "SAME"; "SAME" and "VALID" are supported.
+*@li pads: An optional list of 4 ints.
+*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
+*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
+
+*@par Outputs:
+*y: The output tensor. Has the same type and format as input "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Dilation2D.
+*/
+REG_OP(Dilation2D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(rates, ListInt)
+    .ATTR(padding_mode, String, "SAME")
+    .ATTR(pads, ListInt, {0,0,0,0})
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(Dilation2D)
+
+/**
+*@brief Performs Dilation2DBackpropFilter on the input. \n
+
+*@par Inputs:
+*@li x: A 4-D tensor; the supported format is NHWC.
+*@li filter: A 3-D tensor with the same type as "x"; its C dimension is the same as that of "x".
+*@li out_backprop: Has the same type and format as input "x"; its C dimension is the same as that of "x". \n
+
+*@par Attributes:
+*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
+*@li rates: A required list of 4 ints; the rates of the N and C dimensions are 1.
+*@li padding_mode: An optional string. Defaults to "SAME"; "SAME" and "VALID" are supported.
+*@li pads: An optional list of 4 ints.
+*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
+*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
+
+*@par Outputs:
+*y: The output tensor. Has the same type and format as input "filter" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Dilation2DBackpropFilter.
+*/
+
+REG_OP(Dilation2DBackpropFilter)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(filter,
+           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(out_backprop,
+           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .OUTPUT(y,
+            TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(rates, ListInt)
+    .ATTR(padding_mode, String, "SAME")
+    .ATTR(pads, ListInt, {0, 0, 0, 0})
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(Dilation2DBackpropFilter)
+
+/**
+*@brief Performs Dilation2DBackpropInput on the input. \n
+
+*@par Inputs:
+*@li x: A 4-D tensor; the supported format is NHWC.
+*@li filter: A 3-D tensor with the same type as "x"; its C dimension is the same as that of "x".
+*@li out_backprop: Has the same type and format as input "x"; its C dimension is the same as that of "x". \n
+
+*@par Attributes:
+*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
+*@li rates: A required list of 4 ints; the rates of the N and C dimensions are 1.
+*@li padding_mode: An optional string. Defaults to "SAME"; "SAME" and "VALID" are supported.
+*@li pads: An optional list of 4 ints.
+*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
+*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
+
+*@par Outputs:
+*y: The output tensor. Has the same type and format as input "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Dilation2DBackpropInput.
+*/
+
+REG_OP(Dilation2DBackpropInput)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(filter,
+           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(out_backprop,
+           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .OUTPUT(y,
+            TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(rates, ListInt)
+    .ATTR(padding_mode, String, "SAME")
+    .ATTR(pads, ListInt, {0, 0, 0, 0})
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(Dilation2DBackpropInput)
+
+/**
+* @brief Applies a 2D adaptive average pooling over
+* an input signal composed of several input planes. \n
+
+* @par Inputs:
+* One input, including:
+* @li x: A Tensor. Must be one of the following data types:
+* float16, float32. \n
+
+* @par Attributes:
+* @li output_size: A required list of 2 ints,
+* specifying the size (H,W) of the output tensor. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same data type as "x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator AdaptiveAvgPool2d.
+*/
+REG_OP(AdaptiveAvgPool2d)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(output_size, ListInt)
+    .OP_END_FACTORY_REG(AdaptiveAvgPool2d)
+
+/**
+* @brief Computes gradients of the 2D adaptive average pooling function.
+
+* @par Inputs:
+* @li input_grad: A Tensor. Must be one of the following data types:
+* float16, float32.
+
+* @par Attributes:
+* @li orig_input_shape: A required tuple or list of type int32.
+
+* @par Outputs:
+* @li output_grad: A tensor with the same type as "input_grad".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad.
+*/
+REG_OP(AdaptiveAvgPool2dGrad)
+    .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(orig_input_shape, ListInt)
+    .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad)
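Its backward pass then spreads each output gradient evenly over the same adaptive window; a 1-D sketch reusing AdaptiveWindow from the previous snippet (illustrative only):

#include <vector>

void AdaptiveAvgPoolGradRef1D(const std::vector<float> &dy,
                              std::vector<float> &dx, int in, int out) {
  for (int o = 0; o < out; ++o) {
    const Window w = AdaptiveWindow(o, in, out);
    for (int i = w.start; i < w.end; ++i) {
      dx[i] += dy[o] / static_cast<float>(w.end - w.start);
    }
  }
}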
+
+/**
+* @brief Performs the backpropagation of MaxPoolWithArgmaxV1.
+
+* @par Inputs:
+* Three inputs, including:
+* @li x: A tensor of type float16.
+* @li grad: A tensor of type float16.
+* @li argmax: A tensor of type uint16 or int64. \n
+
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n
+
+* @par Outputs:
+* y: A Tensor. Has the same type and format as input "x". \n
+
+* @attention Constraints:
+* @li The MaxPoolGradWithArgmaxV2 operator has the same function; it is recommended to use the V2 operator.
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[3] = 1, ksize[1] * ksize[2] <= (ub_size-8)*1024//7//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[3] = 1, 1 <= strides[1] <= 2048, 1 <= strides[2] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[3] = 1, 1 <= pads[1] <= (ksize[1]//2), 1 <= pads[2] <= (ksize[2]//2).
+* @li ceil_mode: defaults to False. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch backward operator of max_pool2d_with_indices.
+*/
+
+REG_OP(MaxPoolGradWithArgmaxV1)
+    .INPUT(x, TensorType({DT_FLOAT16}))
+    .INPUT(grad, TensorType({DT_FLOAT16}))
+    .INPUT(argmax, TensorType({DT_UINT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(dtype, Int, 3)
+    .ATTR(dilation, ListInt, {1, 1, 1, 1})
+    .ATTR(ceil_mode, Bool, false)
+    .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1)
+
+/**
+* @brief Performs max pooling on the input and outputs both max values and indices.
+
+* @par Inputs:
+* One input:
+* x: A Tensor of type float16. \n
+
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type and format as input "x".
+* @li argmax: A Tensor of type uint16. \n
+
+* @attention Constraints:
+* @li The MaxPoolWithArgmaxV2 operator has the same function; it is recommended to use the V2 operator.
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[3] = 1, ksize[1] * ksize[2] <= (ub_size-8)*1024//6//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[3] = 1, 1 <= strides[1] <= 2048, 1 <= strides[2] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[3] = 1, 1 <= pads[1] <= (ksize[1]//2), 1 <= pads[2] <= (ksize[2]//2).
+* @li ceil_mode: defaults to False.
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator max_pool2d_with_indices.
+*/
+REG_OP(MaxPoolWithArgmaxV1)
+    .INPUT(x, TensorType({DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .OUTPUT(argmax, TensorType({DT_UINT16}))
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(dtype, Int, 3)
+    .ATTR(dilation, ListInt, {1, 1, 1, 1})
+    .ATTR(ceil_mode, Bool, false)
+    .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)
+
+/**
+*@brief Randomly samples a subset of positive and negative examples, and overwrites
+the label vector with the ignore value (-1) for all elements that are not
+included in the sample. \n
+
+* @par Inputs:
+* One input:
+* labels: A label vector of shape (N,). \n
+
+* @par Attributes:
+* @li batch_size_per_images: A required attribute of type int.
+* @li positive_fraction: A required attribute of type float.
+
+*@par Outputs:
+*y: The result of SubSample. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SubSample.
+
+*@attention Constraints:
+*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+*/
+REG_OP(SubSample)
+    .INPUT(labels, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(batch_size_per_images, Int)
+    .REQUIRED_ATTR(positive_fraction, Float)
+    .OP_END_FACTORY_REG(SubSample)
+
+/**
+*@brief Randomly samples a subset of positive and negative examples, and overwrites
+the label vector with the ignore value (-1) for all elements that are not
+included in the sample. \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li labels: A label vector of shape (N,).
+* @li shuffle_matrix: A random matrix of shape (N,). \n
+
+* @par Attributes:
+* @li batch_size_per_images: A required attribute of type int.
+* @li positive_fraction: A required attribute of type float.
+
+*@par Outputs:
+*y: The result of SubSampleLabels. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SubSampleLabels.
+
+*@attention Constraints:
+*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+*/
+REG_OP(SubSampleLabels)
+    .INPUT(labels, TensorType({DT_INT32}))
+    .INPUT(shuffle_matrix, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(batch_size_per_images, Int)
+    .REQUIRED_ATTR(positive_fraction, Float)
+    .OP_END_FACTORY_REG(SubSampleLabels)
+
+/**
+*@brief Computes GlobalLpPool: consumes an input tensor X and applies Lp pooling across the
+* values in the same channel. \n
+
+*@par Inputs:
+* x: A Tensor of type float16 or float32. \n
+
+*@par Attributes:
+*@li p: An optional float32. Defaults to 2.0. \n
+
+*@par Outputs:
+* y: A Tensor.
Has the same type as "x", when shape of x is [N,C,H,W], shape of y is [N,C,1,1]. +*@par Third-party framework compatibility +* Compatible with the onnx operator GlobalLpPool. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(GlobalLpPool) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Float, 2.0) + .OP_END_FACTORY_REG(GlobalLpPool) + +/** +*@brief GlobalAveragePool consumes an input tensor X and applies average pooling across the values in the same channel. +This is equivalent to AveragePool with kernel size equal to the spatial dimension of input tensor \n + +*@par Inputs: +*@li x: Input data tensor from the previous operator; dimensions for image case are (N x C x H x W), +where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. +For non image case, the dimensions are in the form of (N x C x D1 x D2 ... Dn), where N is the batch size. + +*@par Outputs: +*y: Output data tensor from pooling across the input tensor. The output tensor has the same rank as the input. +The first two dimensions of output shape are the same as the input (N x C), while the other dimensions are all 1 + +*@par Restrictions: +*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. +*/ +REG_OP(GlobalAveragePool) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(GlobalAveragePool); + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 047fd6da..32da707e 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -159,6 +159,7 @@ REG_OP(SparseApplyAdagrad) .INPUT(grad, TensorType({DT_FLOAT})) .INPUT(indices, TensorType({DT_INT32})) .OUTPUT(var, TensorType({DT_FLOAT})) + .OUTPUT(accum, TensorType({DT_FLOAT})) .ATTR(use_locking, Bool, false) .ATTR(update_slots, Bool, true) .OP_END_FACTORY_REG(SparseApplyAdagrad) @@ -289,7 +290,8 @@ REG_OP(SparseApplyAdagradV2D) * Should be from a Variable(). *@li lr: A scalar. Has the same type as "var". *@li grad: A tensor for the gradient. Has the same type as "var". -* +*@li momentum: Momentum. Must be a scalar. + *@par Attributes: *@li use_nesterov: An optional bool. Defaults to "False". * If "True", the tensor passed to compute grad will be @@ -701,7 +703,7 @@ REG_OP(ApplyPowerSignD) /** *@brief Updates "var" as FOBOS algorithm with fixed learning rate. * prox_v = var - alpha * delta -* var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +* var = sign(prox_v)/(1+alpha * l2) * max{|prox_v|-alpha * l1,0} * *@attention Constraints: * the input tensors must have the same shape. @@ -2000,40 +2002,41 @@ REG_OP(ApplyAdadeltaD) .OP_END_FACTORY_REG(ApplyAdadeltaD) /** -* @brief Updates "var" according to the ApplyMomentum algorithm. 
-* accum = accum * momentum + x1 * x2 -* if use_nesterov is True: -* var -= x1 * x2 * lr + accum * momentum * lr -* else: -* var -= accum * lr +*@brief Updates "var" according to the ApplyMomentum algorithm. +* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: var -= accum * lr * -* @par Inputs: -* Six inputs, including: -* @li var: A mutable Tensor has type TensorType::NumberType(). -* Should be a Variable Tensor. -* @li accum: A mutable Tensor has the same type as "var". -* Should be a Variable Tensor. -* @li lr: A scalar has the same type as "var", for the scaling factor. -* @li x1: A Tensor has type TensorType::NumberType(). -* @li momentum: A scalar has the same type as "var". -* @li x2: A scalar has the same type as "var". +*@par Inputs: +* Six inputs, including: +*@li var: A mutable Tensor has type TensorType::NumberType(). +* Should be a Variable Tensor. +*@li accum: A mutable Tensor has the same type as "var". +* Should be a Variable Tensor. +*@li lr: A scalar has the same type as "var", for the scaling factor. +*@li x1: A Tensor has type TensorType::NumberType(). +*@li momentum: A scalar has the same type as "var". +*@li x2: A scalar has the same type as "var". \n * -* @par Attributes: -* Two attributes, including: -* @li use_nesterov: An optional bool. Defaults to "False". -* If True, the tensor passed to compute grad will be var - lr * momentum * accum, -* so in the end, the var you get is actually var - lr * momentum * accum. -* @li use_locking: An optional bool. Defaults to "False". -* If "True", updating of the "var", m", and "v" tensors will be protected -* by a lock; otherwise the behavior is undefined, but may exhibit less contention. +*@par Attributes: +* Two attributes, including: +*@li use_nesterov: An optional bool. Defaults to "False". +* If True, the tensor passed to compute grad will be +* var - lr * momentum * accum, so in the end, +* the var you get is actually var - lr * momentum * accum. +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", m", and "v" tensors will be protected +* by a lock; otherwise the behavior is undefined, but may exhibit +* less contention. \n * -* @par Outputs: -* Two outputs, including: -* @li var: A mutable Tensor has the same type as "var". -* @li accum: A mutable Tensor has the same type as "var". +*@par Outputs: +* Two outputs, including: +*@li var: A mutable Tensor has the same type as "var". +*@li accum: A mutable Tensor has the same type as "var". \n *@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulApplyMomentum) .INPUT(var, TensorType::NumberType()) @@ -2102,6 +2105,57 @@ REG_OP(FusedMulApplyMomentumExtern) .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern) /** +*@brief Updates '*var' according to the momentum scheme. +* accum = accum * momentum - x1 * x2 * lr +* if use_nesterov is True: +* var += accum * momentum - x1 * x2 * lr +* else: +* var += accum +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +*@li accum: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li lr: A tensor for the learning rate. Has the same type as "var". Should be +* from a Variable(). +*@li x1: A Tensor has type TensorType::NumberType(). +*@li momentum: A scalar. Has the same type as "var". 
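The momentum update quoted in the FusedMulApplyMomentum brief reduces to a few lines of scalar arithmetic, with x1 * x2 standing in for the pre-scaled gradient; a sketch (illustrative, not the fused NPU kernel):

void FusedMulApplyMomentumRef(float &var, float &accum, float lr, float x1,
                              float momentum, float x2, bool use_nesterov) {
  accum = accum * momentum + x1 * x2;
  if (use_nesterov) {
    var -= x1 * x2 * lr + accum * momentum * lr;
  } else {
    var -= accum * lr;
  }
}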
+*@li x2: A scalar has the same type as "var". +* +*@par Attributes: +*@li use_nesterov: An optional bool. Defaults to "False". +* If "True", var will be updated by using Nesterov momentum. +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +* @li var: A mutable tensor. Has the same type as input "var". +* @li accum: A mutable tensor. Has the same type as input "accum". +* +*@attention Constraints: +* @li var: A mutable tensor. Has the same type as input "var". +* @li accum: A mutable tensor. Has the same type as input "accum". +* +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. +* +*/ +REG_OP(FusedMulApplyKerasMomentum) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(x1, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(x2, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .ATTR(use_nesterov, Bool, false) + .OP_END_FACTORY_REG(FusedMulApplyKerasMomentum) + +/** *@brief Update "g" according to the LARS algorithm . \n *@par Inputs: @@ -2593,6 +2647,19 @@ REG_OP(SparseApplyAdadeltaD) REG_OP(AtomicAddrClean) .ATTR(automic_add_mem_size, ListInt, {}) .OP_END_FACTORY_REG(AtomicAddrClean) + +/** +*@brief Clean memory of workspace list . \n + +*@par Attributes: +* @li workspace_size: sizes of workspaces . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicAtomicAddrClean) + .ATTR(automic_add_mem_size, ListInt, {}) + .OP_END_FACTORY_REG(DynamicAtomicAddrClean) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index 7834591c..b27b1fa0 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index e0e5dfc6..e4d7936c 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,35 +25,118 @@ namespace ge { /** -*@brief Computes the for the gelu of "x" . \n +*@brief The GELU activation function is x*Φ(x), +* where Φ(x) the standard Gaussian cumulative distribution function. *@par Inputs: +*One input, including: \n +*x: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator Gelu. 
+*/
+REG_OP(Gelu)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(Gelu)
+
+/**
+* @brief Compute hard_swish of "x" element-wise . \n
+
+*@par Inputs:
+*One input, including:
+*x: A Tensor. Must be one of the following types: float16, float32
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+*@par Third-party framework compatibility
+* Compatible with the Torch operator HardSwish.
+*/
+REG_OP(HardSwish)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(HardSwish)
+
+/**
+*@brief Computes the gradient for the hard_swish of "x" . \n
+
+* @par Inputs:
 *Two inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32
+* @li grad: A Tensor. Must be one of the following types: float16, float32
+* @li x: A Tensor of the same type as "grad" . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "grad".
+* @par Third-party framework compatibility
+* Compatible with the Torch operator HardSwishGrad.
+*/
+REG_OP(HardSwishGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(HardSwishGrad)
+
+/**
+*@brief Computes the Swish of "x" . \n
+
+*@par Inputs:
+*One input, including:
+*x: A Tensor. Must be one of the following types: float16, float32
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x".
+
+*@par Attributes:
+*scale: scalar parameter, default value = 1.0
+
 *@par Third-party framework compatibility
-*Compatible with the TensorFlow operator Gelu
+*Compatible with the Torch operator Swish
 */
-REG_OP(Gelu)
+REG_OP(Swish)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .OP_END_FACTORY_REG(Gelu)
+    .ATTR(scale, Float, 1.0)
+    .OP_END_FACTORY_REG(Swish)
 
 /**
-*@brief Computes the gradient for the gelu of "x" . \n
+*@brief Computes the gradient for the Swish of "x" . \n
 
 *@par Inputs:
 *Three inputs, including:
-* @li dy: A Tensor. Must be one of the following types: float16, float32
-* @li x: A Tensor of the same type as "dy".
-* @li y: A Tensor of the same type as "dy" . \n
+* @li grad: A Tensor. Must be one of the following types: float16, float32
+* @li x: A Tensor of the same type as "grad".
+* @li y: A Tensor of the same type as "grad" . \n
+* @par Attributes:
+* scale: An optional scalar. The data type is float . \n
+*@par Outputs:
+*grad_x: A Tensor. Has the same type as "grad".
+*@par Third-party framework compatibility
+*Compatible with the Torch operator SwishGrad
+*/
+REG_OP(SwishGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(scale, Float, 1.0)
+    .OP_END_FACTORY_REG(SwishGrad)
+
+/**
+*@brief Computes the gradient for the gelu of "x" .
+
+*@par Inputs:
+* Three inputs, including:
+*@li dy: A Tensor. Must be one of the following types: float16, float32.
+*@li x: A Tensor of the same type as "dy".
+*@li y: A Tensor of the same type as "dy" . \n
 
 *@par Outputs:
 *z: A Tensor. Has the same type as "dy".
 *@par Third-party framework compatibility
-*Compatible with the TensorFlow operator GeluGrad
+* Compatible with the TensorFlow operator GeluGrad.
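+*
+* For reference, the mathematical identity behind this op (stated as an
+* assumption about the formula, not the exact kernel computation) is
+* d/dx gelu(x) = Φ(x) + x*φ(x), with φ the standard normal density, so
+* elementwise z ≈ dy * (Φ(x) + x*φ(x)).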
 */
 REG_OP(GeluGrad)
     .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -63,11 +146,11 @@ REG_OP(GeluGrad)
     .OP_END_FACTORY_REG(GeluGrad)
 
 /**
-*@brief Computes the for the fast_gelu of "x" . \n
+*@brief The FastGelu activation function is x*e^(0.851*(x-|x|))/(1+e^(-1.702*|x|)). \n
 
 *@par Inputs:
-*Two inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32
+*One input, including:
+*x: A Tensor. Must be one of the following types: float16, float32
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x".
@@ -78,12 +161,28 @@ REG_OP(FastGelu)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OP_END_FACTORY_REG(FastGelu)
+/**
+*@brief The FastGeluV2 activation function is x*(sgn(x)*[(a/2)*(clip(|x|,max=-b)+b)^2+0.5]+0.5),
+* where the sgn(x) function is (x+0.000000000001)/|(x+0.000000000001)|. \n
+*@par Inputs:
+*One input, including:
+*x: A Tensor. Must be one of the following types: float16, float32
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator FastGeluV2
+*/
+REG_OP(FastGeluV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(FastGeluV2)
 
 /**
 *@brief Computes the gradient for the fast_gelu of "x" . \n
 
 *@par Inputs:
-*Three inputs, including:
+*Two inputs, including:
 * @li dy: A Tensor. Must be one of the following types: float16, float32
 * @li x: A Tensor of the same type as "dy" . \n
 
@@ -98,7 +197,6 @@ REG_OP(FastGeluGrad)
     .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OP_END_FACTORY_REG(FastGeluGrad)
 
-
 /**
 *@brief Computes the gradient for the tanh of "x" . \n
 
@@ -169,7 +267,7 @@ REG_OP(Relu)
 * x: A Tensor of type RealNumberType . \n
 
 * @par Outputs:
-* y: A Tensor of type RealNumberType . \n
+* y: A Tensor with the same type as x . \n
 
 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator Relu6.
@@ -209,8 +307,12 @@ REG_OP(Relu6D)
 * backprops = gradients * (features > 0) * (features < 6) . \n
 
 * @par Inputs:
-* @li features: A Tensor of type RealNumberType.
-* @li gradients: A Tensor of type RealNumberType . \n
+* @li gradients: A Tensor of type RealNumberType. The backpropagated
+* gradients to the corresponding Relu6 operation.
+* @li features: A Tensor with the same type as gradients. The features passed
+* as input to the corresponding Relu6 operation, or its output;
+* using either one produces the same result. \n
+
 
 * @par Outputs:
 * backprops: A Tensor of type RealNumberType . \n
@@ -223,7 +325,29 @@ REG_OP(Relu6Grad)
     .INPUT(features, TensorType::RealNumberType())
     .OUTPUT(backprops, TensorType::RealNumberType())
     .OP_END_FACTORY_REG(Relu6Grad)
-
+/**
+*@brief Calculate the elu_grad_v2 function.
+*Applies the element-wise function:
+* Computes the elu backward: the derivative is 1 if x > 0; otherwise elu(x) + alpha .
+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor. Must be one of the following types:
+* float16, float32.
+* @li activations: A tensor. Must be one of the following types:
+* float16, float32.
+*
+*@par Outputs:
+*y: A Tensor with the same type and shape as "grads".
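+*
+* A scalar sketch of the computation just described (illustrative only,
+* assuming elementwise semantics and using "activations" as the elu
+* forward outputs):
+*   y = (activations > 0) ? grads : grads * (activations + alpha);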
+*
+*@par Attributes:
+*alpha: An optional float, scalar parameter. Defaults to 1.0.
+*/
+REG_OP(EluGradV2)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 1.0)
+    .OP_END_FACTORY_REG(EluGradV2)
 /**
 * @brief Compute sigmoid of "x" element-wise . \n
@@ -296,12 +420,12 @@ REG_OP(Softplus)
     .OP_END_FACTORY_REG(Softplus)
 
 /**
-*@brief Computes softplus gradients for a softplus operation . \n
+* @brief Computes softplus gradients for a softplus operation .
 
 *@par Inputs:
 *Two inputs:
-* @li gradients: An NC1HWC0 or ND Tensor of type float16 or float32.
-* @li features: An NC1HWC0 or ND Tensor of type float16 or float32.
+* @li gradients: A ND Tensor of type float16 or float32.
+* @li features: A ND Tensor of type float16 or float32.
 
 *@par Outputs:
@@ -317,7 +441,7 @@ REG_OP(SoftplusGrad)
     .OP_END_FACTORY_REG(SoftplusGrad)
 
 /**
-*@brief Computes softsign: x/(abs(x) + 1) . \n
+* @brief Computes softsign: x/(abs(x) + 1) .
 
 *@par Inputs:
 * One input:
@@ -335,15 +459,34 @@ REG_OP(Softsign)
     .OP_END_FACTORY_REG(Softsign)
 
 /**
+ * @brief Computes softsignGrad: gradients / (1 + abs(features)) ** 2 .
+ *
+ * @par Inputs:
+ * Two inputs, including:
+ * @li gradients: A Tensor. Must be one of the following types: float16, float32.
+ * @li features: A Tensor of the same type and shape as "gradients".
+
+ * @par Outputs:
+ * output: A Tensor. Has the same type as "gradients".
+ * @par Third-party framework compatibility
+ * Compatible with the TensorFlow operator SoftsignGrad.
+ */
+REG_OP(SoftsignGrad)
+    .INPUT(gradients, TensorType::FloatingDataType())
+    .INPUT(features, TensorType::FloatingDataType())
+    .OUTPUT(output, TensorType::FloatingDataType())
+    .OP_END_FACTORY_REG(SoftsignGrad)
+
+/**
 *@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1) . \n
 
 *@par Inputs:
 * One input:
 *x: A Tensor. Must be one of the following types: float16, float, double
- * int32, int8. format:ND, NC1HWC0 . \n
+ * int32, int8. format:ND. \n
 
 *@par Outputs:
-*y: A Tensor. Has the same type and format as input "x". format:ND, NC1HWC0 . \n
+*y: A Tensor. Has the same type and format as input "x". format:ND. \n
 
 *@see Region()
 
@@ -358,6 +501,26 @@ REG_OP(Selu)
     .OP_END_FACTORY_REG(Selu)
 
 /**
+* @brief Computes SeluGrad backprops: gradients * (outputs + scale * alpha)
+* if outputs < 0, scale * gradients otherwise .
+
+* @par Inputs:
+* Two inputs, including:
+* @li gradients: A Tensor of type RealNumberType .
+* @li outputs: A Tensor of type RealNumberType .
+* @par Outputs:
+* y: A Tensor. Must have the same type as "gradients" .
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator SeluGrad.
+*/
+REG_OP(SeluGrad)
+    .INPUT(gradients, TensorType::RealNumberType())
+    .INPUT(outputs, TensorType::RealNumberType())
+    .OUTPUT(y, TensorType::RealNumberType())
+    .OP_END_FACTORY_REG(SeluGrad)
+
+/**
 *@brief Computes rectified linear gradients for a ReLU operation . \n
 
 *@par Inputs:
@@ -509,6 +672,66 @@ REG_OP(Elu)
     .OP_END_FACTORY_REG(Elu)
 
 /**
+*@brief Continuously Differentiable Exponential Linear Units:
+* Perform the linear unit element-wise on the input tensor X using formula:
+* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n
+
+*@par Inputs:
+*x: A float16, float32, for the input data type . \n
+
+*@par Attributes:
+*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
+*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
+*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
+
+*@par Outputs:
+*y: A float16, float32, for the normalized result . \n
+
+*@attention Constraints:
+*@li The input is of type float16 or float32 . \n
+
+*@par Multiple batches supported or not
+*Supported
+*@par Third-party framework compatibility
+*@li Compatible with ONNX's Celu operator
+*/
+REG_OP(Celu)
+    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .ATTR(alpha1, Float, 1.0)
+    .ATTR(alpha2, Float, 1.0)
+    .ATTR(alpha3, Float, 1.0)
+    .OP_END_FACTORY_REG(Celu)
+
+/**
+*@brief Continuously Differentiable Exponential Linear Units:
+* Perform the linear unit element-wise on the input tensor X using formula:
+* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n
+
+*@par Inputs:
+*x: A float16, float32, for the input data type . \n
+
+*@par Attributes:
+*@li alpha: A float32. Defines at which negative value the CELU saturates. Defaults to "1.0" .
+
+*@par Outputs:
+*y: A float16, float32, for the normalized result . \n
+
+*@attention Constraints:
+*@li The input is of type float16 or float32 . \n
+
+*@par Multiple batches supported or not
+*Supported
+*@par Third-party framework compatibility
+*@li Compatible with ONNX's Celu operator
+*/
+REG_OP(CeluV2)
+    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .ATTR(alpha, Float, 1.0)
+    .OP_END_FACTORY_REG(CeluV2)
+
+/**
 *@brief Computes gradients for the exponential linear (Elu) operation.
 *
 *@par Inputs:
@@ -576,7 +799,7 @@ REG_OP(LeakyReluGrad)
     .OP_END_FACTORY_REG(LeakyReluGrad)
 
 /**
-*@brief Thresholds grad each element of the input Tensor . \n
+*@brief Thresholds grad each element of the input Tensor .
 
 *@par Inputs:
 * @li gradients: A Tensor shape and dtype of input gradients. Support float16, int32.
@@ -599,7 +822,7 @@ REG_OP(ThresholdGradV2D)
     .OP_END_FACTORY_REG(ThresholdGradV2D)
 
 /**
-*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . \n
+*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value .
 
 *@par Inputs:
 *x: A Tensor dtype of real number . \n
@@ -640,6 +863,383 @@ REG_OP(Mish)
     .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 }))
     .OP_END_FACTORY_REG(Mish)
 
+/**
+ * @brief: pytorch mish_grad operator.
+ * @par Inputs:
+ * Three inputs, including:
+ * @li grad: A Tensor. Shape, datatype and format are the same as "x".
+ * @li x: A Tensor. Must be one of the following types: float16, float32
+ * @li tanhx: A Tensor. Shape, datatype and format are the same as "x".
+ * @par Outputs:
+ * One output, including:
+ * x_grad: A Tensor. Shape, datatype and format are the same as "x".
+ */
+
+REG_OP(MishGrad)
+    .INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OP_END_FACTORY_REG(MishGrad)
+
+/**
+ * @brief pytorch hardtanh_backward operator.
+ *
+ * @par Inputs:
+ * Two inputs, including:
+ * @li result: output tensor of the forward hardtanh,
+ * datatype: float16/float32, format:ND/5HD.
+ * @li grad: gradient tensor from the upstream layer,
+ * datatype:float16/float32, format:ND/5HD. \n
 
+ * @par Attributes:
+ * Two attributes, including:
+ * @li min_val, minimum value of the linear region range, datatype:float.
+ * @li max_val, maximum value of the linear region range, datatype:float. \n
 
+ * @par Outputs:
+ * One output, including:
+ * y, hardtanh_backward output tensor, datatype and format are the same as
+ * input result. \n
 
+ * @attention Constraints:
+ * This operator only supports dataType: float16/float32, format: ND/5HD. \n
 
+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator HardtanhGrad.
+ */
+REG_OP(HardtanhGrad)
+    .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */
+    .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */
+    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */
+    .ATTR(min_val, Float, -1.0)
+    .ATTR(max_val, Float, 1.0)
+    .OP_END_FACTORY_REG(HardtanhGrad)
+
+/**
+* @brief Calculates the softplus loss function with attributes of beta and threshold. \n

+* @par Inputs:
+* One input, including:
+* x: A mutable Tensor. Must be one of the following types:
+* float16, float32. \n

+* @par Attributes:
+* @li beta: An optional float. Defaults to "1.0" \n

+* @li threshold: An optional float. Defaults to "20.0" \n

+* @par Outputs:
+* y: A mutable Tensor. Has the same type as "x" \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Softplus.
+*/
+REG_OP(SoftplusV2)
+    .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(beta, Float, 1.0)
+    .ATTR(threshold, Float, 20.0)
+    .OP_END_FACTORY_REG(SoftplusV2)
+
+/**
+* @brief Calculates the reversed outputs of the function "softplus_v2". \n

+* @par Inputs:
+* Two inputs, including:
+* @li input_gradients: A mutable Tensor. Must be one of the following types:
+* float16, float32.
+* @li input_features: A mutable Tensor of the same type as "input_gradients" \n

+* @par Attributes:
+* @li beta: An optional float. Defaults to "1.0" \n

+* @li threshold: An optional float. Defaults to "20.0" \n

+* @par Outputs:
+* output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator SoftplusGrad.
+*/
+REG_OP(SoftplusV2Grad)
+    .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(beta, Float, 1.0)
+    .ATTR(threshold, Float, 20.0)
+    .OP_END_FACTORY_REG(SoftplusV2Grad)
+
+/**
+ * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor)
+ * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
+ *
+ * @par Inputs:
+ * One input, including:
+ * x: A Tensor. Must be one of the following types: float32, float16
+ *
+ * @par Attributes:
+ * alpha: An optional float. Defaults to 1.0. \n

+ * @par Outputs:
+ * One output, including:
+ * y: A Tensor of the same type as x
+ *
+ */
+REG_OP(ThresholdedRelu)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(alpha, Float, 1.0)
+    .OP_END_FACTORY_REG(ThresholdedRelu)
+
+/**
+* @brief Calculate the hard shrinkage function. \n

+* @par Inputs:
+* One input, including:
+* input_x: A tensor. Must be one of the following types:
+* float16, float32. \n

+* @par Attributes:
+* lambd: An optional float. Defaults to 0.5. \n

+* @par Outputs:
+* output_y: A Tensor with the same dtype and shape as "input_x". \n
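+* Elementwise sketch of the shrinkage rule (illustrative only, matching the
+* usual Hardshrink definition):
+*   output_y = (input_x > lambd || input_x < -lambd) ? input_x : 0;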
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Hardshrink. \n
+*/
+REG_OP(HardShrink)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambd, Float, 0.5)
+    .OP_END_FACTORY_REG(HardShrink)
+
+/**
+*@brief Calculate the hard shrink grad function. \n
+*
+* Computes the gradient for the HardShrink: if x > lambda or x < -lambda, x, otherwise 0
+*
+*@par Inputs:
+*Two inputs, including:
+* @li gradients: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li features: A tensor. Must be one of the following types:
+* float16, float32. \n
+*
+*@par Outputs:
+*backprops: A Tensor with the same type and shape as "features". \n
+*
+*@par Attributes:
+*lambd: An optional float. Defaults to 0.5. \n
+*
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Hardshrink_backward. \n
+*/
+ REG_OP(HardShrinkGrad)
+ .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT}))
+ .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
+ .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
+ .ATTR(lambd, Float, 0.5)
+ .OP_END_FACTORY_REG(HardShrinkGrad)
+
+/**
+* @brief Calculate the hard sigmoid function. \n

+* @par Inputs:
+* One input, including:
+* input_x: A tensor. Must be one of the following types:
+* float16, float32, int32. \n

+* @par Attributes:
+* @li alpha: An optional float. Defaults to 0.16666666. \n
+* @li beta: An optional float. Defaults to 0.5. \n

+* @par Outputs:
+* output_y: A Tensor with the same dtype and shape as "input_x". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Hardsigmoid. \n
+*/
+REG_OP(HardSigmoid)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 0.16666666)
+    .ATTR(beta, Float, 0.5)
+    .OP_END_FACTORY_REG(HardSigmoid)
+
+/**
+* @brief Calculate the soft shrinkage function. \n

+* @par Inputs:
+* One input, including:
+* input_x: A tensor. Must be one of the following types:
+* float16, float32. \n

+* @par Attributes:
+* lambd: An optional float. Defaults to 0.5. \n

+* @par Outputs:
+* output_y: A Tensor with the same dtype and shape as "input_x". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Softshrink. \n
+*/
+REG_OP(SoftShrink)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambd, Float, 0.5)
+    .OP_END_FACTORY_REG(SoftShrink)
+
+/**
+* @brief Calculate the reversed outputs of the function "soft_shrink". \n

+* @par Inputs:
+* Two inputs, including:
+* @li input_grad: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li input_x: A tensor of the same dtype as "input_grad". \n

+* @par Attributes:
+* lambd: An optional float. Defaults to 0.5. \n

+* @par Outputs:
+* output_y: A Tensor of the same dtype and shape as "input_grad". \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator SoftShrinkGrad. \n
+*/
+REG_OP(SoftShrinkGrad)
+    .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambd, Float, 0.5)
+    .OP_END_FACTORY_REG(SoftShrinkGrad)
+
+/**
+*@brief Calculate the gradient of log sigmoid. \n

+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor, gradient of previous layer. Must be one of the following types:
+* float16, float32. \n
+* @li features: A tensor, input of log sigmoid. Must be one of the following types:
+* float16, float32. \n

+*@par Outputs:
+*One output, including:
+* backprops: A tensor with the same type and shape as "grads". \n

+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator LogSigmoidBackward. \n
+*/
+REG_OP(LogSigmoidGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(LogSigmoidGrad)
+
+/**
+*@brief Calculate -ln(1+e^(-x)). \n

+*@par Inputs:
+*One input, including:
+* x: A tensor. Must be one of the following types:
+* float16, float32. \n

+*@par Outputs:
+*One output, including:
+* y: A tensor with the same type and shape as "x". \n

+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator LogSigmoid. \n
+*/
+REG_OP(LogSigmoid)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */
+    .OP_END_FACTORY_REG(LogSigmoid)
+
+/**
+*@brief Calculate the backward outputs of the function "hard_sigmoid" \n

+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li input_x: A tensor. Must be one of the following types:
+* float16, float32. \n

+*@par Outputs:
+*One output, including:
+* y: A tensor with the same type and shape as "input_x". \n

+* @par Attributes:
+* @li alpha: An optional float. Defaults to 0.16666666. \n
+* @li beta: An optional float. Defaults to 0.5. \n

+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator HardSigmoidGrad. \n
+*/
+REG_OP(HardSigmoidGrad)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 0.16666666)
+    .ATTR(beta, Float, 0.5)
+    .OP_END_FACTORY_REG(HardSigmoidGrad)
+
+/**
+* @brief Calculate the shrink function. \n

+* @par Inputs:
+* One input, including:
+* @li input_x: A tensor. Must be one of the following types:
+* float16, float32. \n

+* @par Attributes:
+* @li lambd: An optional float. Defaults to 0.5. \n
+* @li bias: An optional float. Defaults to 0.0. \n

+* @par Outputs:
+* output_y: A Tensor with the same dtype and shape as "input_x". \n

+* @par Third-party framework compatibility
+* Compatible with the ONNX operator Shrink. \n
+*/
+REG_OP(Shrink)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambd, Float, 0.5)
+    .ATTR(bias, Float, 0.0)
+    .OP_END_FACTORY_REG(Shrink)
+
+/**
+* @brief Thresholds each element of the input Tensor: y = (x > threshold) ? x : value \n

+* @par Inputs:
+* Three inputs, including:
+* @li x: A Tensor.
+* Must be one of the following types on Ascend310: float16, int8, int32, uint8.
+* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n
+* @li threshold: A Tensor which should have the shape (1,), the value to threshold at.
+* Must be one of the following types on Ascend310: float16, int8, int32, uint8.
+* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n
+* @li value: A Tensor which should have the shape (1,), the value to replace with. Defaults to 0.
+* Must be one of the following types on Ascend310: float16, int8, int32, uint8.
+* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n

+* @par Outputs:
+* y: A Tensor which has the same shape and type as the input x. \n

+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Threshold.
+*/
+REG_OP(ThresholdV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8, DT_INT32, DT_UINT8}))
+    .INPUT(threshold, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8, DT_INT32, DT_UINT8}))
+    .OPTIONAL_INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8, DT_INT32, DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8, DT_INT32, DT_UINT8}))
+    .OP_END_FACTORY_REG(ThresholdV2)
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
index 8d7ef9f9..b69abea5 100644
--- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
+++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -117,6 +117,32 @@ REG_OP(NPUGetFloatStatus)
     .INPUT(addr, TensorType{DT_FLOAT})
     .OUTPUT(data, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(NPUGetFloatStatus)
+
+
+/**
+*@brief Set the value of the global workspace to 0. \n

+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(NPUClearFloatStatusV2)
+    .OP_END_FACTORY_REG(NPUClearFloatStatusV2)
+
+/**
+*@brief Get the float status from the global workspace. \n

+*@par Outputs:
+*data: A Tensor of type int32.

+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(NPUGetFloatStatusV2)
+    .OUTPUT(data, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(NPUGetFloatStatusV2)
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/ocr_ops.h b/third_party/fwkacllib/inc/ops/ocr_ops.h
new file mode 100644
index 00000000..9f43c8a6
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/ocr_ops.h
@@ -0,0 +1,271 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file ocr_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+*@brief Batch input x according to attr batch_size and enqueue.
+*@par Inputs:
+*@li x: A Tensor to batch, of type float16/float32/float64/int8/int32/int64/uint8/uint32/uint64. \n
+*@li queue_id: A Tensor of type uint32, queue id.
+
+*@par Outputs:
+*enqueue_count: A Tensor of type int32, the number of enqueued tensors.
+
+*@par Attributes:
+*@li batch_size: An optional int. Batch size.
+*@li queue_name: An optional string. Queue name.
+*@li queue_depth: An optional int. Queue depth.
+*@li pad_mode: An optional string from: '"REPLICATE", "ZERO"'. Defaults to
+"REPLICATE". Pad mode.
+*/
+REG_OP(BatchEnqueue)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
+        DT_INT8, DT_INT32, DT_INT64, DT_UINT8, DT_UINT32, DT_UINT64}))
+    .OPTIONAL_INPUT(queue_id, TensorType({DT_UINT32}))
+    .OUTPUT(enqueue_count, TensorType({DT_INT32}))
+    .ATTR(batch_size, Int, 8)
+    .ATTR(queue_name, String, "")
+    .ATTR(queue_depth, Int, 100)
+    .ATTR(pad_mode, String, "REPLICATE")
+    .OP_END_FACTORY_REG(BatchEnqueue)
+
+/**
+*@brief OCR recognition pre handle: batch input images according to attr batch_size.
+*@par Inputs:
+*@li imgs_data: A Tensor of type uint8. Multi img data value. \n
+*@li imgs_offset: A Tensor of type int32. Offset of every img data in input imgs_data. \n
+*@li imgs_size: A Tensor of type int32. Shape of every img data. \n
+*@li langs: A Tensor of type int32. Lang of every img data. \n
+*@li langs_score: A Tensor of type float. Lang score of every img data. \n
+
+*@par Outputs:
+*@li imgs: A Tensor of type uint8. Multi imgs data after recognition pre handle.
+*@li imgs_relation: A Tensor of type int32. Output imgs orders in input imgs.
+*@li imgs_lang: A Tensor of type int32. Output batch imgs langs.
+
+*@par Attributes:
+*@li batch_size: An optional int. Batch size.
+*@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to
+"NHWC". Data format.
+*@li pad_mode: An optional string from: '"REPLICATE", "ZERO"'. Defaults to
+"REPLICATE". Pad mode.
+*/
+REG_OP(OCRRecognitionPreHandle)
+    .INPUT(imgs_data, TensorType({DT_UINT8}))
+    .INPUT(imgs_offset, TensorType({DT_INT32}))
+    .INPUT(imgs_size, TensorType({DT_INT32}))
+    .INPUT(langs, TensorType({DT_INT32}))
+    .INPUT(langs_score, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(imgs, TensorType({DT_UINT8}))
+    .OUTPUT(imgs_relation, TensorType({DT_INT32}))
+    .OUTPUT(imgs_lang, TensorType({DT_INT32}))
+    .OUTPUT(imgs_piece_fillers, TensorType({DT_INT32}))
+    .ATTR(batch_size, Int, 8)
+    .ATTR(data_format, String, "NHWC")
+    .ATTR(pad_mode, String, "REPLICATE")
+    .OP_END_FACTORY_REG(OCRRecognitionPreHandle)
+
+/**
+*@brief OCR detection pre handle.
+*@par Inputs:
+*img: A Tensor of type uint8. Img data value. \n
+
+*@par Outputs:
+*@li resized_img: A Tensor of type uint8. Img after detection pre handle.
+*@li h_scale: A Tensor of type float. H scale.
+*@li w_scale: A Tensor of type float. W scale.
+
+*@par Attributes:
+*data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to
+"NHWC". Data format.
+*/
+REG_OP(OCRDetectionPreHandle)
+    .INPUT(img, TensorType({DT_UINT8}))
+    .OUTPUT(resized_img, TensorType({DT_UINT8}))
+    .OUTPUT(h_scale, TensorType({DT_FLOAT}))
+    .OUTPUT(w_scale, TensorType({DT_FLOAT}))
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(OCRDetectionPreHandle)
+
+/**
+*@brief OCR identify pre handle.
+*@par Inputs:
+*@li imgs_data: A Tensor of type uint8. Multi img data value. \n
+*@li imgs_offset: A Tensor of type int32. Offset of every img data in input imgs_data. \n
+*@li imgs_size: A Tensor of type int32. Shape of every img data. \n
+
+*@par Outputs:
+*resized_imgs: A Tensor of type uint8. Multi imgs after identify pre handle.
+
+*@par Attributes:
+*@li size: A required list of ints. Target size.
+*@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to
+"NHWC". Data format.
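+
+* Illustrative example (assumed semantics, given that "size" is a required
+* ListInt): with size = {32, 320} and data_format = "NHWC", every input
+* image would be resized to height 32 and width 320 in "resized_imgs".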
+*/
+REG_OP(OCRIdentifyPreHandle)
+    .INPUT(imgs_data, TensorType({DT_UINT8}))
+    .INPUT(imgs_offset, TensorType({DT_INT32}))
+    .INPUT(imgs_size, TensorType({DT_INT32}))
+    .OUTPUT(resized_imgs, TensorType({DT_UINT8}))
+    .REQUIRED_ATTR(size, ListInt)
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(OCRIdentifyPreHandle)
+
+/**
+*@brief Batch dilate polygons according to expand_scale.
+*@par Inputs:
+*@li polys_data: A Tensor of type int32. Point data of every polygon. \n
+*@li polys_offset: A Tensor of type int32. Offset of every polygon. \n
+*@li polys_size: A Tensor of type int32. Size of every polygon. \n
+*@li score: A Tensor of type float. Score of every point in image. \n
+*@li min_border: A Tensor of type int32. Minimum width of each polygon. \n
+*@li min_area_thr: A Tensor of type int32. Minimum area of each polygon. \n
+*@li score_thr: A Tensor of type float. Minimum confidence score of each polygon. \n
+*@li expands_cale: A Tensor of type float. Polygon expansion multiple. \n
+
+*@par Outputs:
+*@li dilated_polys_data: A Tensor of type int32. Point data of every dilated polygon. \n
+*@li dilated_polys_offset: A Tensor of type int32. Offset of every dilated polygon. \n
+*@li dilated_polys_size: A Tensor of type int32. Size of every dilated polygon. \n
+*/
+REG_OP(BatchDilatePolys)
+    .INPUT(polys_data, TensorType({DT_INT32}))
+    .INPUT(polys_offset, TensorType({DT_INT32}))
+    .INPUT(polys_size, TensorType({DT_INT32}))
+    .INPUT(score, TensorType({DT_FLOAT}))
+    .INPUT(min_border, TensorType({DT_INT32}))
+    .INPUT(min_area_thr, TensorType({DT_INT32}))
+    .INPUT(score_thr, TensorType({DT_FLOAT}))
+    .INPUT(expands_cale, TensorType({DT_FLOAT}))
+    .OUTPUT(dilated_polys_data, TensorType({DT_INT32}))
+    .OUTPUT(dilated_polys_offset, TensorType({DT_INT32}))
+    .OUTPUT(dilated_polys_size, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(BatchDilatePolys)
+
+/**
+*@brief Find contours according to img.
+*@par Inputs:
+*@li img: A Tensor of type uint8. Img data value. \n
+
+*@par Outputs:
+*@li polys_data: A Tensor of type int32. Point data of every contour. \n
+*@li polys_offset: A Tensor of type int32. Offset of every contour. \n
+*@li polys_size: A Tensor of type int32. Size of every contour. \n
+*/
+REG_OP(OCRFindContours)
+    .INPUT(img, TensorType({DT_UINT8}))
+    .OUTPUT(polys_data, TensorType({DT_INT32}))
+    .OUTPUT(polys_offset, TensorType({DT_INT32}))
+    .OUTPUT(polys_size, TensorType({DT_INT32}))
+    .ATTR(value_mode, Int, 0)
+    .OP_END_FACTORY_REG(OCRFindContours)
+
+/**
+*@brief Dequeue data according to queue_id and queue_name.
+*@par Inputs:
+*@li queue_id: A Tensor of type uint32, queue id. \n
+
+*@par Outputs:
+*data: A Tensor of type RealNumberType, the dequeued tensor. \n
+
+*@par Attributes:
+*@li output_type: A required type. Dequeued data type.
+*@li output_shape: A required listint. Dequeued data shape.
+*@li queue_name: An optional string. Queue name. \n
+*/
+REG_OP(Dequeue)
+    .OPTIONAL_INPUT(queue_id, TensorType({DT_UINT32}))
+    .OUTPUT(data, TensorType::RealNumberType())
+    .REQUIRED_ATTR(output_type, Type)
+    .REQUIRED_ATTR(output_shape, ListInt)
+    .ATTR(queue_name, String, "")
+    .OP_END_FACTORY_REG(Dequeue);
+
+/**
+*@brief OCR detection post handle.
+*@par Inputs:
+*@li img: A Tensor of type uint8. Original image data.
+*@li polys_data: A Tensor of type int32. Point data of every poly.
+*@li polys_offset: A Tensor of type int32. Offset of every poly.
+*@li polys_size: A Tensor of type int32. Size of every poly. \n
+
+*@par Outputs:
+*@li imgs_data: A Tensor of type uint8. Imgs data of original image.
+*@li imgs_offset: A Tensor of type int32. Offset of every imgs data.
+*@li imgs_size: A Tensor of type int32. Shape of every imgs data.
+*@li rect_points: A Tensor of type int32. Rect points of every imgs. \n
+
+*@par Attributes:
+*@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to
+"NHWC". Data format.
+*/
+REG_OP(OCRDetectionPostHandle)
+    .INPUT(img, TensorType({DT_UINT8}))
+    .INPUT(polys_data, TensorType({DT_INT32}))
+    .INPUT(polys_offset, TensorType({DT_INT32}))
+    .INPUT(polys_size, TensorType({DT_INT32}))
+    .OUTPUT(imgs_data, TensorType({DT_UINT8}))
+    .OUTPUT(imgs_offset, TensorType({DT_INT32}))
+    .OUTPUT(imgs_size, TensorType({DT_INT32}))
+    .OUTPUT(rect_points, TensorType({DT_INT32}))
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(OCRDetectionPostHandle);
+
+/**
+*@brief Resize and clip polys.
+*@par Inputs:
+*@li polys_data: A Tensor of type int32. Point data of every poly.
+*@li polys_offset: A Tensor of type int32. Offset of every poly.
+*@li polys_size: A Tensor of type int32. Size of every poly.
+*@li h_scale: A Tensor of type float. Expand scale of height.
+*@li w_scale: A Tensor of type float. Expand scale of width.
+*@li img_h: A Tensor of type int32. Height of original image.
+*@li img_w: A Tensor of type int32. Width of original image. \n
+
+*@par Outputs:
+*@li clipped_polys_data: A Tensor of type int32. Point data of every clipped poly. \n
+*@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly. \n
+*@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. \n
+*@li clipped_polys_num: A Tensor of type int32. Number of clipped polys. \n
+*/
+REG_OP(ResizeAndClipPolys)
+    .INPUT(polys_data, TensorType({DT_INT32}))
+    .INPUT(polys_offset, TensorType({DT_INT32}))
+    .INPUT(polys_size, TensorType({DT_INT32}))
+    .INPUT(h_scale, TensorType({DT_FLOAT}))
+    .INPUT(w_scale, TensorType({DT_FLOAT}))
+    .INPUT(img_h, TensorType({DT_INT32}))
+    .INPUT(img_w, TensorType({DT_INT32}))
+    .OUTPUT(clipped_polys_data, TensorType({DT_INT32}))
+    .OUTPUT(clipped_polys_offset, TensorType({DT_INT32}))
+    .OUTPUT(clipped_polys_size, TensorType({DT_INT32}))
+    .OUTPUT(clipped_polys_num, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(ResizeAndClipPolys);
+
+
+}  // namespace ge
+
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h
index e0b783bc..53b9d701 100644
--- a/third_party/fwkacllib/inc/ops/outfeed_ops.h
+++ b/third_party/fwkacllib/inc/ops/outfeed_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h
index f746b3b3..6d4bcd5e 100644
--- a/third_party/fwkacllib/inc/ops/pad_ops.h
+++ b/third_party/fwkacllib/inc/ops/pad_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -33,8 +33,8 @@ namespace ge {
 *@li value: A 0D scalar. Specifies the value to fill the returned tensor.
 * Must be one of the following types:
-* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
-* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
+* float16, float32, double, int32, uint8, int16, int8, complex64, int64, bool,
+* qint8, quint8, qint32, qint16, quint16, uint16, complex128, uint32, uint64.
 *
 *@par Outputs:
 * y: A tensor. Has the same type as "value".
@@ -46,8 +46,14 @@
 */
 REG_OP(Fill)
     .INPUT(dims, TensorType::IndexNumberType())
-    .INPUT(value, TensorType::BasicType())
-    .OUTPUT(y, TensorType::BasicType())
+    .INPUT(value, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16,
+                              DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8,
+                              DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16,
+                              DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16,
+                           DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8,
+                           DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16,
+                           DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64}))
     .OP_END_FACTORY_REG(Fill)
 
 /**
@@ -101,7 +107,7 @@ REG_OP(FillD)
 */
 REG_OP(BroadcastTo)
     .INPUT(x, TensorType::BasicType())
-    .INPUT(shape, TensorType({DT_INT32}))
+    .INPUT(shape, TensorType({DT_INT32,DT_INT64}))
     .OUTPUT(y, TensorType::BasicType())
     .OP_END_FACTORY_REG(BroadcastTo)
 
@@ -161,7 +167,7 @@ REG_OP(Pad)
 *@brief Pads a tensor . \n
 
 *@par Inputs:
-*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n
+*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
 
 *@par Attributes:
 *paddings: An optional "vector<vector<int>>". Defaults to "{}".
@@ -180,8 +186,8 @@ REG_OP(Pad)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
 */
 REG_OP(PadD)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .REQUIRED_ATTR(paddings, ListListInt)
     .OP_END_FACTORY_REG(PadD)
 
@@ -213,11 +219,11 @@ REG_OP(PadV2)
 *@brief Pads a tensor . \n
 
 *@par Inputs:
-*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n
-*constant_values: A Tensor. Must have the same type as input.
+*@li x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
+*@li constant_values: A Tensor. Must have the same type as input.
 
 *@par Attributes:
-*paddings: An optional "vector<vector<int>>". Defaults to "{}".
+*paddings: A required attribute.
 * For each dimension D of input, paddings[D, 0] indicates how many
 * values to add before the contents of tensor in that dimension,
 * and paddings[D, 1] indicates how many values to add after the
@@ -227,10 +233,7 @@ REG_OP(PadV2)
 *y: A Tensor of the same type as "x" . \n
 
 *@par Third-party framework compatibility:
-* Compatible with TensorFlow operator Pad.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
+* Compatible with TensorFlow operator PadV2.
 */
 REG_OP(PadV2D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
@@ -271,22 +274,20 @@ REG_OP(PadV3)
     .ATTR(mode, String, "constant")
     .ATTR(paddings_contiguous, Bool, true)
     .OP_END_FACTORY_REG(PadV3)
-
-/**
-*@brief Pads a tensor.
+
+/**
+*@brief Calculates the gradient of PadV3.
 
 *@par Inputs:
-*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32.
+*Two inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32,
+* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
+* complex128, uint32, uint64.
+* @li paddings: A Tensor of type int32 or int64.
 
 *@par Attributes:
-* @li paddings: An required "vector<vector<int>>".
-* For each dimension D of input, paddings[D, 0] indicates how many
-* values to add before the contents of tensor in that dimension,
-* and paddings[D, 1] indicates how many values to add after the
-* contents of tensor in that dimension.
-* @li constant_values: An optional int value for pad.
-* @li mode: An optional string, Defaults to "constant", indicates paddings mode,
-* support "constant", "reflect", "edge"
+* @li mode: An optional string, Defaults to "reflect", indicates paddings mode,
+* support "reflect", "edge"
 * @li paddings_contiguous: An optional bool value, Defaults to true.
 * If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
 * If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]
@@ -295,19 +296,53 @@ REG_OP(PadV3)
 *y: A Tensor of the same type as "x".
 
 *@par Third-party framework compatibility:
-* Compatible with ONNX operator Pad.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
+* Compatible with ONNX operator PadGrad.
 */
-REG_OP(PadV3D)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
-    .REQUIRED_ATTR(paddings, ListListInt)
-    .ATTR(constant_values, Int, 0)
-    .ATTR(mode, String, "constant")
+
+REG_OP(PadV3Grad)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(paddings, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(mode, String, "reflect")
     .ATTR(paddings_contiguous, Bool, true)
-    .OP_END_FACTORY_REG(PadV3D)
+    .OP_END_FACTORY_REG(PadV3Grad)
+
+/**
+*@brief Pads a tensor.
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32.
+
+*@par Attributes:
+* @li paddings: A required "vector<vector<int>>".
+* For each dimension D of input, paddings[D, 0] indicates how many
+* values to add before the contents of tensor in that dimension,
+* and paddings[D, 1] indicates how many values to add after the
+* contents of tensor in that dimension.
+* @li constant_values: An optional int value for pad.
+* @li mode: An optional string, Defaults to "constant", indicates paddings mode,
+* support "constant", "reflect", "edge"
+* @li paddings_contiguous: An optional bool value, Defaults to true.
+* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
+* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]
+
+*@par Outputs:
+*y: A Tensor of the same type as "x".
+
+*@par Third-party framework compatibility:
+* Compatible with ONNX operator Pad.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
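+
+* A worked example of the paddings layout (illustrative): for a 2-D "x" of
+* shape (H, W), paddings = {{1, 1}, {2, 2}} with mode = "constant" and
+* constant_values = 0 yields an output of shape (H + 2, W + 4): one row of
+* zeros above and below, and two columns of zeros on each side.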
+*/
+REG_OP(PadV3D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
+    .REQUIRED_ATTR(paddings, ListListInt)
+    .ATTR(constant_values, Int, 0)
+    .ATTR(mode, String, "constant")
+    .ATTR(paddings_contiguous, Bool, true)
+    .OP_END_FACTORY_REG(PadV3D)
 
 /**
 *@brief Create a diagonal tensor
@@ -403,5 +438,76 @@ REG_OP(EmbeddingRankId)
     .ATTR(mode, String, "mod")
     .OP_END_FACTORY_REG(EmbeddingRankId)
 
+/**
+*@brief EmbeddingLocalIndex, sort statistics index according to rank_id \n
+
+*@par Inputs:
+* @li addr_table: A 2D tensor whose last dimension must be 3.
+* @li index: A tensor with data type int32, int64, uint32, uint64.
+
+*@par Attributes:
+* @li row_memory: The size of the embedding vector in a row. Defaults to 320.
+* @li mode: String type, currently there are two options: 'mod' and 'order'
+
+*@par Outputs:
+* @li local_idx: Index on each server.
+* @li nums: The number of local_idx found on each server.
+* @li recover_idx: The sorted local_idx element is at the position corresponding
+* to the original input index.
+
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in TensorFlow.
+*/
+REG_OP(EmbeddingLocalIndex)
+    .INPUT(addr_table, TensorType({DT_UINT64}))
+    .INPUT(index, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64}))
+    .OUTPUT(local_idx, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64}))
+    .OUTPUT(nums, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64}))
+    .OUTPUT(recover_idx, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64}))
+    .ATTR(row_memory, Int, 320)
+    .ATTR(mode, String, "mod")
+    .OP_END_FACTORY_REG(EmbeddingLocalIndex)
+
+/**
+* @brief Fill the value to a tensor that has the specified shape.
+
+* @par Inputs:
+* One input, including:
+* dims: A Tensor specifying the shape of the output.
+
+* @par Attributes:
+* value: An optional float value. Defaults to 0.0.
+
+* @par Outputs:
+* y: A Tensor. Has the shape specified by input "dims", and is full of the
+* value specified by attr "value".
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator ConstantOfShape.
+*/
+REG_OP(FillV2)
+    .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
+    .ATTR(value, Float, 0)
+    .OP_END_FACTORY_REG(FillV2)
+
+/**
+* @brief Fill the value to a tensor that has the specified shape.
+
+* @par Attributes:
+* @li value: An optional float value. Defaults to 0.0.
+
+* @li dims: A required ListInt specifying the shape of the output.
+
+* @par Outputs:
+* y: A Tensor. Has the shape specified by attr "dims", and is full of the
+* value specified by attr "value".
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator ConstantOfShape.
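+
+* Usage sketch (illustrative): with dims = {2, 3} and value = 1.0, the op
+* yields a 2x3 tensor filled with 1.0, matching what ONNX ConstantOfShape
+* would produce when the target shape is fixed at graph-build time.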
+*/
+REG_OP(FillV2D)
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64}))
+    .ATTR(value, Float, 0)
+    .REQUIRED_ATTR(dims, ListInt)
+    .OP_END_FACTORY_REG(FillV2D)
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h
index 5c7adfd8..e578997c 100644
--- a/third_party/fwkacllib/inc/ops/parsing_ops.h
+++ b/third_party/fwkacllib/inc/ops/parsing_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -51,6 +51,247 @@ REG_OP(StringToNumber)
     .ATTR(out_type, Type, DT_FLOAT)
     .OP_END_FACTORY_REG(StringToNumber)
 
+/**
+*@brief Parse a serialized Example proto (tensorflow.Example) into typed
+*tensors.
+*@par Inputs:
+*@li serialized: A Tensor of type string.
+*@li dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n
+
+*@par Attributes:
+*@li num_sparse: An int, the number of sparse_indices, sparse_values and
+*sparse_shapes outputs.
+*@li sparse_keys: ListString
+*@li sparse_types: types of sparse_values
+*@li dense_keys: ListString
+*@li Tdense: types of dense_defaults
+*@li dense_shapes: shapes of dense_defaults \n
+
+*@par Outputs:
+*@li sparse_indices: A Tensor of type int64.
+*@li sparse_values: Has the same type as sparse_types.
+*@li sparse_shapes: A Tensor of type int64
+*@li dense_values: Has the same type as dense_defaults.
+
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ParseSingleExample)
+    .INPUT(serialized, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
+    .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
+    .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(dense_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
+    .ATTR(num_sparse, Int, 0)
+    .ATTR(sparse_keys, ListString, {})
+    .ATTR(dense_keys, ListString, {})
+    .ATTR(sparse_types, ListType, {})
+    .ATTR(Tdense, ListType, {})
+    .ATTR(dense_shapes, ListListInt, {})
+    .OP_END_FACTORY_REG(ParseSingleExample)
+
+/**
+*@brief Decodes raw bytes into a tensor . \n
+*@par Inputs:
+*bytes: A Tensor of type string.
+
+*@par Attributes:
+*@li little_endian: An optional bool. Defaults to true.
+*@li out_type: output type
+
+*@par Outputs:
+*output: A Tensor
+*/
+REG_OP(DecodeRaw)
+    .INPUT(bytes, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT,
+                                DT_INT64,DT_INT32,DT_INT8,DT_UINT8,DT_INT16,
+                                DT_UINT16,DT_COMPLEX64,DT_COMPLEX128}))
+    .ATTR(out_type, Type, DT_FLOAT)
+    .ATTR(little_endian, Bool, true)
+    .OP_END_FACTORY_REG(DecodeRaw)
+
+/**
+*@brief Convert a serialized tensorflow.TensorProto proto to a Tensor. \n
+
+*@par Inputs:
+*serialized: A Tensor of string type. Scalar string containing a serialized
+*TensorProto proto. \n
+
+*@par Attributes:
+*out_type: The type of the serialized tensor. The provided type must match the
+*type of the serialized tensor and no implicit conversion will take place. \n
+
+*@par Outputs:
+*output: A Tensor of type out_type. \n
+
+*@attention Constraints:
+*The implementation for ParseTensor on Ascend uses AICPU,
+*with bad performance. \n
 
+*@par Third-party framework compatibility
+*@li compatible with tensorflow ParseTensor operator.
+*/
+REG_OP(ParseTensor)
+    .INPUT(serialized, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16,
+                                DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32,
+                                DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING,
+                                DT_COMPLEX64, DT_COMPLEX128}))
+    .ATTR(out_type, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(ParseTensor)
+
+/**
+*@brief Converts each CSV record in the input Tensor to typed columns . \n
+
+*@par Inputs:
+*Inputs include:
+*@li records: Each string is a record/row in the csv and all records should have the
+*same format. \n
+*@li record_defaults: One tensor per column of the input record, with either a
+*scalar default value for that column or an empty vector if the column is
+*required. \n
+
+*@par Attributes:
+*@li OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n
+*@li field_delim: char delimiter to separate fields in a record. \n
+*@li use_quote_delim: If false, treats double quotation marks as regular characters
+*inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n
+*@li na_value: Additional string to recognize as NA/NaN. \n
+*@li select_cols: Optional sorted list of column indices to select. If specified,
+only this subset of columns will be parsed and returned.
+
+*@par Outputs:
+*output: Dynamic output tensors. Have the same types as "record_defaults" . \n
+
+*@attention Constraints:
+*The implementation for DecodeCSV on Ascend uses AICPU, with bad
+*performance. \n
+
+*@par Third-party framework compatibility
+*@li compatible with tensorflow DecodeCSV operator.
+*/
+REG_OP(DecodeCSV)
+    .INPUT(records, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
+                                                DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
+                                        DT_INT64, DT_STRING}))
+    .ATTR(OUT_TYPE, ListType, {})
+    .ATTR(field_delim, String, ",")
+    .ATTR(use_quote_delim, Bool, true)
+    .ATTR(na_value, String, ",")
+    .ATTR(select_cols, ListInt, {})
+    .OP_END_FACTORY_REG(DecodeCSV)
+
+/**
+*@brief Parse serialized Example protos (tensorflow.Example) into typed
+*tensors.
+*@par Inputs:
+*@li serialized: A Tensor of type string. \n
+*@li name: A Tensor of type string. \n
+*@li sparse_keys: Dynamic input tensor of string. \n
+*@li dense_keys: Dynamic input tensor of string \n
+*@li dense_defaults: Dynamic input tensor type as string, float, int64. \n
+
+*@par Attributes:
+*@li Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n
+*@li Ndense: Number of dense_keys \n
+*@li sparse_types: types of sparse_values \n
+*@li Tdense: Types of dense_defaults and dense_values \n
+*@li dense_shapes: shapes of dense_defaults \n
+
+*@par Outputs:
+*@li sparse_indices: A Tensor of type int64. \n
+*@li sparse_values: Has the same type as sparse_types. \n
+*@li sparse_shapes: A Tensor of type int64 \n
+*@li dense_values: Has the same type as dense_defaults. \n
+*@par Third-party framework compatibility \n
+*@li compatible with tensorflow ParseExample operator. \n
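+* Note (assumed consistency requirement, mirroring TensorFlow's
+* ParseExample): Nsparse should equal the number of entries in
+* "sparse_keys"/"sparse_types", and Ndense the number of entries in
+* "dense_keys", e.g. Nsparse = 2 for sparse_keys = {"label", "weight"}.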
+*/
+REG_OP(ParseExample)
+    .INPUT(serialized, TensorType({DT_STRING}))
+    .INPUT(name, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(sparse_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(dense_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .ATTR(Nsparse, Int, 0)
+    .ATTR(Ndense, Int, 0)
+    .ATTR(sparse_types, ListType, {})
+    .ATTR(Tdense, ListType, {})
+    .ATTR(dense_shapes, ListListInt, {})
+    .OP_END_FACTORY_REG(ParseExample)
+
+/**
+*@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed
+*tensors.
+*@par Inputs:
+*@li serialized: A Tensor of type string. \n
+*@li feature_list_dense_missing_assumed_empty: A Tensor of type string. \n
+*@li context_sparse_keys: Dynamic input tensor of string. \n
+*@li context_dense_keys: Dynamic input tensor of string \n
+*@li feature_list_sparse_keys: Dynamic input tensor of string \n
+*@li feature_list_dense_keys: Dynamic input tensor of string \n
+*@li context_dense_defaults: Dynamic input tensor of string, float, int64 \n
+*@li debug_name: A Tensor of type string. \n
+
+*@par Attributes:
+*@li Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n
+*@li Ncontext_dense: Number of context_dense_keys \n
+*@li Nfeature_list_sparse: Number of feature_list_sparse_keys \n
+*@li Nfeature_list_dense: Number of feature_list_dense_keys \n
+*@li context_sparse_types: Types of context_sparse_values \n
+*@li Tcontext_dense: Types of context_dense_values \n
+*@li feature_list_dense_types: Types of feature_list_dense_values \n
+*@li context_dense_shapes: Shape of context_dense \n
+*@li feature_list_sparse_types: Type of feature_list_sparse_values \n
+*@li feature_list_dense_shapes: Shape of feature_list_dense \n
+
+*@par Outputs:
+*@li context_sparse_indices: Dynamic output tensor of type int64. \n
+*@li context_sparse_values: Dynamic output tensor of type string, float, int64. \n
+*@li context_sparse_shapes: Dynamic output tensor of type int64 \n
+*@li context_dense_values: Dynamic output tensor of type string, float, int64. \n
+*@li feature_list_sparse_indices: Dynamic output tensor of type int64. \n
+*@li feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n
+*@li feature_list_sparse_shapes: Dynamic output tensor of type int64 \n
+*@li feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n
+*@par Third-party framework compatibility \n
+*@li compatible with tensorflow ParseSingleSequenceExample operator. \n
+*/
+REG_OP(ParseSingleSequenceExample)
+    .INPUT(serialized, TensorType({DT_STRING}))
+    .INPUT(feature_list_dense_missing_assumed_empty, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(context_sparse_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(context_dense_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(feature_list_sparse_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(feature_list_dense_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(context_dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .INPUT(debug_name, TensorType({DT_STRING}))
+    .DYNAMIC_OUTPUT(context_sparse_indices, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(context_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(context_sparse_shapes, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(context_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(feature_list_sparse_indices, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(feature_list_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(feature_list_sparse_shapes, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(feature_list_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .ATTR(Ncontext_sparse, Int, 0)
+    .ATTR(Ncontext_dense, Int, 0)
+    .ATTR(Nfeature_list_sparse, Int, 0)
+    .ATTR(Nfeature_list_dense, Int, 0)
+    .ATTR(context_sparse_types, ListType, {})
+    .ATTR(Tcontext_dense, ListType, {})
+    .ATTR(feature_list_dense_types, ListType, {})
+    .ATTR(context_dense_shapes, ListListInt, {})
+    .ATTR(feature_list_sparse_types, ListType, {})
+    .ATTR(feature_list_dense_shapes, ListListInt, {})
+    .OP_END_FACTORY_REG(ParseSingleSequenceExample)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h
index b53cfeb6..ae701295 100644
--- a/third_party/fwkacllib/inc/ops/quantize_ops.h
+++ b/third_party/fwkacllib/inc/ops/quantize_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -61,49 +61,81 @@ REG_OP(Dequantize)
     .OP_END_FACTORY_REG(Dequantize)

 /**
-*@brief Quantizes the input . \n
+* @brief Quantizes the input . \n
+* @par Inputs:
+* @li x: The tensor to be quantized, of type float16 or float32. \n
+* @li scales: A 1D tensor of type float32, specifying the scale factors. \n
+* @li zero_points: A 1D tensor of the quantized type, specifying the zero points \n
+* @par Attributes:
+* @li dtype: A required string, specifying the quantized output data type.
+* @li axis: An optional int, the dimension along which quantization is applied. Defaults to 1. \n
+* @par Outputs:
+* y: The quantized tensor. Has the same shape as "x"; the dtype is the quantized type . \n
+*/
+REG_OP(Quantize)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scales, TensorType({DT_FLOAT}))
+    .INPUT(zero_points, TensorType({DT_INT8,DT_UINT8,DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT32}))
+    .REQUIRED_ATTR(dtype, String)
+    .ATTR(axis, Int, 1)
+    .OP_END_FACTORY_REG(Quantize)

-*@par Inputs:
-*x: An NC1HWC0 tensor of type float16 or float32, specifying the input . \n
+/**
+* @brief Quantizes the input . \n

-*@par Attributes:
-*@li scale: A required float32, specifying the scaling ratio.
-*@li offset: A required float16, specifying the offset.
-*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
-*@li round_mode: An optional string, specifying the float16 to int8 cast type.
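To see how the Quantize registration above is consumed, here is a minimal graph-construction sketch. It assumes the ge::op wrapper classes and the set_input_*/set_attr_* accessors that REG_OP typically generates, plus an umbrella ops header; every identifier and attribute value below is illustrative, not part of this patch:

    #include "graph/graph.h"
    #include "all_ops.h"  // assumed umbrella header for the generated op wrappers

    static ge::Graph BuildQuantizeGraph() {
      // Graph inputs: the tensor to quantize plus per-axis scales/zero points.
      auto x = ge::op::Data("x").set_attr_index(0);
      auto scales = ge::op::Data("scales").set_attr_index(1);
      auto zero_points = ge::op::Data("zero_points").set_attr_index(2);
      auto q = ge::op::Quantize("quantize")
                   .set_input_x(x)
                   .set_input_scales(scales)
                   .set_input_zero_points(zero_points)
                   .set_attr_dtype("INT8")  // required string attr; value illustrative
                   .set_attr_axis(1);       // quantize along the channel dimension
      ge::Graph g("quantize_graph");
      g.SetInputs({x, scales, zero_points}).SetOutputs({q});
      return g;
    }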
-* The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round" . \n
+* @par Inputs:
+* x: A tensor of type float16 or float32, specifying the input . \n

-*@par Outputs:
-*y: The quantized output tensor of type int8 and with format NC1HWC0 . \n
+* @par Attributes:
+* @li scale: A required float32, specifying the scaling ratio.
+* @li offset: A required float16, specifying the offset.
+* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
+* Defaults to "False".
+* @li round_mode: An optional string, specifying the float16 to int8 cast type.
+* The value range is [Round, Floor, Ceil, Trunc]. Defaults to "Round" .
+* @li dst_type: An optional int32, specifying the output data type. Defaults to "DT_INT8" . \n
+
+* @par Outputs:
+* y: The quantized output tensor of type int8 or int4. \n
+
+* @attention Constraints:
+* round_mode value range is [Round, Floor, Ceil, Trunc].
+* @li Round: round to nearest, tie to even (C language rint).
+* @li Floor: round to minus infinity (C language floor).
+* @li Ceil: round to positive infinity (C language ceil).
+* @li Trunc: round to zero (C language trunc). \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendQuant)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32}))
-    .OUTPUT(y, TensorType({DT_INT8}))
+    .OUTPUT(y, TensorType({DT_INT8, DT_INT4}))
     .REQUIRED_ATTR(scale, Float)
     .REQUIRED_ATTR(offset, Float)
     .ATTR(sqrt_mode, Bool, false)
     .ATTR(round_mode, String, "Round")
+    .ATTR(dst_type, Int, DT_INT8)
     .OP_END_FACTORY_REG(AscendQuant)

 /**
-*@brief Dequantizes the input . \n
+* @brief Dequantizes the input . \n

-*@par Inputs:
-*@li x: An NC1HWC0 tensor of type int32, specifying the input.
-*@li deq_scale: An NC1HWC0 tensor of type float16 or uint64, specifying the scaling ratio . \n
+* @par Inputs:
+* @li x: A tensor of type int32, specifying the input.
+* @li deq_scale: A tensor of type uint64, specifying the scaling ratio . \n

-*@par Attributes:
-*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
-*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
-*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n
+* @par Attributes:
+* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
+* Defaults to "False".
+* @li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
+* @li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n

-*@par Outputs:
-*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . \n
+* @par Outputs:
+* y: The dequantized output tensor of type float16 or float32. \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendDequant)
@@ -116,21 +148,22 @@ REG_OP(AscendDequant)
     .OP_END_FACTORY_REG(AscendDequant)

 /**
-*@brief Anti quantizes the input . \n
+* @brief Anti quantizes the input . \n

-*@par Inputs:
-*x: An NC1HWC0 tensor of type int8, specifying the input . \n
+* @par Inputs:
+* x: A tensor of type int8, specifying the input . \n
-*@par Attributes:
-*@li scale: A required float32 scale.
-*@li offset: A required float32 offset.
-*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT".
-*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False" . \n
+* @par Attributes:
+* @li scale: A required float32 scale.
+* @li offset: A required float32 offset.
+* @li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT".
+* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
+* Defaults to "False" . \n

-*@par Outputs:
-*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . \n
+* @par Outputs:
+* y: The dequantized output tensor of type float16 or float32. \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendAntiQuant)
@@ -143,20 +176,20 @@ REG_OP(AscendAntiQuant)
     .OP_END_FACTORY_REG(AscendAntiQuant)

 /**
-*@brief Dequantizes the input of int16 . \n
+* @brief Dequantizes the input of int16 . \n

-*@par Inputs:
-*@li x0: An NC1HWC0 tensor of type int32, specifying the input.
-*@li deq_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio.
-*@li x1: An NC1HWC0 tensor of type int16, specifying the input . \n
+* @par Inputs:
+* @li x0: A tensor of type int32, specifying the input.
+* @li deq_scale: A tensor of type uint64, specifying the scaling ratio.
+* @li x1: A tensor of type int16, specifying the input . \n

-*@par Attributes:
-*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
+* @par Attributes:
+* relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n

-*@par Outputs:
-*y: The dequantized output tensor of type int16 and with format NC1HWC0 . \n
+* @par Outputs:
+* y: The dequantized output tensor of type int16. \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendDequantS16)
@@ -168,19 +201,19 @@ REG_OP(AscendDequantS16)
     .OP_END_FACTORY_REG(AscendDequantS16)

 /**
-*@brief Requantizes the input . \n
+* @brief Requantizes the input . \n

-*@par Inputs:
-*@li x: An NC1HWC0 tensor of type int32, specifying the input.
-*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio . \n
+* @par Inputs:
+* @li x: A tensor of type int32, specifying the input.
+* @li req_scale: A tensor of type uint64, specifying the scaling ratio . \n

-*@par Attributes:
-*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
+* @par Attributes:
+* relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n

-*@par Outputs:
-*y: The dequantized output tensor of type int8 and with format NC1HWC0 . \n
+* @par Outputs:
+* y: The dequantized output tensor of type int8. \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendRequant)
@@ -191,34 +224,57 @@ REG_OP(AscendRequant)
     .OP_END_FACTORY_REG(AscendRequant)

 /**
-*@brief Requantizes the input of int16 . \n
+* @brief Requantizes the input of int16 . \n
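The quant/dequant pair above is typically wired around an int8 compute op: AscendQuant produces the int8 feature map, and AscendDequant scales the int32 accumulator back to floating point. A hedged fragment under the same assumed wrapper API as the Quantize sketch earlier (feature_map, conv_out, and deq_scale stand for previously built operators; all values illustrative):

    // fp16/fp32 feature map -> int8
    auto quant = ge::op::AscendQuant("quant")
                     .set_input_x(feature_map)
                     .set_attr_scale(0.0039f)   // 1/255, illustrative
                     .set_attr_offset(0.0f);
    // ... an int8 convolution consuming `quant` would sit here ...
    // int32 accumulator -> fp16/fp32, with ReLU fused into the dequant
    auto dequant = ge::op::AscendDequant("dequant")
                       .set_input_x(conv_out)
                       .set_input_deq_scale(deq_scale)  // uint64 packed scale
                       .set_attr_relu_flag(true);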
-*@par Inputs:
-*@li x: An NC1HWC0 tensor of type int16, specifying the input.
-*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio.
-*@li x1: An NC1HWC0 tensor of type int16 . \n
+* @par Inputs:
+* @li x0: A tensor of type int16, specifying the input.
+* @li req_scale: A tensor of type uint64, specifying the scaling ratio.
+* @li x1: A tensor of type int16 . \n

-*@par Attributes:
-*@li dual_output: A optional bool, specifying whether to perform dual ouput, either "True" or "False". Defaults to "False".
-*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
+* @par Attributes:
+* @li dual_output: An optional bool, specifying whether to perform dual output, either "True" or "False".
+* Defaults to "False".
+* @li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n

-*@par Outputs:
-*@li y: The dequantized output tensor of type int8 and with format NC1HWC0.
-*@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n
+* @par Outputs:
+* @li y0: The dequantized output tensor of type int8.
+* @li y1: The dequantized output tensor of type int16. \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendRequantS16)
-    .INPUT(x, TensorType({DT_INT16}))
+    .INPUT(x0, TensorType({DT_INT16}))
     .INPUT(req_scale, TensorType({DT_UINT64}))
     .OPTIONAL_INPUT(x1, TensorType({DT_INT16}))
-    .OUTPUT(y, TensorType({DT_INT8}))
+    .OUTPUT(y0, TensorType({DT_INT8}))
     .OUTPUT(y1, TensorType({DT_INT16}))
     .ATTR(dual_output, Bool, false)
     .ATTR(relu_flag, Bool, false)
     .OP_END_FACTORY_REG(AscendRequantS16)

+/**
+* @brief Quantizes the input of int8 . \n
+
+* @par Inputs:
+* @li x: A tensor of type int8, specifying the input.
+* @li offset: A tensor of type int8.
+
+* @par Attributes:
+* @li dst_type: An optional int from: DT_INT8, DT_INT4. Defaults to DT_INT8.
+
+* @par Outputs:
+* @li y: output tensor of type int4 or int8.
+
+* @par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe, ONNX, TensorFlow or PyTorch.
+*/
+REG_OP(AscendWeightQuant)
+    .INPUT(x, TensorType({DT_INT8}))
+    .INPUT(offset, TensorType({DT_INT8}))
+    .OUTPUT(y, TensorType({DT_INT8, DT_INT4}))
+    .ATTR(dst_type, Int, DT_INT8)
+    .OP_END_FACTORY_REG(AscendWeightQuant)
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h
index 9b31aa8e..5af2dd74 100644
--- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h
+++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -37,13 +37,18 @@ namespace ge {
 *deprecated name.
 *@li indices: Indices in the outermost dimension of `params` of the values that should be
 *gathered.
+
+*@par Attributes:
+*@li PARAMS_RAGGED_RANK: The ragged rank of the params_nested_splits.
+*@li Tsplits: A type of output_nested_splits.
 *@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain
 *this number of `row_splits` tensors.
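AscendWeightQuant is usually applied offline to repack weights. A hedged fragment (same assumed wrapper API as above; int8_weights and weight_offset are placeholders, and the cast is shown because Int attrs carry int64 values):

    auto wq = ge::op::AscendWeightQuant("weight_quant")
                  .set_input_x(int8_weights)        // pre-quantized int8 weights
                  .set_input_offset(weight_offset)  // per-tensor int8 offset
                  .set_attr_dst_type(static_cast<int64_t>(ge::DT_INT4));  // repack to int4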
 This value should equal
 *`indices.shape.ndims + params.ragged_rank - 1` . \n

 *@par Outputs:
-*y:A Returns The `nested_row_splits` tensors that define the row-partitioning for the
-*returned RaggedTensor.The `flat_values` for the returned RaggedTensor . \n
+*@li output_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the
+*returned RaggedTensor.
+*@li output_dense_values: The `flat_values` for the returned RaggedTensor. \n

 *@par Third-party framework compatibility
 * Compatible with tensorflow RaggedGather operator.
diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
index 13488a25..ceaa64e4 100644
--- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
+++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -61,7 +61,6 @@ REG_OP(RaggedTensorToSparse)
 *@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n

 *@par Inputs:
-*Six inputs, including:
 *@li shape:A `Tensor`. Must be one of the following types: `int64`, `int32`.
 *@li values:A 1D tensor representing the values of the ragged tensor.
 *@li default_value:A `Tensor`. Must have the same type as `values`.
@@ -78,7 +77,7 @@ The types of the row partition tensors. At present, these can be:
 is preceeded by "FIRST_DIM_SIZE" . \n

 *@par Outputs:
-*@li result: A `Tensor`. Has the same type as `values`.
+*result: A `Tensor`. Has the same type as `values`.
 */
 REG_OP(RaggedTensorToTensor)
     .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h
index 8af4f867..4376437f 100644
--- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h
+++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -35,7 +35,11 @@ namespace ge {
 *@li deltas: The deltas of each range . \n

 *@par Outputs:
-*y:A Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor` . \n
+*@li rt_dense_values: The `flat_values` for the returned `RaggedTensor`.
+*@li rt_nested_splits: The `row_splits` for the returned `RaggedTensor`. \n
+
+*@par Attributes:
+*Tsplits: A type of rt_nested_splits.

 *@attention Constraints:
 *The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors.
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h
index b46da435..57631d14 100644
--- a/third_party/fwkacllib/inc/ops/random_ops.h
+++ b/third_party/fwkacllib/inc/ops/random_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -60,6 +60,65 @@ REG_OP(Multinomial)
     .OP_END_FACTORY_REG(Multinomial)

 /**
+*@brief Draws samples from a multinomial distribution using alias tables. \n
+
+*@par Inputs:
+*Inputs include:
+* @li q: A Tensor. Must be one of the following types: float, double.
+1-D Tensor with shape [num_classes], the probability table produced by MultinomialAliasSetup.
+* @li j: A Tensor. Must be one of the following types: int64.
+1-D Tensor with shape [num_classes], the alias table produced by MultinomialAliasSetup. \n
+
+*@par Attributes:
+*@li num_samples: A required int. Number of independent samples to draw.
+*@li seed: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor of type int64. \n
+
+*@attention Constraints:
+*The implementation for MultinomialAliasDraw on Ascend uses AICPU, with bad performance.
+
+*@par Third-party framework compatibility
+*@li compatible with torch _multinomial_alias_draw operator.
+*/
+REG_OP(MultinomialAliasDraw)
+    .INPUT(q, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(j, TensorType({DT_INT64}))
+    .OUTPUT(y, TensorType({DT_INT64}))
+    .REQUIRED_ATTR(num_samples, Int)
+    .ATTR(seed, Int, 0)
+    .OP_END_FACTORY_REG(MultinomialAliasDraw)
+
+/**
+*@brief Prepares for MultinomialAliasDraw to create a multinomial distribution. \n
+
+*@par Inputs:
+*Inputs include:
+* @li probs: A Tensor. Must be one of the following types: float, double.
+1-D Tensor with shape [num_classes]. \n
+
+*@par Outputs:
+*@li j: A Tensor. Must be one of the following types: int64.
+1-D Tensor with shape [num_classes].
+*@li q: A Tensor. Must be one of the following types: float, double.
+1-D Tensor with shape [num_classes]. \n
+
+*@attention Constraints:
+*The implementation for MultinomialAliasSetup on Ascend uses AICPU, with bad performance.
+
+*@par Third-party framework compatibility
+*@li compatible with torch _multinomial_alias_setup operator.
+*/
+REG_OP(MultinomialAliasSetup)
+    .INPUT(probs, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(j, TensorType({DT_INT64}))
+    .OUTPUT(q, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(MultinomialAliasSetup)
+
+/**
 *@brief Outputs random values from a normal distribution . \n

 *@par Inputs:
@@ -148,6 +207,53 @@ REG_OP(RandomGamma)
     .OP_END_FACTORY_REG(RandomGamma)

 /**
+*@brief Returns the random permutation of integers from 0 to n-1. \n
+
+*@par Attributes:
+*@li n: A required int.
+*@li dtype: An optional type. Defaults to int64 .
+*@li layout: An optional int. Defaults to 0 . \n
+
+*@par Outputs:
+*out: A Tensor. Must be one of the following types:
+ float16, float32, double, int8, uint8, int16, int32, int64. \n
+
+*@attention Constraints:
+*The implementation for Randperm on Ascend uses AICPU, with bad performance.
+
+*@par Third-party framework compatibility
+*@li compatible with Pytorch Randperm operator.
+*/
+REG_OP(Randperm)
+    .OUTPUT(out, TensorType({DT_INT64, DT_INT32, DT_INT16,
+        DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
+    .REQUIRED_ATTR(n, Int)
+    .ATTR(layout, Int, 0)
+    .ATTR(dtype, Type, DT_INT64)
+    .OP_END_FACTORY_REG(Randperm)
+
+/**
+*@brief Fills a tensor with elements drawn from the poisson distribution. \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, float. \n
+
+*@par Attributes:
+*@li seed: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor with the same type as "x" . \n
+
+*@par Third-party framework compatibility
+*@li Compatible with the Pytorch operator Poisson. \n
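The two alias-method ops are meant to be chained: MultinomialAliasSetup builds the probability and alias tables once, and MultinomialAliasDraw samples from them repeatedly. A hedged fragment (same assumed wrapper API as the earlier sketches; probs is a placeholder operator):

    // Build the alias tables once, then draw repeatedly.
    auto setup = ge::op::MultinomialAliasSetup("setup")
                     .set_input_probs(probs);   // 1-D [num_classes] probabilities
    auto draw = ge::op::MultinomialAliasDraw("draw")
                    .set_input_q(setup, "q")    // probability table from setup
                    .set_input_j(setup, "j")    // alias table from setup
                    .set_attr_num_samples(128)
                    .set_attr_seed(42);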
+*/
+REG_OP(Poisson)
+    .INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT }))
+    .OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT }))
+    .ATTR(seed, Int, 0)
+    .OP_END_FACTORY_REG(Poisson)
+
+/**
 *@brief Outputs random values from the Poisson distribution(s) described by rate . \n

 *@par Inputs:
@@ -157,11 +263,12 @@ REG_OP(RandomGamma)

 *@par Attributes:
 *@li dtype: An optional type from: half, float32, float64, int32, int64. Defaults to int64.
-*@li seed: An optional int. Defaults to 0.
-*@li seed2: An optional int. Defaults to 0 . \n
+*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero,
+*the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n

 *@par Outputs:
-*y: A Tensor of type dtype . \n
+*y: A Tensor of type "dtype": float16, float, double, int32, int64. \n

 *@attention Constraints:
 *The implementation for RandomPoisson on Ascend uses AICPU, with bad performance.
@@ -188,11 +295,13 @@ REG_OP(RandomPoisson)
 *x: A Tensor. The tensor to be shuffled . \n

 *@par Attributes:
-*@li seed: An optional int. Defaults to 0.
-*@li seed2: An optional int. Defaults to 0 . \n
+*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero,
+*the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n

 *@par Outputs:
-*y: A Tensor. Has the same type as x . \n
+*y: A Tensor. Has the same type as "x": float16, float, double,
+*int8, uint8, int16, uint16, int32, int64. \n

 *@attention Constraints:
 *The implementation for RandomShuffle on Ascend uses AICPU, with bad performance.
@@ -220,11 +329,12 @@ REG_OP(RandomShuffle)

 *@par Attributes:
 *@li dtype: A type from: half, float16, float32, float64. The type of the output.
-*@li seed: An optional int. Defaults to 0.
-*@li seed2: An optional int. Defaults to 0 . \n
+*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero,
+*the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n

 *@par Outputs:
-*y: A Tensor of type dtype . \n
+*y: A Tensor of type float32, float16, double. \n

 *@attention Constraints:
 *The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance.
@@ -241,6 +351,28 @@ REG_OP(RandomStandardNormal)
     .OP_END_FACTORY_REG(RandomStandardNormal)

 /**
+*@brief Outputs random values from separate normal distributions. \n
+
+*@par Inputs:
+*Inputs include:
+*@li mean: A tensor with the mean of each output element's normal distribution .
+*@li std: A tensor with the standard deviation of each output element's normal distribution. \n
+*@par Outputs:
+*y: A Tensor. Has the same type as "mean" . \n
+
+*@attention Constraints:
+*The implementation for Normal on Ascend uses AICPU, with bad performance.
+
+*@par Third-party framework compatibility
+*@li compatible with Pytorch Normal operator.
+*/
+REG_OP(Normal)
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(std, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(Normal)
+
+/**
 *@brief Outputs random integers from a uniform distribution . \n
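Normal is elementwise: each output element is one draw from the distribution parameterized by the corresponding elements of "mean" and "std". A hedged fragment (same assumed wrapper API; mean_tensor and std_tensor are placeholder operators of matching shape):

    // One sample per element; output shape and type follow `mean`.
    auto normal = ge::op::Normal("normal")
                      .set_input_mean(mean_tensor)
                      .set_input_std(std_tensor);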
 *@par Inputs:
@@ -250,8 +382,9 @@ REG_OP(RandomStandardNormal)
 * @li max: A Tensor. Must have the same type as minval. 0-D . \n

 *@par Attributes:
-*@li seed: An optional int. Defaults to 0.
-*@li seed2: An optional int. Defaults to 0 . \n
+*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero,
+*the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n

 *@par Outputs:
 *y: A Tensor. Has the same type as min . \n
@@ -280,8 +413,9 @@ REG_OP(RandomUniformInt)

 *@par Attributes:
 *@li dtype: A type from: half, float16, float32, float64. The type of the output.
-*@li seed: An optional int. Defaults to 0.
-*@li seed2: An optional int. Defaults to 0 . \n
+*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero,
+*the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n

 *@par Outputs:
 *y: A Tensor of type dtype . \n
@@ -308,11 +442,14 @@ REG_OP(RandomUniform)
 *shape: A Tensor. Must be one of the following types: int32, int64 . \n

 *@par Attributes:
-*@li seed: An optional int. Defaults to 0.
-*@li seed2: An optional int. Defaults to 0 . \n
+*@li seed: An optional int. Defaults to 0. If either `seed` or `seed2`
+*is set to be non-zero, the random number generator is seeded by the given
+*seed. Otherwise, it is seeded by a random seed.
+*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n

 *@par Outputs:
-*size: A Tensor of types: float16, float32, double . \n
+*y: A Tensor of types: float16, float32, double . A tensor of the specified shape
+*filled with random truncated normal values. \n

 *@attention Constraints:
 *The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance.
@@ -356,6 +493,67 @@ REG_OP(DropOutGenMask)
     .ATTR(seed2, Int, 0)
     .OP_END_FACTORY_REG(DropOutGenMask)

+
+/**
+*@brief Generate random uint8 mask for dropout v3 . \n
+
+*@par Inputs:
+include:
+*@li shape: The shape of the output tensor.
+*@li prob: 0-D. Probability of 1 . \n
+
+*@par Attributes:
+*@li seed: If either seed or seed2 is set to be non-zero, the random number
+*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: A second seed to avoid seed collision . \n
+
+*@par Outputs:
+*y: Output (1-D) random number using uint8 data format . \n
+
+*@attention Constraints:
+*The output is aligned with 16
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+
+*@see DropOutGenMaskV3()
+*/
+REG_OP(DropOutGenMaskV3)
+    .INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
+    .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .OUTPUT(y, TensorType({ DT_UINT8 }))
+    .ATTR(seed, Int, 0)
+    .ATTR(seed2, Int, 0)
+    .OP_END_FACTORY_REG(DropOutGenMaskV3)
+
+
+/**
+*@brief Generate stateless random bit mask for dropout . \n
+
+*@par Inputs:
+include:
+*@li shape: The shape of the output tensor.
+*@li prob: 0-D. Probability of bit 1 . \n
+*@li seed: If either seed or seed1 is set to be non-zero, the random number
+*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed1: A second seed to avoid seed collision . \n
+
+*@par Outputs:
+*y: Output (1-D) random number using uint8 data format . \n
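The seed/seed2 convention documented above applies to the whole family: a non-zero pair gives a deterministic stream, all-zero defers to a random seed. A hedged fragment for DropOutGenMask (same assumed wrapper API; shape_const and keep_prob are placeholder operators):

    auto mask = ge::op::DropOutGenMask("gen_mask")
                    .set_input_shape(shape_const)  // 1-D int tensor, e.g. {32, 1024}
                    .set_input_prob(keep_prob)     // 0-D fp16/fp32 keep probability
                    .set_attr_seed(67)             // non-zero => deterministic stream
                    .set_attr_seed2(1);            // companion seed, avoids collisions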
+
+*@attention Constraints:
+*The output is aligned with 128 bits
+
+*@see StatelessDropOutGenMask()
+*/
+REG_OP(StatelessDropOutGenMask)
+    .INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
+    .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .INPUT(seed, TensorType({ DT_INT32, DT_INT64 }))
+    .INPUT(seed1, TensorType({ DT_INT32, DT_INT64 }))
+    .OUTPUT(y, TensorType({ DT_UINT8 }))
+    .OP_END_FACTORY_REG(StatelessDropOutGenMask)
+
 /**
 *@brief Generates values in an interval . \n
@@ -472,15 +670,15 @@ REG_OP(RandomChoiceWithMask)

 *@par Inputs:
 *Inputs including:
-* @li x: A required Tensor. Must be one of the following types:
- float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
+* x: A required Tensor. Must be one of the following types:
+ float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n

 *@par Attributes:
-*@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n
+* group: An optional int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n

 *@par Outputs:
-*y: A required Tensor. Has same type and shape as "x". Must be one of the following types:
- float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
+* y: A required Tensor. Has same type and shape as "x". Must be one of the following types:
+ float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n

 *@attention Constraints:
 *@li "group" must be greater than 0 and must evenly divide the channel dimension size.
@@ -495,6 +693,175 @@ REG_OP(ShuffleChannel)
          DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64}))
     .ATTR(group, Int, 1)
     .OP_END_FACTORY_REG(ShuffleChannel)
-} // namespace ge
+/**
+ * @brief Generate a tensor of samples from a multinomial
+ * distribution according to the probabilities of each of
+ * the possible outcomes.
+ *
+ * @par Inputs:
+ * one input including:
+ * @li x: Input tensor with shape [batch_size, class_size],
+ * where class_size is the number of all possible outcomes.
+ * Each value along the axis zero represents the unnormalized
+ * log-probability of each corresponding outcome in a batch.
+ *
+ * @par Outputs:
+ * one output including:
+ * @li y: Output tensor with shape [batch_size, sample_size],
+ * where sample_size is the number of times to sample.
+ * Each value along the axis zero represents the outcome of
+ * the corresponding sample in a batch.
+ *
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(MultinomialFuss)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64}))
+    .OUTPUT(y, TensorType({DT_INT32, DT_INT64}))
+    .ATTR(dtype, Int, 6)
+    .ATTR(sample_size, Int, 1)
+    .ATTR(seed, Float, 0)
+    .OP_END_FACTORY_REG(MultinomialFuss)
+
+/**
+* @brief During training, randomly zeroes some of the elements of the input tensor
+* with probability "p".
+
+* @par Inputs:
+* @li x: A ND Tensor. Must be one of the following data types: Float, Float16
+* @li seed: A ND Tensor. Must be one of the following data types: Float
+
+* @par Attributes:
+* @li p: probability of an element to be zeroed
+
+* @par Outputs:
+* @li y: A tensor with the same shape and type as "x".
+* @li mask: A tensor with the same shape and type as "x".
+* @li new_seed: A tensor with the same shape and type as "seed".
+*/ + +REG_OP(DropoutV2) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) + .INPUT(seed, TensorType({ DT_FLOAT })) + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) + .OUTPUT(mask, TensorType({ DT_FLOAT })) + .OUTPUT(seed, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(p, Float) + .OP_END_FACTORY_REG(DropoutV2) + +/** +* @brief The Bernoulli distribution with probability . \n + +* @par Inputs: +* @li x: A ND Tensor. Must be one of the following data types: + int8, uint8, int16, int32, int64, bool, float32, float64 . +* @li p: A ND Tensor. The probability of an element to be zeroed. + Must be one of the following data types: float32, float64. \n + +* @par Attributes: +* seed: An Integer, the seed of the random generator. Default value -1 + to use current timestamp, otherwise it should be a positive integer. + +* @par Outputs: +* y: A tensor with the same shape and type as "x". +*/ + +REG_OP(Bernoulli) + .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + .INPUT(p, TensorType({ DT_FLOAT, DT_DOUBLE })) + .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + .ATTR(seed, Int, -1) + .OP_END_FACTORY_REG(Bernoulli) + +/** + * @brief: Fill the input tensor with values drawn from the uniform distribution U(from, to). \n + + * @par Inputs: + * x: A Tensor. Must be one of the following types: float16, float, double. \n + + * @par Attributes: + * @li from: The lower bound of the uniform. Defaults: 0.0 + * @li to: The upper bound of the uniform. Defaults: 1.0 \n + + * @par Outputs: + * y: A Tensor has the same type as x. \n + */ +REG_OP(Uniform) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(from, Float, 0.0) + .ATTR(to, Float, 1.0) + .OP_END_FACTORY_REG(Uniform) + +/** +*@brief Outputs integers consisting of 0 and 1, used for lstm etc. \n +*@par Inputs +* @li time_step: A tensor with data type int64. 0-D. +* @li batch_size: A tensor with data type int64. 0-D. + +*@par Outputs: +*y: A Tensor. Has the type float16 or float, 2-D, [time_step,batch_size]. \n + +*@attention Constraints: +* Compatible with the Caffe operator ContinuationIndicator. +*/ +REG_OP(ContinuationIndicator) + .REQUIRED_ATTR(time_step, Int) + .REQUIRED_ATTR(batch_size, Int) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(ContinuationIndicator) + +/** +*@brief Outputs random values from the Exponential distribution(s) described by rate . \n + +*@par Inputs: +*Inputs include: +* @li x: A Tensor. Must be one of the following types: half, float32, float64. \n + +*@par Attributes: +*@li lambda: An optional float. Defaults to 1. +*@li seed: An optional int. Defaults to 0.The random number generator is seeded by the given seed. + Otherwise, it is seeded by a random seed. \n + +*@par Outputs: +*y: A Tensor of type dtype float16, float, double. \n + +*@attention Constraints: +*The implementation for Exponential on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow Exponential operator. +*/ +REG_OP(Exponential) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(lambda, Float, 1) + .ATTR(seed, Int, 0) + .OP_END_FACTORY_REG(Exponential) + +/** +*@brief Fills a tensor with elements drawn from the geometric distribution. \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float. 
+
+*@par Attributes:
+*@li p: The probability of experimental success in Bernoulli's experiment.
+*@li seed: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor with the same type as "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the Pytorch operator Geometric.
+*/
+REG_OP(Geometric)
+    .INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT }))
+    .OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT }))
+    .REQUIRED_ATTR(p, Float)
+    .ATTR(seed, Int, 0)
+    .OP_END_FACTORY_REG(Geometric)
+
+} // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/randomdsa_ops.h b/third_party/fwkacllib/inc/ops/randomdsa_ops.h
new file mode 100644
index 00000000..4574bb5d
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/randomdsa_ops.h
@@ -0,0 +1,139 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file randomdsa_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_
+
+#include
+#include "graph/operator_reg.h"
+#include "graph/operator.h"
+
+namespace ge {
+/**
+* @brief Generate DSA random bit mask for dropout. \n
+
+* @par Inputs:
+include:
+* @li count: The shape of the input tensor.
+* @li seed: If seed is set to be non-zero, the random number
+* generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+* @li dropout: 0-D. Probability of bit 1 . \n
+
+* @par Attributes:
+* @li random_algorithm: The default value is "Philox". \n
+
+* @par Outputs:
+* out: Output (1-D) random number using uint8 data format . \n
+
+* @see DSAGenBitMask()
+*/
+REG_OP(DSAGenBitMask)
+    .INPUT(count, TensorType({DT_INT64}))
+    .INPUT(seed, TensorType({DT_UINT64}))
+    .INPUT(dropout, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .OUTPUT(out, TensorType({DT_UINT8}))
+    .ATTR(random_algorithm, String, "Philox")
+    .OP_END_FACTORY_REG(DSAGenBitMask)
+
+/**
+* @brief Generate DSA truncated normal data in random. \n
+
+* @par Inputs:
+include:
+* @li count: The shape of the input tensor.
+* @li seed: If seed is set to be non-zero, the random number
+* generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+* @li mean: A Tensor. Must be one of the following types: float16, float32, bfloat16.
+* @li stdev: A Tensor. Must be one of the following types: float16, float32, bfloat16. \n
+
+* @par Attributes:
+* @li random_algorithm: The default value is "Philox". \n
+
+* @par Outputs:
+* out: Output (1-D) random number using float16, float32 and bfloat16 data format . \n
+
+* @see DSARandomTruncatedNormal()
+*/
+REG_OP(DSARandomTruncatedNormal)
+    .INPUT(count, TensorType({DT_INT64}))
+    .INPUT(seed, TensorType({DT_UINT64}))
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
+    .ATTR(random_algorithm, String, "Philox")
+    .OP_END_FACTORY_REG(DSARandomTruncatedNormal)
+
+/**
+* @brief Generate DSA normal data in random. \n
+
+* @par Inputs:
+include:
+* @li count: The shape of the input tensor.
+* @li seed: If seed is set to be non-zero, the random number
+* generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+* @li mean: A Tensor. Must be one of the following types: float16, float32, bfloat16.
+* @li stdev: A Tensor. Must be one of the following types: float16, float32, bfloat16. \n
+
+* @par Attributes:
+* @li random_algorithm: The default value is "Philox". \n
+
+* @par Outputs:
+* out: Output (1-D) random number using float16, float32 and bfloat16 data format . \n
+
+* @see DSARandomNormal()
+*/
+REG_OP(DSARandomNormal)
+    .INPUT(count, TensorType({DT_INT64}))
+    .INPUT(seed, TensorType({DT_UINT64}))
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
+    .ATTR(random_algorithm, String, "Philox")
+    .OP_END_FACTORY_REG(DSARandomNormal)
+
+/**
+* @brief Generate DSA uniform data in random. \n
+
+* @par Inputs:
+include:
+* @li count: The shape of the input tensor.
+* @li seed: If seed is set to be non-zero, the random number
+* generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+* @li low: A Tensor. Must be one of the following types: int32, int64, uint32, uint64, float16, float32, bfloat16.
+* @li high: A Tensor. Must be one of the following types: int32, int64, uint32, uint64, float16, float32, bfloat16. \n
+
+* @par Attributes:
+* @li random_algorithm: The default value is "Philox". \n
+
+* @par Outputs:
+* out: Output (1-D) random number using int, float and bfloat16 data format . \n
+
+* @see DSARandomUniform()
+*/
+REG_OP(DSARandomUniform)
+    .INPUT(count, TensorType({DT_INT64}))
+    .INPUT(seed, TensorType({DT_UINT64}))
+    .INPUT(low, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
+    .INPUT(high, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
+    .OUTPUT(out, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
+    .ATTR(random_algorithm, String, "Philox")
+    .OP_END_FACTORY_REG(DSARandomUniform)
+}
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index 6f44093e..079982db 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,10 +25,10 @@ namespace ge {

 /**
-*@brief Performs reduced batch normalization . \n
+*@brief Performs reduced batch normalization .

 *@par Inputs:
-*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n
+*x: A tensor of type float16 or float32. \n

 *@par Outputs:
 *@li sum: A 1D Tensor of type float32 for SUM reduced "x".
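All four DSA ops share the same calling shape: count and seed are scalar inputs, the distribution parameters are tensors, and the generator is selected by the random_algorithm attribute. A hedged fragment for DSARandomNormal (same assumed wrapper API; count, seed, mean, and stdev are placeholder operators):

    auto dsa_normal = ge::op::DSARandomNormal("dsa_normal")
                          .set_input_count(count)   // int64 element count
                          .set_input_seed(seed)     // uint64 seed; 0 => random seed
                          .set_input_mean(mean)
                          .set_input_stdev(stdev)
                          .set_attr_random_algorithm("Philox");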
@@ -37,7 +37,7 @@ namespace ge {
 *@attention Constraints:
 * This operator is a BatchNorm fusion operator for updating the moving
 * averages for training.
 * This operator is used in conjunction with BNTrainingUpdate.
 */
 REG_OP(BNTrainingReduce)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -46,21 +46,42 @@ REG_OP(BNTrainingReduce)
     .OP_END_FACTORY_REG(BNTrainingReduce)

 /**
-*@brief Performs the backpropagation of BatchNorm . \n
+*@brief Performs reduced batch normalization . \n
+
+*@par Inputs:
+*x: A tensor of type float16 or float32. \n
+
+*@par Outputs:
+*@li sum: A tensor of type float32 for SUM reduced "x".
+*@li square_sum: A tensor of type float32 for SUMSQ reduced "x" . \n
+
+*@attention Constraints:
+* This operator is a BatchNorm fusion operator for updating the moving
+* averages for training.
+* This operator is used in conjunction with BN3DTrainingUpdate.
+*/
+REG_OP(BN3DTrainingReduce)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(sum, TensorType({DT_FLOAT}))
+    .OUTPUT(square_sum, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingReduce)
+
+/**
+*@brief Performs the backpropagation of BatchNorm .

 *@par Inputs:
 * Seven inputs, including:
-*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for
-* the gradient.
-*@li x: A 5D Tensor of type float16 or float32.
-*@li diff_scale: A 5D Tensor of type float32, with format NC1HWC0,
-* for the mean of "x".
-*@li diff_offset: A 5D Tensor of type float32, with format NC1HWC0,
-* for the variance of "x".
-*@li scale: A 5D Tensor of type float32, with format NC1HWC0.
-*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0,
-* for the mean of "x".
-*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0,
-* for the variance of "x" . \n
+*@li grads: A tensor of type float16 or float32, for
+* the gradient.
+*@li x: A tensor of type float16 or float32.
+*@li diff_scale: A tensor of type float32,
+* for the mean of "x".
+*@li diff_offset: A tensor of type float32,
+* for the variance of "x".
+*@li scale: A tensor of type float32.
+*@li batch_mean: A tensor of type float32,
+* for the mean of "x".
+*@li batch_variance: A tensor of type float32,
+* for the variance of "x" . \n

 *@par Attributes:
@@ -68,7 +89,7 @@ REG_OP(BNTrainingReduce)
 * added to the variance of "x" . \n

 *@par Outputs:
-*y: A Tensor of type float16 or float32, with format NC1HWC0, for the offset
+*y: A Tensor of type float16 or float32, for the offset
 * of "x" . \n
+
+*@attention Constraints:
+* The preceding layer of this operator must be BN3DTrainingUpdateGrad . \n
+
+*@see BN3DTrainingUpdateGrad
+*/
+REG_OP(BN3DTrainingReduceGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(diff_scale, TensorType({DT_FLOAT}))
+    .INPUT(diff_offset, TensorType({DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(batch_mean, TensorType({DT_FLOAT}))
+    .INPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .OP_END_FACTORY_REG(BN3DTrainingReduceGrad)
+
+/**
+*@brief Performs reduced batch normalization .
+
+*@par Inputs:
+* Seven inputs, including:
+*@li x: A tensor of type float16 or float32.
 *@li sum: A 1D Tensor of type float32 for the output of operator
 * BNTrainingReduce.
 *@li square_sum: A 1D Tensor of type float32 for the output of operator
@@ -110,19 +174,19 @@ REG_OP(BNTrainingReduceGrad)
 * and variance . \n

 *@par Outputs:
-* Five outputs, including: (NC1HWC0 supported)
-*@li y: A 5D Tensor of type float16 or float32, for normalized "x".
-*@li mean: A 5D Tensor of type float32, for the updated mean.
-*@li variance: A 5D Tensor of type float32, for the updated variance.
+* Five outputs, including:
+*@li y: A tensor of type float16 or float32, for normalized "x".
+*@li mean: A tensor of type float32, for the updated mean.
+*@li variance: A tensor of type float32, for the updated variance.
 *@li batch_mean: A 1D Tensor of type float32, for the mean of "x".
 *@li batch_variance: A 1D Tensor of type float32, for the variance of "x" . \n

 *@attention Constraints:
 *@li This operator is a BatchNorm fusion operator for updating the moving
-averages for training.
-*This operator is used in conjunction with BNTrainingReduce.
-*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square
-* root instruction.
+* averages for training. This operator is used in conjunction with
+* BNTrainingReduce.
+*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the
+* square root instruction.
 */
 REG_OP(BNTrainingUpdate)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -142,26 +206,79 @@ REG_OP(BNTrainingUpdate)
     .OP_END_FACTORY_REG(BNTrainingUpdate)

 /**
-*@brief Performs batch normalization for inference . \n
+*@brief Performs reduced batch normalization . \n

 *@par Inputs:
-* Five inputs, including: (NC1HWC0 supported)
-*@li x: A 5D Tensor of type float16 or float32.
-*@li scale: A 5D Tensor of type float32, for the scaling factor.
-*@li offset: A 5D Tensor of type float32, for the scaling offset.
-*@li mean: A 5D Tensor of type float32, for the mean.
-*@li variance: A 5D Tensor of type float32, for the variance . \n
+* Seven inputs, including:
+*@li x: A tensor of type float16 or float32.
+*@li sum: A tensor of type float32 for the output of operator
+* BN3DTrainingReduce.
+*@li square_sum: A tensor of type float32 for the output of operator
+* BN3DTrainingReduce.
+*@li scale: A tensor of type float32, for the scaling factor.
+*@li offset: A tensor of type float32, for the scaling offset.
+*@li mean: A tensor of type float32, for the updated mean.
+*@li variance: A tensor of type float32, for the updated variance . \n
+
+*@par Attributes:
+*@li epsilon: A required float32, specifying the small value added to variance
+* to avoid dividing by zero.
+*@li factor: A required float32, specifying the weight for updating the mean
+* and variance . \n
+
+*@par Outputs:
+* Five outputs, including:
+*@li y: A tensor of type float16 or float32, for normalized "x".
+*@li mean: A tensor of type float32, for the updated mean.
+*@li variance: A tensor of type float32, for the updated variance.
+*@li batch_mean: A tensor of type float32, for the mean of "x".
+*@li batch_variance: A tensor of type float32, for the variance of "x" . \n
+
+*@attention Constraints:
+*@li This operator is a BatchNorm fusion operator for updating the moving
+* averages for training.
+*This operator is used in conjunction with BN3DTrainingReduce.
+*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square
+* root instruction.
+*/
+REG_OP(BN3DTrainingUpdate)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(sum, TensorType({DT_FLOAT}))
+    .INPUT(square_sum, TensorType({DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(offset, TensorType({DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT}))
+    .INPUT(variance, TensorType({DT_FLOAT}))
+    .REQUIRED_ATTR(factor, Float)
+    .REQUIRED_ATTR(epsilon, Float)
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(mean, TensorType({DT_FLOAT}))
+    .OUTPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingUpdate)
+
+/**
+*@brief Performs batch normalization for inference .
+
+*@par Inputs:
+* Five inputs, including:
+*@li x: A tensor of type float16 or float32.
+*@li scale: A tensor of type float32, for the scaling factor.
+*@li offset: A tensor of type float32, for the scaling offset.
+*@li mean: A tensor of type float32, for the mean.
+*@li variance: A tensor of type float32, for the variance . \n

 *@par Attributes:
 *epsilon: An optional float32, specifying the small value added to variance to
 * avoid dividing by zero. Defaults to "0.0001" . \n

 *@par Outputs:
-*y: A 5D Tensor of type float16 or float32 for the normalized "x" . \n
+*y: A tensor of type float16 or float32 for the normalized "x" . \n

 *@attention Constraints:
-*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root
-* instruction.
+*For Ascend 310, the result accuracy fails to reach 1/1000 due to the
+* square root instruction.
 */
 REG_OP(BNInfer)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -174,29 +291,32 @@ REG_OP(BNInfer)
     .OP_END_FACTORY_REG(BNInfer)

 /**
-*@brief Performs reduced batch normalization. For some scene which don't contain
-assignmoving average . \n
+*@brief Performs reduced batch normalization, for scenes which don't
+* contain the assign moving average step .

 *@par Inputs:
-*Five inputs, including: (NC1HWC0 supported)
-*@li x: A 5D Tensor of type float16 or float32.
-*@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce.
-*@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce.
-*@li scale: A 5D Tensor of type float32, for the scaling factor.
-*@li offset: A 5D Tensor of type float32, for the scaling offset . \n
+*Five inputs, including:
+*@li x: A tensor of type float16 or float32.
+*@li sum: A tensor of type float32 for the output of operator BNTrainingReduce.
+*@li square_sum: A tensor of type float32 for the output of operator
+* BNTrainingReduce.
+*@li scale: A tensor of type float32, for the scaling factor.
+*@li offset: A tensor of type float32, for the scaling offset . \n

 *@par Attributes:
-*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n
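The training-time fusion documented above is a two-op pipeline: BNTrainingReduce computes per-channel SUM/SUMSQ, and BNTrainingUpdate consumes them to normalize "x" and refresh the moving statistics. A hedged fragment (same assumed wrapper API as the earlier sketches; x, scale, offset, moving_mean, and moving_var are placeholder operators):

    auto reduce = ge::op::BNTrainingReduce("bn_reduce").set_input_x(x);
    auto update = ge::op::BNTrainingUpdate("bn_update")
                      .set_input_x(x)
                      .set_input_sum(reduce, "sum")
                      .set_input_square_sum(reduce, "square_sum")
                      .set_input_scale(scale)
                      .set_input_offset(offset)
                      .set_input_mean(moving_mean)
                      .set_input_variance(moving_var)
                      .set_attr_factor(0.1f)     // moving-average update weight
                      .set_attr_epsilon(1e-5f);  // added to variance before sqrt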
+*epsilon: A required float32, specifying the small value added to
+* variance to avoid dividing by zero . \n

 *@par Outputs:
-*Three outputs, including: (NC1HWC0 supported)
-*@li y: A 5D Tensor of type float16 or float32, for normalized "x".
-*@li batch_mean: A 5D Tensor of type float32, for the mean of "x".
-*@li batch_variance: A 5D Tensor of type float32, for the variance of "x" . \n
+*Three outputs, including:
+*@li y: A tensor of type float16 or float32, for normalized "x".
+*@li batch_mean: A tensor of type float32, for the mean of "x".
+*@li batch_variance: A tensor of type float32, for the variance of "x" . \n

 *@attention Constraints:
 *This operator is used in conjunction with BNTrainingReduce.
-For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
+*For Ascend 310, the result accuracy fails to reach 1/1000 due to
+* the square root instruction.
 */
 REG_OP(BNTrainingUpdateV2)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -211,30 +331,35 @@ REG_OP(BNTrainingUpdateV2)
     .OP_END_FACTORY_REG(BNTrainingUpdateV2)

 /**
-*@brief Performs reduced batch normalization v3. For some scene which don't contain
-assign moving average . \n
+*@brief Performs reduced batch normalization v3, for scenes which
+* don't contain the assign moving average step .

 *@par Inputs:
-* Five inputs, including: (NC1HWC0 supported)
-*@li x: A 5D Tensor of type float16 or float32.
-*@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce.
-*@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce.
-*@li scale: A 5D Tensor of type float32, for the scaling factor.
-*@li offset: A 5D Tensor of type float32, for the scaling offset . \n
+* Five inputs, including:
+*@li x: A tensor of type float16 or float32.
+*@li sum: A tensor of type float32 for the output of operator BNTrainingReduce.
+*@li square_sum: A tensor of type float32 for the output of operator
+* BNTrainingReduce.
+*@li scale: A tensor of type float32, for the scaling factor.
+*@li offset: A tensor of type float32, for the scaling offset . \n

 *@par Attributes:
-*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n
+*epsilon: A required float32, specifying the small value added to variance
+* to avoid dividing by zero . \n

 *@par Outputs:
-*@li y: A 5D Tensor of type float16 or float32, for normalized "x".
-*@li batch_mean: A 5D Tensor of type float32, for the mean of "x".
-*@li batch_variance: A 5D Tensor of type float32, for the variance of "x".
-*@li reserve_1: A 5D Tensor of type float32, for the mean of batch "x". Has the same type as batch_mean.
-*@li reserve_2: A 5D Tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n
+*@li y: A tensor of type float16 or float32, for normalized "x".
+*@li batch_mean: A tensor of type float32, for the mean of "x".
+*@li batch_variance: A tensor of type float32, for the variance of "x".
+*@li reserve_1: A tensor of type float32, for the mean of batch "x".
+* Has the same type as batch_mean.
+*@li reserve_2: A tensor of type float32, for the variance of batch "x".
+* Has the same type as batch_mean . \n

 *@attention Constraints:
 *@li This operator is used in conjunction with BNTrainingReduce.
-*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
+*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to
+* the square root instruction.
 */
 REG_OP(BNTrainingUpdateV3)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -251,16 +376,16 @@ REG_OP(BNTrainingUpdateV3)
     .OP_END_FACTORY_REG(BNTrainingUpdateV3)

 /**
-*@brief Performs the backpropagation of BatchNorm . \n
+*@brief Performs the backpropagation of BatchNorm .

 *@par Inputs:
 * Four inputs, including:
-*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0,
+*@li grads: A tensor of type float16 or float32,
 * for the gradient.
-*@li x: A 5D Tensor of type float16 or float32.
-*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0,
+*@li x: A tensor of type float16 or float32.
+*@li batch_mean: A tensor of type float32,
 * for the mean of "x".
-*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0,
+*@li batch_variance: A tensor of type float32,
 * for the variance of "x" . \n

 *@par Attributes:
@@ -268,9 +393,9 @@ REG_OP(BNTrainingUpdateV3)
 * added to the variance of "x" . \n

 *@par Outputs:
-*@li diff_scale: A Tensor of type float32, with format NC1HWC0,
+*@li diff_scale: A Tensor of type float32,
 * for the offset of "scale".
-*@li diff_offset: A Tensor of type float32, with format NC1HWC0,
+*@li diff_offset: A Tensor of type float32,
 * for the offset of "offset" . \n

 */
@@ -285,19 +410,54 @@ REG_OP(BNTrainingUpdateGrad)
     .OP_END_FACTORY_REG(BNTrainingUpdateGrad)

 /**
-*@brief Performs the backpropagation of BatchNorm for inference . \n
+*@brief Performs the backpropagation of BatchNorm . \n
+
+*@par Inputs:
+* Four inputs, including:
+*@li grads: A tensor of type float16 or float32,
+* for the gradient.
+*@li x: A tensor of type float16 or float32.
+*@li batch_mean: A tensor of type float32,
+* for the mean of "x".
+*@li batch_variance: A tensor of type float32,
+* for the variance of "x" . \n
+
+*@par Attributes:
+*epsilon: An optional float32. Defaults to "0.0001". A small float number
+* added to the variance of "x" . \n
+
+*@par Outputs:
+*@li diff_scale: A Tensor of type float32,
+* for the offset of "scale".
+*@li diff_offset: A Tensor of type float32,
+* for the offset of "offset" . \n
+
+*/
+REG_OP(BN3DTrainingUpdateGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(batch_mean, TensorType({DT_FLOAT}))
+    .INPUT(batch_variance, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .OUTPUT(diff_scale, TensorType({DT_FLOAT}))
+    .OUTPUT(diff_offset, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad)
+
+/**
+*@brief Performs the backpropagation of BatchNorm for inference .

 *@par Inputs:
 * Three inputs, including:
-*@li grads: A 5D Tensor of type loat16 or float32, with format NC1HWC0, for the gradient.
-*@li scale: A 5D Tensor of type float32, with format NC1HWC0.
-*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0. It is an output of BatchNorm . \n
+*@li grads: A tensor of type float16 or float32, for the gradient.
+*@li scale: A tensor of type float32.
+*@li batch_variance: A tensor of type float32. It is an output of BatchNorm . \n

 *@par Attributes:
-*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n
+*epsilon: An optional float32. Defaults to "0.0001". A small float number
+* added to the variance of "x" . \n

 *@par Outputs:
-*x_backprop: A Tensor of type float16 or float32, with format NC1HWC0, for the offset of "x" . \n
+*x_backprop: A Tensor of type float16 or float32, for the offset of "x" . \n
\n
*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm.
@@ -365,6 +525,34 @@ REG_OP(ReduceSumD)
    .OP_END_FACTORY_REG(ReduceSumD)

/**
+*@brief Calculates the total mean based on the mean of each device . \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li x: A Tensor. Must be one of the following types: float16, float32 .
+*@li count: A Tensor. Must be one of the following types: float16, float32 .
+*@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Attributes:
+*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce.
+*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
+
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Sum.
+*/
+REG_OP(ReduceMeanWithCount)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(axes, ListInt)
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceMeanWithCount)
+
+/**
*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n

*@par Inputs:
@@ -425,7 +613,7 @@ REG_OP(ReduceAll)
*@li axis: A mutable Tensor. The dimensions to reduce . \n

*@par Attributes:
-*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n
+*keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x" . \n
@@ -484,6 +672,9 @@ REG_OP(ReduceProdD)
*keep_dims: A bool or NoneType.
* - If true, retains reduced dimensions with length 1.
* - If false, the rank of the tensor is reduced by 1 for each entry in axis.
+*noop_with_empty_axes: A bool.
+* - If true, when axes = [], no reduction is performed and "x" is returned unchanged.
+* - If false, when axes = [], all dimensions are reduced.
*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n
@@ -495,6 +686,7 @@ REG_OP(ReduceMean)
    .INPUT(axes, TensorType::IndexNumberType())
    .OUTPUT(y, TensorType::NumberType())
    .ATTR(keep_dims, Bool, false)
+    .ATTR(noop_with_empty_axes, Bool, true)
    .OP_END_FACTORY_REG(ReduceMean)

/**
@@ -511,6 +703,9 @@ REG_OP(ReduceMean)
*@li keep_dims: A bool or NoneType.
* - If true, retains reduced dimensions with length 1.
* - If false, the rank of the tensor is reduced by 1 for each entry in axis.
+*@li noop_with_empty_axes: An optional bool. Defaults to "False".
+* - If true, when axes = [], no reduction is performed (TensorFlow behavior).
+* - If false, when axes = [], all dimensions are reduced (ONNX behavior).
+* See the illustrative sketch in the ReduceMinD section below.
*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n
@@ -525,6 +720,7 @@ REG_OP(ReduceMeanD)
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(axes, ListInt)
    .ATTR(keep_dims, Bool, false)
+    .ATTR(noop_with_empty_axes, Bool, false)
    .OP_END_FACTORY_REG(ReduceMeanD)

/**
@@ -635,8 +831,8 @@
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead.
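+
+*@par Example:
+* An illustrative C++ sketch (not part of this header) of the reduce-mean
+* semantics shared by the Reduce* operators above, including the
+* "noop_with_empty_axes" behavior documented for ReduceMean/ReduceMeanD.
+* The 1-D shape handling is a simplification of this sketch:
+*@code
+* #include <vector>
+* // Reduce-mean of a 1-D tensor. With empty "axes" the result depends on
+* // noop_with_empty_axes: true passes "x" through unchanged, false reduces
+* // every dimension. keep_dims only affects the result's shape, which this
+* // 1-D sketch does not model.
+* std::vector<float> ReduceMean1D(const std::vector<float> &x,
+*                                 const std::vector<int> &axes,
+*                                 bool keep_dims, bool noop_with_empty_axes) {
+*   (void)keep_dims;
+*   if (axes.empty() && noop_with_empty_axes) {
+*     return x;  // identity: nothing is reduced
+*   }
+*   float sum = 0.0f;
+*   for (float v : x) { sum += v; }
+*   return {x.empty() ? 0.0f : sum / static_cast<float>(x.size())};
+* }
+*@endcode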
*/
REG_OP(ReduceMinD)
-    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32}))
    .REQUIRED_ATTR(axes, ListInt)
    .ATTR(keep_dims, Bool, false)
    .OP_END_FACTORY_REG(ReduceMinD)
@@ -747,14 +943,14 @@ REG_OP(Reduction)

*@brief Computes the euclidean norm of elements across dimensions of a tensor . \n

*@par Inputs:
-*@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int32.
+*@li x: A Tensor. Must be one of the following types: float16, float32, int32.
*@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n

*@par Attributes:
*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n

*@par Outputs:
-*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32 . \n
+*y: A Tensor. Must be one of the following types: float16, float32, int32 . \n

*@attention Constraints:
* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n
@@ -804,7 +1000,7 @@ REG_OP(EuclideanNormD)

*@brief Performs instance normalization for inference . \n

*@par Inputs:
-* Five inputs, including: (NC1HWC0 supported)
+* Five inputs, including:
*@li x: A Tensor of type float16 or float32.
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
@@ -816,12 +1012,12 @@
Defaults to "0.00001" . \n

*@par Outputs:
-*y: A Tensor of type float16 or float32 for the normalized "x".
-*batch_mean: A Tensor of type float32 for the result mean.
-*batch_ variance: A Tensor of type float32 for the result variance . \n
+*@li y: A Tensor of type float16 or float32 for the normalized "x".
+*@li batch_mean: A Tensor of type float32 for the result mean.
+*@li batch_variance: A Tensor of type float32 for the result variance . \n

*@attention Constraints:
-*For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction.
+*For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction.
*/
REG_OP(INInferV2)
    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -836,10 +1032,10 @@
    .OP_END_FACTORY_REG(INInferV2)

/**
-*@brief Performs reduced instance normalization . \n
+*@brief Performs reduced instance normalization. \n

*@par Inputs:
-*x: A Tensor of type float16 or float32, with format NC1HWC0 . \n
+*x: A Tensor of type float16 or float32. \n

*@par Outputs:
*@li sum: A Tensor of type float32 for SUM reduced "x".
@@ -857,32 +1053,31 @@

/**
-*@brief Performs update instance normalization . \n
+*@brief Performs update instance normalization. \n

*@par Inputs:
-* Seven inputs, including: (NC1HWC0supported)
+* Seven inputs, including:
*@li x: A Tensor of type float16 or float32.
-*@li sum: A T [N, C1, 1, 1, C0] ensor of type float32 for the output of operator INTrainingReduceV2.
-*@li square_sum: A [N, C1, 1, 1, C0] Tensor of type float32 for the output of operator INTrainingReduceV2.
-*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
-*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
-*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated mean.
-*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance . \n
+*@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
+*@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
+*@li gamma: A Tensor of type float32, for the scaling gamma.
+*@li beta: A Tensor of type float32, for the scaling beta.
+*@li mean: A Tensor of type float32, for the updated mean.
+*@li variance: A Tensor of type float32, for the updated variance. \n

*@par Attributes:
*@li momentum: A required float32, specifying the momentum to update mean and var.
-*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n
+*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. \n

*@par Outputs:
-* Three outputs, including: (NC1HWC0 supported)
+* Three outputs, including:
*@li y: A Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A Tensor of type float32, for the updated mean.
-*@li batch_variance: A Tensor of type float32, for the updated variance . \n
+*@li batch_variance: A Tensor of type float32, for the updated variance. \n

*@attention Constraints:
-*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training.
+* This operator is an InstanceNorm fusion operator for updating the moving averages for training.
* This operator is used in conjunction with INTrainingReduceV2.
-*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction.
*/
REG_OP(INTrainingUpdateV2)
    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -901,6 +1096,80 @@
    .OP_END_FACTORY_REG(INTrainingUpdateV2)

/**
+*@brief Performs the backpropagation of InstanceNorm. \n
+
+*@par Inputs:
+* Seven inputs, including:
+*@li dy: A Tensor of type float16 or float32.
+*@li x: A Tensor of type float16 or float32.
+*@li variance: A Tensor of type float32, for the variance of "x".
+*@li mean: A Tensor of type float32, for the mean of "x".
+*@li res_gamma: A Tensor of type float32.
+*@li res_beta: A Tensor of type float32.
+*@li gamma: A Tensor of type float32. \n
+
+*@par Outputs:
+*pd_x: A Tensor of type float16 or float32, for the offset of "x". \n
+
+*@attention Constraints:
+* The preceding layer of this operator must be INTrainingUpdateGrad. \n
+*/
+REG_OP(INTrainingReduceGrad)
+    .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(variance, TensorType({DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT}))
+    .INPUT(res_gamma, TensorType({DT_FLOAT}))
+    .INPUT(res_beta, TensorType({DT_FLOAT}))
+    .INPUT(gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OP_END_FACTORY_REG(INTrainingReduceGrad)
+
+/**
+*@brief Performs the backpropagation of InstanceNorm. \n
+
+*@par Inputs:
+* Four inputs, including:
+*@li dy: A Tensor of type float16 or float32, for the gradient.
+*@li x: A Tensor of type float16 or float32.
+*@li variance: A Tensor of type float32, for the variance of "x".
+*@li mean: A Tensor of type float32, for the mean of "x". \n
+
+*@par Outputs:
+*@li res_gamma: A Tensor of type float32.
+*@li res_beta: A Tensor of type float32.
\n
+
+*/
+REG_OP(INTrainingUpdateGrad)
+    .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(variance, TensorType({DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT}))
+    .OUTPUT(res_gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(res_beta, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(INTrainingUpdateGrad)
+
+/**
+*@brief Performs the backpropagation of InstanceNorm. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li res_gamma: A Tensor of type float32.
+*@li res_beta: A Tensor of type float32. \n
+
+*@par Outputs:
+*@li pd_gamma: A Tensor of type float32.
+*@li pd_beta: A Tensor of type float32. \n
+
+*/
+REG_OP(INTrainingUpdateGradGammaBeta)
+    .INPUT(res_gamma, TensorType({DT_FLOAT}))
+    .INPUT(res_beta, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_beta, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(INTrainingUpdateGradGammaBeta)
+
+/**
*@brief Performs reduced group normalization . \n

*@par Inputs:
@@ -912,7 +1181,7 @@ REG_OP(INTrainingUpdateV2)

*@par Attributes:
-*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingUpdate . \n
+*num_groups: A required Int, specifying the number of groups, which must be
+* the same as in GNTrainingUpdate . \n

*@attention Constraints:
* This operator is a GroupNorm fusion operator for updating the moving averages for training.
@@ -930,24 +1199,24 @@

/**
*@brief Performs update group normalization . \n

*@par Inputs:
-* Eight inputs, including: (NCHW NHWC supported)
+* Seven inputs, including: (NCHW NHWC supported)
*@li x: A Tensor of type float16 or float32.
-*@li sum: A 5D Tensor of type float32,
+*@li sum: A tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
for the output of operator GNTrainingReduce.
-*@li square_sum: A 5D Tensor of type float32,
+*@li square_sum: A tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
for the output of operator GNTrainingReduce.
-*@li scale: A 5D Tensor of type float32,
+*@li scale: A tensor of type float32,
shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC
is for the scaling gamma.
-*@li offset: A 5D Tensor of type float32,
+*@li offset: A tensor of type float32,
shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC
for the scaling beta.
-*@li mean: A 5D Tensor of type float32,
+*@li mean: A tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
for the updated mean.
-*@li variance: A 5D Tensor of type float32,
+*@li variance: A tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
for the updated variance.
@@ -957,7 +1226,7 @@ for the updated variance.

*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingReduce

*@par Outputs:
-* Three outputs, including: (NC1HWC0 supported)
+* Three outputs, including:
*@li y: A Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A Tensor of type float32, for the updated mean.
*@li batch_variance: A Tensor of type float32, for the updated variance . \n

*@attention Constraints:
*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training.
* This operator is used in conjunction with GNTrainingUpdate.
-*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction.
+*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction.
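+
+*@par Example:
+* An illustrative C++ sketch (not part of this header) of how "num_groups"
+* partitions the channel dimension; the statistics above are accumulated per
+* (N, G) group, which matches the [N, G, 1, 1, 1] shapes listed for NCHW:
+*@code
+* #include <cstddef>
+* // Channel c of a tensor with C channels belongs to group c / (C / G),
+* // assuming C is divisible by num_groups (G).
+* std::size_t GroupOfChannel(std::size_t c, std::size_t C, std::size_t G) {
+*   return c / (C / G);
+* }
+*@endcode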
*/
REG_OP(GNTrainingUpdate)
    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -982,6 +1251,221 @@ REG_OP(GNTrainingUpdate)
    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
    .OP_END_FACTORY_REG(GNTrainingUpdate)

+/**
+*@brief Joins a string Tensor across the given dimensions. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li input:A Tensor of type string. The text to be processed.
+*@li reduction_indices:A Tensor of type int32. The dimensions along which to join.
+
+*@par Attributes:
+*@li keep_dims:An optional bool. Defaults to "true". If true, retains reduced dimensions with length 1.
+*@li separator:An optional string. The separator to use when joining. Defaults to "".
+
+*@par Outputs:
+*output:A Tensor of type string.
+*/
+REG_OP(ReduceJoin)
+    .INPUT(input, TensorType({DT_STRING}))
+    .INPUT(reduction_indices, TensorType({DT_INT32}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(keep_dims, Bool, true)
+    .ATTR(separator, String, "")
+    .OP_END_FACTORY_REG(ReduceJoin)
+
+/**
+* @brief Calculates the standard deviation and average value of Tensors.
+
+* @par Inputs:
+* x: A Tensor. Must be one of the following types:
+* float16, float32. \n
+
+* @par Attributes:
+* Three Attributes, including:
+* @li dim: An optional ListInt. Defaults to {}. \n
+
+* @li unbiased: An optional bool. Defaults to "True".
+* If "True", Use Bessel Correction.
+* If "False", Do not use Bessel Correction. \n
+
+* @li keepdim: An optional bool. Defaults to "False".
+* If "True", Keep the original tensor dimension.
+* If "False", Do not keep the original tensor dimension. \n
+
+* @par Outputs:
+* Two Outputs, including:
+* @li y1: A Tensor. Has the same type as "x".
+* @li y2: A Tensor. Has the same type as "x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator ReduceStd.
+*/
+REG_OP(ReduceStd)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(dim, ListInt, {})
+    .ATTR(unbiased, Bool, true)
+    .ATTR(keepdim, Bool, false)
+    .OP_END_FACTORY_REG(ReduceStd)
+
+/**
+* @brief Calculates the standard deviation of Tensors.
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32. \n
+* @li mean: A Tensor. It's the mean of X. Must be one of the following types: float16, float32. \n
+
+* @par Attributes:
+* Five Attributes, including:
+* @li dim: An optional ListInt. Defaults to {}. \n
+* @li unbiased: An optional bool. Defaults to "True".
+* If "True", Use Bessel Correction.
+* If "False", Do not use Bessel Correction. \n
+* @li keepdim: An optional bool. Defaults to "False".
+* If "True", Keep the original tensor dimension.
+* If "False", Do not keep the original tensor dimension. \n
+* @li invert: An optional bool, Defaults to "False".
+* If "True", the output is inverse of variance.
+* If "False", the output is variance.
+* @li epsilon: An optional float, Defaults to 0.001.
+* Prevent division by 0.
+
+* @par Outputs:
+* @li y: A Tensor. It's the variance of X or reciprocal of variance of X. Has the same type as "x".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator ReduceStdWithMean.
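+
+* @par Example:
+* An illustrative C++ sketch (not part of this header) of the variance
+* computation described above, including the Bessel correction selected by
+* "unbiased"; it assumes "x" has at least two elements when unbiased is true:
+* @code
+* #include <vector>
+* // Variance of x given a precomputed mean; with unbiased == true the sum of
+* // squared deviations is divided by (n - 1) instead of n.
+* float VarianceWithMean(const std::vector<float> &x, float mean,
+*                        bool unbiased) {
+*   float acc = 0.0f;
+*   for (float v : x) { acc += (v - mean) * (v - mean); }
+*   const float n = static_cast<float>(x.size());
+*   return acc / (unbiased ? (n - 1.0f) : n);
+* }
+* @endcode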
+*/
+REG_OP(ReduceStdWithMean)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(dim, ListInt, {})
+    .ATTR(unbiased, Bool, true)
+    .ATTR(keepdim, Bool, false)
+    .ATTR(invert, Bool, false)
+    .ATTR(epsilon, Float, 0.001)
+    .OP_END_FACTORY_REG(ReduceStdWithMean)
+
+/**
+*@brief Computes the mean and variance of "x" across the given axes . \n
+
+*@par Inputs:
+*x: A tensor of type float16 or float32 . \n
+
+*@par Outputs:
+*@li mean: A Tensor of type float16 or float32 for the mean of "x".
+*@li variance: A Tensor of type float16 or float32 for the variance of "x" . \n
+
+*@par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ReduceMeanVariance)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(axes, ListInt, {})
+    .ATTR(keep_dims, Bool, true)
+    .OP_END_FACTORY_REG(ReduceMeanVariance)
+
+/**
+* @brief Calculates the standard deviation or the variance of Tensors with the average value.
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32. \n
+* @li mean: A Tensor. It's the mean of X. Has the same shape and type as "x" \n
+
+* @par Attributes:
+* Four Attributes, including:
+* @li dim: A required ListInt. \n
+* @li if_std: An optional bool. Defaults to "False"
+* If "True", Calculate the standard deviation
+* If "False", Calculate the variance
+* @li unbiased: An optional bool. Defaults to "True".
+* If "True", Use Bessel Correction.
+* If "False", Do not use Bessel Correction. \n
+* @li keepdim: An optional bool. Defaults to "False".
+* If "True", Keep the original tensor dimension.
+* If "False", Do not keep the original tensor dimension. \n
+
+* @par Outputs:
+* @li output_var: A Tensor. It's the standard deviation or the variance of X. Has the same type as "x".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Var_mean.
+*/
+REG_OP(ReduceStdV2Update)
+    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .OUTPUT(output_var, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .REQUIRED_ATTR(dim, ListInt)
+    .ATTR(if_std, Bool, false)
+    .ATTR(unbiased, Bool, true)
+    .ATTR(keepdim, Bool, false)
+    .OP_END_FACTORY_REG(ReduceStdV2Update)
+
+/**
+*@brief Computes the logarithm of the sum of exponentials of elements across
+* dimensions of a tensor.
+* Reduces "x" along the dimensions given in "axes".
+* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
+* entry in "axes". If "keep_dims" is true, the reduced dimensions
+* are retained with length 1.
+*
+*@par Inputs:
+* Two inputs, including:
+*@li x: A Tensor. Must be one of the following types:
+* float32, float16, int32, int64, uint32, uint64, double
+*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n
+*
+*@par Attributes:
+*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
+*
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x" . \n
+*
+*@par Third-party framework compatibility
+* Compatible with the Onnx operator ReduceLogSumExp.
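+*
+*@par Example:
+* An illustrative C++ sketch (not part of this header) of the reduction this
+* operator documents, written in the numerically stable max-shifted form;
+* it assumes a non-empty input:
+*@code
+* #include <algorithm>
+* #include <cmath>
+* #include <vector>
+* // log(sum(exp(x))) == m + log(sum(exp(x - m))) with m = max(x).
+* float LogSumExp(const std::vector<float> &x) {
+*   const float m = *std::max_element(x.begin(), x.end());
+*   float acc = 0.0f;
+*   for (float v : x) { acc += std::exp(v - m); }
+*   return m + std::log(acc);
+* }
+*@endcode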
+*/
+REG_OP(ReduceLogSumExp)
+    .INPUT(x, TensorType::NumberType())
+    .INPUT(axes, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceLogSumExp)
+
+/**
+*@brief Computes the logarithm of the sum of elements across dimensions of a tensor.
+* Reduces "x" along the dimensions given in "axes".
+* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
+* entry in "axes". If "keep_dims" is true, the reduced dimensions
+* are retained with length 1.
+*
+*@par Inputs:
+* Two inputs, including:
+*@li x: A Tensor. Must be one of the following types:
+* float32, float16, int32, int64, uint32, uint64, double
+*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n
+*
+*@par Attributes:
+*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
+*
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x" . \n
+*
+*@par Third-party framework compatibility
+* Compatible with the Onnx operator ReduceLogSum.
+*/
+REG_OP(ReduceLogSum)
+    .INPUT(x, TensorType::NumberType())
+    .INPUT(axes, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceLogSum)
} //namespace ge

#endif  // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h
index 1b60d42a..156f2f34 100644
--- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h
+++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -33,10 +33,12 @@ namespace ge {

*y:A Tensor of type resource. \n

*@par Attributes:
-* @li container: optional, string.
-* @li shared_name: optional, string.
-* @li dtype: required, type.
-* @li shape: optional, ListInt. \n
+* @li container: optional, string. The container this
+* variable is placed in.
+* @li shared_name: optional, string. The name by which
+* this variable is referred to.
+* @li dtype: required, type. The data type of the output.
+* @li shape: optional, ListInt. The shape of the output. \n

*@see VarHandleOp.
*/
@@ -53,11 +55,11 @@ REG_OP(VarHandleOp)

*@brief Assigns a new value to a variable. \n

*@par Inputs:
-*resource:Handle to the resource in which to store the variable.
-*value:The value to set the new tensor to use. \n
+*@li resource:Handle to the resource in which to store the variable.
+*@li value:The value to set the new tensor to use. \n

*@par Attributes:
-* @li dtype: required, type. \n
+* dtype: required, type. \n

*@see AssignVariableOp.
*/
@@ -73,11 +75,11 @@ REG_OP(AssignVariableOp)

*@brief Adds a value to the current value of a variable. \n

*@par Inputs:
-*resource:Handle to the resource in which to store the variable.
-*value:The value by which the variable will be incremented. \n
+*@li resource:Handle to the resource in which to store the variable.
+*@li value:The value by which the variable will be incremented. \n

*@par Attributes:
-* @li dtype: required, type. \n
+* dtype: required, type. \n

*@see AssignAddVariableOp.
*/
@@ -93,11 +95,11 @@ REG_OP(AssignAddVariableOp)

*@brief Subtracts a value to the current value of a variable.
\n
*@par Inputs:
-*resource:Handle to the resource in which to store the variable.
-*value:The value by which the variable will be incremented. \n
+*@li resource:Handle to the resource in which to store the variable.
+*@li value:The value by which the variable will be decremented. \n

*@par Attributes:
-* @li dtype: required, type. \n
+* dtype: required, type. \n

*@see AssignSubVariableOp.
*/
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index 84723872..7f742957 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,31 +25,31 @@ namespace ge {

/**
-*@brief: Basic LSTM Cell forward calculation.
-*@par Inputs:
-*five inputs:
-*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n
-
-*@par Attributes:
-*@li keep_prob:An integer identifying the keep prob in the op. Default to 1.
-*@li forget_bias:An integer identifying the forget bias in the op. Default to 1.
-*@li state_is_tuple:An bool identifying if the hidden state and cell state is tuple. Default to true.
-*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported . \n
+* @brief: Basic LSTM Cell forward calculation.
+* @par Inputs:
+* six inputs:
+* @li x:A 4D Tensor. Must be one of the following types: float16.
+* @li h:A 4D Tensor. Must be one of the following types: float16.
+* @li c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li w:A 4D Tensor. Must be one of the following types: float16.
+* @li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND.
+* @li mask:A 1D Tensor. Must be one of the following types: uint8 . \n

-*@par Outputs:
-*seven outputs:
-*@li mask:A 1D Tensor. Must be one of the following types: uint8.
-*@li ct:A 4D Tensor. Must be one of the following types: float16, float32.
-*@li ht:A 4D Tensor. Must be one of the following types: float16.
-*@li it:A 4D Tensor. Must be one of the following types: float16, float32.
-*@li jt:A 4D Tensor. Must be one of the following types: float16, float32.
-*@li ft:A 4D Tensor. Must be one of the following types: float16, float32.
-*@li ot:A 4D Tensor. Must be one of the following types: float16, float32.
-*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
+* @par Attributes:
+* @li keep_prob:An integer identifying the keep prob in the op. Default to 1.
+* @li forget_bias:An integer identifying the forget bias in the op. Default to 1.
+* @li state_is_tuple:An bool identifying if the hidden state and cell state is tuple. Default to true.
+* @li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported . \n
+
+* @par Outputs:
+* seven outputs:
+* @li ct:A 4D Tensor.
Must be one of the following types: float16, float32. +* @li ht:A 4D Tensor. Must be one of the following types: float16. +* @li it:A 4D Tensor. Must be one of the following types: float16, float32. +* @li jt:A 4D Tensor. Must be one of the following types: float16, float32. +* @li ft:A 4D Tensor. Must be one of the following types: float16, float32. +* @li ot:A 4D Tensor. Must be one of the following types: float16, float32. +* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. */ REG_OP(BasicLSTMCell) .INPUT(x, TensorType({DT_FLOAT16})) @@ -72,18 +72,18 @@ REG_OP(BasicLSTMCell) .OP_END_FACTORY_REG(BasicLSTMCell) /** -*@brief: Dynamic LSTM forward calculation . \n +* @brief: Dynamic LSTM forward calculation . \n -*@par Inputs: -*@li x:A 4D Tensor. Must be the type float32. The format must be FRACTAL_NZ. -*@li w:A 4D Tensor. Must be the type float32. The format must be FRACTAL_Z. -*@li b:A 1D Tensor. Must be the type float32. The format must be ND . \n +* @par Inputs: +* @li x:A 4D Tensor. Must be the type float32. +* @li w:A 4D Tensor. Must be the type float32. +* @li b:A 1D Tensor. Must be the type float32. The format must be ND . \n -*@par Outputs: -*output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z. +* @par Outputs: +* output_h:A Tensor of output. Must be the type float32. -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DynamicLSTM) .INPUT(x, TensorType({DT_FLOAT32})) @@ -93,54 +93,53 @@ REG_OP(DynamicLSTM) .OP_END_FACTORY_REG(DynamicLSTM) /** -*@brief: DynamicRNNGrad calculation. -*@par Inputs: -*ten inputs: \n -*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li y:A 1D Tensor. Must be one of the following types: int32. The format must be FRACTAL_NZ. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. -*@li mask:A 1D Tensor. 
Must be one of the following types: int8.
-*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Attributes:
-*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
-*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
-*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
-*@li use_peephole:An bool identifying if use peephole in the op. Default to false.
-*@li keep_prob:An float identifying the keep prob in the op. Default to 1.
-*@li cell_clip:An float identifying the cell clip in the op. Default to -1.
-*@li num_proj:An integer identifying the num projection in the op. Default to 0.
-*@li time_major:An bool identifying the time major in the op. Default to false.
-*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
-*@li forget_bias:An float identifying the forget bias in the op. Default to 0.
-*@li is_training:An bool identifying is training in the op. Default to true.
+* @brief: DynamicRNNGrad calculation.
+* @par Inputs:
+* twenty-one inputs: \n
+* @li x:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li w:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li b:A 1D Tensor. Must be one of the following types: float16, float32.
+* @li y:A 1D Tensor. Must be one of the following types: int32.
+* @li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li i:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li j:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li f:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li o:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length:A 1D Tensor. Must be one of the following types: int32.
+* @li mask:A 1D Tensor. Must be one of the following types: int8.
+* @li wci:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li wcf:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li wco:A 4D Tensor. Must be one of the following types: float16, float32.

-*@par Outputs:
-*eight outputs: \n
-*@li dw:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li db:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +* @par Attributes: +* @li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. +* @li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +* @li cell_depth:An integer identifying the cell depth in the op. Default to 1. +* @li use_peephole:An bool identifying if use peephole in the op. Default to false. +* @li keep_prob:An float identifying the keep prob in the op. Default to 1. +* @li cell_clip:An float identifying the cell clip in the op. Default to -1. +* @li num_proj:An integer identifying the num projection in the op. Default to 0. +* @li time_major:An bool identifying the time major in the op. Default to false. +* @li forget_bias:An float identifying the forget bias in the op. Default to 0. +* @li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo". + +* @par Outputs: +* eight outputs: \n +* @li dw:A 4D Tensor. Must be one of the following types: float16, float32. +* @li db:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dx:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dwci:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dwco:A 4D Tensor. Must be one of the following types: float16, float32. */ REG_OP(DynamicRNNGrad) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -181,52 +180,56 @@ REG_OP(DynamicRNNGrad) .ATTR(num_proj, Int, 0) .ATTR(time_major, Bool, true) .ATTR(forget_bias, Float, 0.0) + .ATTR(gate_order, String, "ijfo") .OP_END_FACTORY_REG(DynamicRNNGrad) /** -*@brief: DynamicRNN calculation. -*@par Inputs: -*ten inputs: -*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . 
\n
-
-*@par Attributes:
-*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
-*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
-*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
-*@li use_peephole:An bool identifying if use peephole in the op. Default to false.
-*@li keep_prob:An float identifying the keep prob in the op. Default to 1.
-*@li cell_clip:An float identifying the cell clip in the op. Default to -1.
-*@li num_proj:An integer identifying the num projection in the op. Default to 0.
-*@li time_major:An bool identifying the time major in the op. Default to true.
-*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
-*@li forget_bias:An float identifying the forget bias in the op. Default to 0.
-*@li is_training:An bool identifying is training in the op. Default to true . \n
+* @brief: DynamicRNN calculation.
+* @par Inputs:
+* ten inputs:
+* @li x:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li w:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li seq_length:An optional Tensor. Only int32 in the ND format is supported.
+* @li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wci:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wco:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n

-*@par Outputs:
-*eight outputs:
-*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+* @par Attributes:
+* @li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
+* @li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Default to 1.
+* @li use_peephole:An bool identifying if use peephole in the op. Default to false.
+* @li keep_prob:An float identifying the keep prob in the op. Default to 1.
+* @li cell_clip:An float identifying the cell clip in the op. Default to -1.
+* @li num_proj:An integer identifying the num projection in the op. Default to 0.
+* @li time_major:An bool identifying the time major in the op. Default to true.
+* @li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
+* @li forget_bias:An float identifying the forget bias in the op. Default to 0.
+* @li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo".
+* @li is_training:An bool identifying is training in the op. Default to true . \n

+* @par Outputs:
+* eight outputs:
+* @li y:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li output_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li output_c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li i:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li j:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li f:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li o:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
+* @par Third-party framework compatibility:
+* Compatible with the TF operator LSTM.
*/
REG_OP(DynamicRNN)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -251,32 +254,369 @@ REG_OP(DynamicRNN)
    .ATTR(time_major, Bool, true)
    .ATTR(activation, String, "tanh")
    .ATTR(forget_bias, Float, 0.0)
+    .ATTR(gate_order, String, "ijfo")
    .ATTR(is_training, Bool, true)
    .OP_END_FACTORY_REG(DynamicRNN)

/**
-*@brief: LSTMInputGrad calculation.
-*@par Inputs:
-*ten inputs: \n
-*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+* @brief: DynamicRNNV2 calculation.
+* @par Inputs:
+* eleven inputs:
+* @li x:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li seq_length:An optional 1D Tensor. Must be one of the following types: float16, int32.
+* @li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wci:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wco:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
+* @par Attributes:
+* @li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
+* @li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL".
+* Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Default to 1.
+* @li use_peephole:An bool identifying if use peephole in the op. Default to false.
+* @li keep_prob:An float identifying the keep prob in the op. Default to 1.
+* @li cell_clip:An float identifying the cell clip in the op. Default to -1.
+* @li num_proj:An integer identifying the num projection in the op. Default to 0.
+* @li time_major:An bool identifying the time major in the op. Default to true.
+* @li activation:An string identifying the type of activation function in the op. Default to "tanh".
+* Support "tanh" and "clip".
+* @li recurrent_activation:An string identifying the type of activation function in the op. Default to "sigmoid".
+* Support "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM.
+* @li forget_bias:An float identifying the forget bias in the op. Default to 0.
+* @li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo".
+* Set "ijfo" for TF operator LSTM, Set "ifco" for TF Keras LSTM.
+* @li stateful: An bool identifying the type of stateful in the op. Default to false. Only false is currently supported.
+* @li merge_mode: An string identifying the type of merge_mode in the op. Default to "concat".
+* Only "concat" is currently supported.
+* @li is_training:An bool identifying is training in the op. Default to true . \n

+* @par Outputs:
+* eight outputs:
+* @li y:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li output_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* Return the last output_h.
+* @li output_c:A 4D Tensor. Must be one of the following types: float16, float32.
+* Return the last output_c.
+* @li i:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li j:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li f:A 4D Tensor.
Must be one of the following types: float16, float32. +* @li o:A 4D Tensor. Must be one of the following types: float16, float32. +* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. +* @par Third-party framework compatibility: +* Compatible with the TF operator LSTM or TF keras operator LSTM. +*/ + +REG_OP(DynamicRNNV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(cell_type, String, "LSTM") + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(use_peephole, Bool, false) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(recurrent_activation, String, "sigmoid") + .ATTR(forget_bias, Float, 0.0) + .ATTR(gate_order, String, "ijfo") + .ATTR(stateful, Bool, false) + .ATTR(merge_mode, String, "concat") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicRNNV2) + +/** +* @brief: DynamicRNNV2Grad calculation. +* @par Inputs: +* twenty-one inputs: +* @li x:A required 4D Tensor. Must be one of the following types: float16, float32. +* @li w_x:A required 4D Tensor. Must be one of the following types: float16, float32. +* @li w_h:A required 4D Tensor. Must be one of the following types: float16, float32. +* @li y:A 4D Tensor. Must be one of the following types: float16, float32. +* @li init_h:A 4D Tensor. Must be one of the following types: float16, float32. +* @li init_c:A 4D Tensor. Must be one of the following types: float16, float32. +* @li h:A 4D Tensor. Must be one of the following types: float16, float32. +* @li c:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dy:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dh:A 4D Tensor. Must be one of the following types: float16, float32. +* @li dc:A 4D Tensor. Must be one of the following types: float16, float32. +* @li i:A 4D Tensor. Must be one of the following types: float16, float32. +* @li j:A 4D Tensor. Must be one of the following types: float16, float32. +* @li f:A 4D Tensor. Must be one of the following types: float16, float32. +* @li o:A 4D Tensor. Must be one of the following types: float16, float32. +* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. +* @li seq_length:A 1D Tensor. Must be one of the following types: int32. +* @li wci:A 4D Tensor. Must be one of the following types: float16, float32. +* @li wcf:A 4D Tensor. Must be one of the following types: float16, float32. 
+* @li wco:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask:A 1D Tensor. Must be one of the following types: int8. \n

+* @par Attributes:
+* @li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
+* @li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL".
+* Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Default to 1. Only 1 is currently supported.
+* @li use_peephole:An bool identifying if use peephole in the op. Default to false.
+* Only false is currently supported.
+* @li keep_prob:An float identifying the keep prob in the op. Default to 1. Only 1 is currently supported.
+* @li cell_clip:An float identifying the cell clip in the op. Default to -1. Only -1 is currently supported.
+* @li num_proj:An integer identifying the num projection in the op. Default to 0. Only 0 is currently supported.
+* @li time_major:An bool identifying the time major in the op. Default to true. Only true is currently supported.
+* @li activation:An string identifying the type of activation function in the op. Default to "tanh".
+* Only "tanh" is currently supported.
+* @li recurrent_activation:An string identifying the type of activation function in the op. Default to "sigmoid".
+* Only "sigmoid" is currently supported.
+* @li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo".
+* Set "ijfo" for TF operator LSTM, Set "ifco" for TF Keras/Pytorch LSTM .
+* @li stateful: An bool identifying the type of stateful in the op. Default to false. Only false is currently supported.
+* @li merge_mode: An string identifying the type of merge_mode in the op. Default to "concat".
+* Only "concat" is currently supported. \n

+* @par Outputs:
+* nine outputs:
+* @li dw_x:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dw_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li db:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dx:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwci:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwcf:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwco:A 4D Tensor. Must be one of the following types: float16, float32.

+* @par Third-party framework compatibility:
+* Compatible with the TF operator LSTM or TF keras operator LSTM.

+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
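+*
+* @par Example:
+* An illustrative C++ sketch (not part of this header) of what the
+* "gate_order" attribute above controls: how the fused gate dimension of
+* size 4 * hidden_size is split into the i, j(c), f and o gates.
+* @code
+* #include <cstddef>
+* #include <string>
+* // Returns the offset of a gate's slice within the fused gate dimension.
+* // order is "ijfo" or "ifco"; gate must be a character present in order.
+* std::size_t GateOffset(const std::string &order, char gate,
+*                        std::size_t hidden_size) {
+*   return order.find(gate) * hidden_size;
+* }
+* @endcode
+* For example, with order "ijfo" the forget gate 'f' starts at
+* 2 * hidden_size, while with order "ifco" it starts at hidden_size.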
+*/
+REG_OP(DynamicRNNV2Grad)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(dw_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dw_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(db, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dc_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(cell_type, String, "LSTM")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(use_peephole, Bool, false)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(recurrent_activation, String, "sigmoid")
+    .ATTR(gate_order, String, "ijfo")
+    .ATTR(stateful, Bool, false)
+    .ATTR(merge_mode, String, "concat")
+    .OP_END_FACTORY_REG(DynamicRNNV2Grad)
+
+/**
+* @brief: DynamicRNNV3 calculation.
+* @par Inputs:
+* twelve inputs:
+* @li x:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li w:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
+* @li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wci:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wco:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND.
+* @li real_mask:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li project:An optional 4D Tensor. Must be one of the following types: float16, float32 . \n

+* @par Attributes:
+* @li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
+* @li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
+* @li use_peephole:A bool identifying whether to use peephole in the op. Defaults to false.
+* @li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
+* @li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
+* @li num_proj:An integer identifying the num projection in the op. Defaults to 0.
+* @li time_major:A bool identifying the time major in the op. Defaults to true.
+* @li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported.
+* @li forget_bias:A float identifying the forget bias in the op. Defaults to 0.
+* @li is_training:A bool identifying whether the op is in training mode. Defaults to true. \n
+
+* @par Outputs:
+* eight outputs:
+* @li y:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li output_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li output_c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li i:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li j:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li f:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li o:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li tanhc:A 4D Tensor. Must be one of the following types: float16, float32.
+* @par Third-party framework compatibility:
+* Compatible with the TF operator LSTM.
+*/
+REG_OP(DynamicRNNV3)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(cell_type, String, "LSTM")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(use_peephole, Bool, false)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(forget_bias, Float, 0.0)
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(DynamicRNNV3)
+
+/**
+* @brief: DynamicLSTMV2 calculation.
+* @par Inputs:
+* eleven inputs:
+* @li x:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li w:A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li h0:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li c0:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wci:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li wco:An optional 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND.
+
+* @par Attributes:
+* @li num_output:An integer identifying the number of output features. Defaults to 0.
+* @li expose_hidden:A bool identifying whether to expose the hidden state of the last time step. Defaults to false.
+* @li need_output_last:A bool identifying whether to output only the last time step. Defaults to false.
+* @li forget_bias:A float identifying the forget bias in the op. Defaults to 0.
+
+* @par Outputs:
+* five outputs:
+* @li y:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li output_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li output_c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @par Third-party framework compatibility:
+* Compatible with the Caffe operator LSTM.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicLSTMV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(num_output, Int, 0)
+    .ATTR(expose_hidden, Bool, false)
+    .ATTR(need_output_last, Bool, false)
+    .ATTR(forget_bias, Float, 0.0)
+    .OP_END_FACTORY_REG(DynamicLSTMV2)
+
+/**
+* @brief: LSTMInputGrad calculation.
+* @par Inputs:
+* eleven inputs: \n
+* @li w:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li i:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li j:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li f:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li o:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
+
+
+* @par Outputs:
+* four outputs: \n
+* @li dx:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dgate:A 4D Tensor. Must be one of the following types: float16.
 */
 REG_OP(LSTMInputGrad)
     .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -297,24 +637,78 @@ REG_OP(LSTMInputGrad)
     .OP_END_FACTORY_REG(LSTMInputGrad)
+
 /**
-*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state.
-*@par Inputs:
-*three inputs:
-*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li dropout_mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
+* @brief: Dynamic LSTM Cell grad calculation. Calculate the gradient of gates and cell state.
+* @par Inputs:
+* twelve inputs:
+* @li init_c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li i:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li j:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li f:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li o:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li t_state:A Tensor. Must be one of the following types: int32. \n
-*@par Attributes:
-*keep_prob:An integer identifying the keep prob in the op. Default to 1 . \n
+* @par Attributes:
+* @li forget_bias:A float identifying the forget bias in the op. Defaults to 1.
+* @li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported. \n
+* @li direction:A string that marks the calculation sequence of the operator. Defaults to "UNIDIRECTIONAL".
+* @li gate_order:A string marking the order of the four gates in the output. Defaults to "ijfo".
-*@par Outputs:
-*two outputs:
-*@li dxt:A 4D Tensor. Must be one of the following types: float16, float32.
-*@li dht:A 4D Tensor. Must be one of the following types: float16, float32.
+* @par Outputs:
+* two outputs:
+* @li dgate:A 4D Tensor. Must be one of the following types: float16.
+* @li dct_1:A 4D Tensor. Must be one of the following types: float16, float32.
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
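+
+* @par Example:
+* A hedged sketch of the standard LSTM cell backward pass, assumed here for orientation only
+* (not necessarily the exact kernel). i/f/o are post-sigmoid, j/tanhct are post-tanh values:
+* @code
+*   float dc_total = dc + dh * o * (1.0f - tanhct * tanhct); // dc through h = o*tanh(c)
+*   float dgate_i  = dc_total * j      * i * (1.0f - i);     // sigmoid' on input gate
+*   float dgate_j  = dc_total * i      * (1.0f - j * j);     // tanh' on candidate gate
+*   float dgate_f  = dc_total * c_prev * f * (1.0f - f);     // sigmoid' on forget gate
+*   float dgate_o  = dh * tanhct       * o * (1.0f - o);     // sigmoid' on output gate
+*   float dc_prev  = dc_total * f;                           // flows out as dct_1
+* @endcode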
+*/
+REG_OP(DynamicLSTMGradCell)
+    .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(t_state, TensorType({DT_INT32}))
+    .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(forget_bias, Float, 1.0)
+    .ATTR(activation, String, "tanh")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(gate_order, String, "ijfo")
+    .OP_END_FACTORY_REG(DynamicLSTMGradCell)
+
+
+/**
+* @brief: Basic LSTM Cell backward calculation. Calculate the gradient of input and hidden state.
+* @par Inputs:
+* three inputs:
+* @li dgate:A 4D Tensor. Must be one of the following types: float16.
+* @li w:A 4D Tensor. Must be one of the following types: float16.
+* @li dropout_mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND. \n
+
+* @par Attributes:
+* keep_prob:A float identifying the keep prob in the op. Defaults to 1. \n
+
+* @par Outputs:
+* two outputs:
+* @li dxt:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dht:A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(BasicLSTMCellInputGrad)
     .INPUT(dgate, TensorType({DT_FLOAT16}))
@@ -326,20 +720,20 @@ REG_OP(BasicLSTMCellInputGrad)
     .OP_END_FACTORY_REG(BasicLSTMCellInputGrad)

 /**
-*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of weight and bias.
-*@par Inputs:
-*three inputs:
-*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li dgate:A 4D Tensor. Must be one of the following types: uint8. The format must be FRACTAL_NZ . \n
+* @brief: Basic LSTM Cell backward calculation. Calculate the gradient of weight and bias.
+* @par Inputs:
+* three inputs:
+* @li x:A 4D Tensor. Must be one of the following types: float16.
+* @li h:A 4D Tensor. Must be one of the following types: float16.
+* @li dgate:A 4D Tensor. Must be one of the following types: float16. \n
-*@par Outputs:
-*two outputs:
-*@li dw:A 4D Tensor. Must be one of the following types: float16.
-*@li db:A 4D Tensor. Must be one of the following types: float16, float32.
+* @par Outputs:
+* two outputs:
+* @li dw:A 4D Tensor. Must be one of the following types: float16.
+* @li db:A 4D Tensor. Must be one of the following types: float16, float32.
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(BasicLSTMCellWeightGrad)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -350,29 +744,29 @@ REG_OP(BasicLSTMCellWeightGrad)
     .OP_END_FACTORY_REG(BasicLSTMCellWeightGrad)

 /**
-*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of gates and cell state.
-*@par Inputs:
-*eight inputs:
-*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dht:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li it:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li jt:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li ft:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li ot:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n
-
-*@par Attributes:
-*@li forget_bias:An integer identifying the forget bias in the op. Default to 1.
-*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported . \n
+* @brief: Basic LSTM Cell backward calculation. Calculate the gradient of gates and cell state.
+* @par Inputs:
+* eight inputs:
+* @li c:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dht:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dct:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li it:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li jt:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li ft:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li ot:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. \n
-*@par Outputs:
-*two outputs:
-*@li dgate:A 4D Tensor. Must be one of the following types: float16.
-*@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32.
+* @par Attributes:
+* @li forget_bias:A float identifying the forget bias in the op. Defaults to 1.
+* @li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported. \n
+
+* @par Outputs:
+* two outputs:
+* @li dgate:A 4D Tensor. Must be one of the following types: float16.
+* @li dct_1:A 4D Tensor. Must be one of the following types: float16, float32.
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(BasicLSTMCellCStateGrad)
     .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -390,30 +784,30 @@ REG_OP(BasicLSTMCellCStateGrad)
     .OP_END_FACTORY_REG(BasicLSTMCellCStateGrad)

 /**
-*@brief: RNN operator.
-*@par Inputs:
-*eight inputs:
-*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND.
-*@li x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li w_xh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li w_sh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n
-
-*@par Attributes:
-*@li expose_hidden:An bool identifying if expose the hidden state of last time step. Default to false.
-*@li num_output:An integer identifying the number of output features. Default to 0 . \n
+* @brief: RNN operator.
+* @par Inputs:
+* ten inputs:
+* @li x:A 4D Tensor. Must be one of the following types: float16.
+* @li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND.
+* @li x_static:A 4D Tensor. Must be one of the following types: float16.
+* @li h_0:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li w_xh:A 4D Tensor. Must be one of the following types: float16.
+* @li w_sh:A 4D Tensor. Must be one of the following types: float16.
+* @li w_hh:A 4D Tensor. Must be one of the following types: float16.
+* @li w_ho:A 4D Tensor. Must be one of the following types: float16.
+* @li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. \n
-*@par Outputs:
-*two outputs:
-*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Attributes:
+* @li expose_hidden:A bool identifying whether to expose the hidden state of the last time step. Defaults to false.
+* @li num_output:An integer identifying the number of output features. Defaults to 0. \n
+
+* @par Outputs:
+* two outputs:
+* @li o:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h_t:A 4D Tensor. Must be one of the following types: float16, float32.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(RNN)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -433,30 +827,30 @@ REG_OP(RNN)
     .OP_END_FACTORY_REG(RNN)

 /**
-*@brief: BasicRNNCell operator.
-*@par Inputs:
-*eight inputs:
-*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND.
-*@li w_xh_x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li w_xh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n
-
-*@par Attributes:
-*@li expose_hidden:An bool identifying if expose the hidden state of last time step. Default to false.
-*@li num_output:An integer identifying the number of output features. Default to 0 . \n
+* @brief: BasicRNNCell operator.
+* @par Inputs:
+* nine inputs:
+* @li x:A 4D Tensor. Must be one of the following types: float16.
+* @li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND.
+* @li w_xh_x_static:A 4D Tensor. Must be one of the following types: float16.
+* @li h_0:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li w_xh:A 4D Tensor. Must be one of the following types: float16.
+* @li w_hh:A 4D Tensor. Must be one of the following types: float16.
+* @li w_ho:A 4D Tensor. Must be one of the following types: float16.
+* @li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. \n
-*@par Outputs:
-*two outputs:
-*@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+* @par Attributes:
+* @li expose_hidden:A bool identifying whether to expose the hidden state of the last time step. Defaults to false.
+* @li num_output:An integer identifying the number of output features. Defaults to 0. \n
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Outputs:
+* two outputs:
+* @li o_t:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h_t:A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(BasicRNNCell)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -475,37 +869,37 @@ REG_OP(BasicRNNCell)
     .OP_END_FACTORY_REG(BasicRNNCell)

 /**
-*@brief: DynamicGRU calculation.
-*@par Inputs:
-*seven inputs: \n
-*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li b:Must be one of the following types: float16, float32. The format must be ND.
-*@li cw:Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li cb:Must be one of the following types: float16, float32. The format must be ND.
-*@li seq_length:Must be one of the following types: int32. The format must be ND.
-*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Attributes:
-*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
-*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
-*@li keep_prob:An float identifying the keep prob in the op. Default to 1.
-*@li cell_clip:An float identifying the cell clip in the op. Default to -1.
-*@li num_proj:An integer identifying the num projection in the op. Default to 0.
-*@li time_major:An bool identifying the time major in the op. Default to true.
-*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
-*@li is_training:An bool identifying is training in the op. Default to true.
+* @brief DynamicGRU calculation.
+* @par Inputs:
+* seven inputs:
+* @li x:Must be one of the following types: float16.
+* @li w:Must be one of the following types: float16.
+* @li b:Must be one of the following types: float16, float32. The format must be ND.
+* @li cw:Must be one of the following types: float16.
+* @li cb:Must be one of the following types: float16, float32. The format must be ND.
+* @li seq_length:Must be one of the following types: int32. The format must be ND.
+* @li init_h:Must be one of the following types: float16, float32.
-
-*@par Outputs:
-*five outputs: \n
-*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Attributes:
+* @li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
+* @li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
+* @li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
+* @li num_proj:An integer identifying the num projection in the op. Defaults to 0.
+* @li time_major:A bool identifying the time major in the op. Defaults to true.
+* @li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported.
+* @li is_training:A bool identifying whether the op is in training mode. Defaults to true.
+
+* @par Outputs:
+* five outputs:
+* @li y:Must be one of the following types: float16, float32.
+* @li output_h:Must be one of the following types: float16, float32.
+* @li r:Must be one of the following types: float16, float32.
+* @li i:Must be one of the following types: float16, float32.
+* @li n:Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(DynamicGRU)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -531,40 +925,37 @@ REG_OP(DynamicGRU)
     .OP_END_FACTORY_REG(DynamicGRU)

 /**
-*@brief: DynamicGRUV2 calculation.
-*@par Inputs:
-*seven inputs: \n
-*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
-*@li bias_input:Must be one of the following types: float16, float32. The format must be ND.
-*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
-*@li seq_length:Must be one of the following types: int32. The format must be ND.
-*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Attributes:
-*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
-*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
-*@li keep_prob:An float identifying the keep prob in the op. Default to 1.
-*@li cell_clip:An float identifying the cell clip in the op. Default to -1.
-*@li num_proj:An integer identifying the num projection in the op. Default to 0.
-*@li time_major:An bool identifying the time major in the op. Default to true.
-*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
-*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
-*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true.
-*@li is_training:An bool identifying is training in the op. Default to true.
+* @brief DynamicGRUV2 calculation.
+* @par Inputs:
+* seven inputs:
+* @li x:Must be one of the following types: float16.
+* @li weight_input:Must be one of the following types: float16.
+* @li weight_hidden:Must be one of the following types: float16.
+* @li bias_input:Must be one of the following types: float16, float32. The format must be ND.
+* @li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
+* @li seq_length:Must be one of the following types: int32, float16. The format must be ND.
+* @li init_h:Must be one of the following types: float16, float32.
-
-*@par Outputs:
-*six outputs: \n
-*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Attributes:
+* @li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". "UNIDIRECTIONAL" and "REDIRECTIONAL" are supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
+* @li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
+* @li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
+* @li num_proj:An integer identifying the num projection in the op. Defaults to 0.
+* @li time_major:A bool identifying the time major in the op. Defaults to true.
+* @li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported.
+* @li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
+* @li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true.
+* @li is_training:A bool identifying whether the op is in training mode. Defaults to true.
+
+* @par Outputs:
+* six outputs:
+* @li y:Must be one of the following types: float16, float32.
+* @li output_h:Must be one of the following types: float16, float32.
+* @li update:Must be one of the following types: float16, float32.
+* @li reset:Must be one of the following types: float16, float32.
+* @li new:Must be one of the following types: float16, float32.
+* @li hidden_new:Must be one of the following types: float16, float32.
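+
+* @par Example:
+* A hedged sketch of the assumed GRU equations (the standard formulation, given here for
+* orientation only, not quoted from the kernel): with gate_order "zrh" the update (z),
+* reset (r) and candidate (h/n) blocks are packed in that order in the weights and biases,
+* \f$ z_t=\sigma(W_zx_t+R_zh_{t-1}+b_z) \f$ and \f$ r_t=\sigma(W_rx_t+R_rh_{t-1}+b_r) \f$;
+* reset_after=true gives \f$ n_t=\tanh(W_nx_t+b_{Wn}+r_t\odot(R_nh_{t-1}+b_{Rn})) \f$,
+* reset_after=false gives \f$ n_t=\tanh(W_nx_t+b_{Wn}+R_n(r_t\odot h_{t-1})+b_{Rn}) \f$,
+* and in both cases \f$ h_t=(1-z_t)\odot n_t+z_t\odot h_{t-1} \f$.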
 */
 REG_OP(DynamicGRUV2)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -572,7 +963,7 @@ REG_OP(DynamicGRUV2)
     .INPUT(weight_hidden, TensorType({DT_FLOAT16}))
     .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
     .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -592,44 +983,298 @@ REG_OP(DynamicGRUV2)
     .ATTR(is_training, Bool, true)
     .OP_END_FACTORY_REG(DynamicGRUV2)
+

 /**
-*@brief: DynamicGRUV2Grad calculation.
-*@par Inputs:
-*fourteen inputs: \n
-*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li weight_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Attributes:
-*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
-*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
-*@li keep_prob:An float identifying the keep prob in the op. Default to 1.
-*@li cell_clip:An float identifying the cell clip in the op. Default to -1.
-*@li num_proj:An integer identifying the num projection in the op. Default to 0.
-*@li time_major:An bool identifying the time major in the op. Default to true.
-*@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias".
-*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
-*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true.
+* @brief DynamicGRUV2Hidden calculation.
+* @par Inputs:
+* five inputs:
+* @li x_weight_input:Must be one of the following types: float32.
+* @li weight_hidden:Must be one of the following types: float16.
+* @li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
+* @li seq_length:Must be one of the following types: int32, float16. The format must be ND.
+* @li init_h:Must be one of the following types: float16, float32.
+
+* @par Attributes:
+* @li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". "UNIDIRECTIONAL" and "REDIRECTIONAL" are supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
+* @li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
+* @li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
+* @li num_proj:An integer identifying the num projection in the op. Defaults to 0.
+* @li time_major:A bool identifying the time major in the op. Defaults to true.
+* @li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported.
+* @li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
+* @li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true.
+* @li is_training:A bool identifying whether the op is in training mode. Defaults to true.
 *@par Outputs:
-*six outputs: \n
-*@li dw_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dw_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li db_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+* six outputs:
+* @li y:Must be one of the following types: float16, float32.
+* @li output_h:Must be one of the following types: float16, float32.
+* @li update:Must be one of the following types: float16, float32.
+* @li reset:Must be one of the following types: float16, float32.
+* @li new:Must be one of the following types: float16, float32.
+* @li hidden_new:Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicGRUV2Hidden)
+    .INPUT(x_weight_input, TensorType({DT_FLOAT32}))
+    .INPUT(weight_hidden, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(gate_order, String, "zrh")
+    .ATTR(reset_after, Bool, true)
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(DynamicGRUV2Hidden)
+
+/**
+* @brief DynamicAUGRU calculation.
+* @par Inputs:
+* eight inputs:
+* @li x:Must be one of the following types: float16.
+* @li weight_input:Must be one of the following types: float16.
+* @li weight_hidden:Must be one of the following types: float16.
+* @li weight_att:Must be one of the following types: float16.
+* @li bias_input:Must be one of the following types: float16, float32. The format must be ND.
+* @li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
+* @li seq_length:Must be one of the following types: int32, float16. The format must be ND.
+* @li init_h:Must be one of the following types: float16, float32.
+
+* @par Attributes:
+* @li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
+* @li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
+* @li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
+* @li num_proj:An integer identifying the num projection in the op. Defaults to 0.
+* @li time_major:A bool identifying the time major in the op. Defaults to true.
+* @li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported.
+* @li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
+* @li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true.
+* @li is_training:A bool identifying whether the op is in training mode. Defaults to true.
+
+* @par Outputs:
+* seven outputs:
+* @li y:Must be one of the following types: float16, float32.
+* @li output_h:Must be one of the following types: float16, float32.
+* @li update:Must be one of the following types: float16, float32.
+* @li update_att:Must be one of the following types: float16, float32.
+* @li reset:Must be one of the following types: float16, float32.
+* @li new:Must be one of the following types: float16, float32.
+* @li hidden_new:Must be one of the following types: float16, float32.
+*/
+REG_OP(DynamicAUGRU)
+    .INPUT(x, TensorType({DT_FLOAT16}))
+    .INPUT(weight_input, TensorType({DT_FLOAT16}))
+    .INPUT(weight_hidden, TensorType({DT_FLOAT16}))
+    .INPUT(weight_att, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(update_att, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(gate_order, String, "zrh")
+    .ATTR(reset_after, Bool, true)
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(DynamicAUGRU)
+
+/**
+* @brief: DynamicAUGRUGrad calculation.
+* @par Inputs:
+* sixteen inputs: \n
+* @li x:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li weight_input:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li weight_att:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li y:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li update:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li update_att:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li reset:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length:An optional Tensor. Must be one of the following types: int32.
+* @li mask:An optional Tensor. Must be one of the following types: uint8.
+
+* @par Attributes:
+* @li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
+* @li keep_prob:A float identifying the keep prob in the op. Defaults to -1.
+* @li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
+* @li num_proj:An integer identifying the num projection in the op. Defaults to 0.
+* @li time_major:A bool identifying the time major in the op. Defaults to true.
+* @li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
+* @li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true.
+
+* @par Outputs:
+* seven outputs: \n
+* @li dw_input:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dw_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li db_input:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dx:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dw_att:A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
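+
+* @par Note:
+* Hedged background, assumed from the usual AUGRU formulation (a DIEN-style GRU with an
+* attentional update gate) rather than stated by this patch: the per-step attention score
+* scales the update gate, \f$ \tilde{u}_t = a_t \cdot u_t \f$ and
+* \f$ h_t = (1-\tilde{u}_t)\odot h_{t-1} + \tilde{u}_t\odot \tilde{h}_t \f$, which is why
+* this grad op consumes update_att and emits the extra dw_att output.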
+*/
+REG_OP(DynamicAUGRUGrad)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight_att, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(update_att, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(dw_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dw_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(db_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(db_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dw_att, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(keep_prob, Float, -1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(gate_order, String, "zrh")
+    .ATTR(reset_after, Bool, true)
+    .OP_END_FACTORY_REG(DynamicAUGRUGrad)
+
+/**
+* @brief: AUGRUHiddenGradCell calculation.
+* @par Inputs:
+* eleven inputs: \n
+* @li weight_att:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li update:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li update_att:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li reset:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length:An optional 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Attributes:
+* @li t_state:An int identifying the current t state, in the range [0, 4]. Defaults to 0.
+* @li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
+
+* @par Outputs:
+* four outputs: \n
+* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dw_att_t:A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
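+
+* @par Example:
+* A hedged usage sketch, assuming the set_input_*/set_attr_* accessors that REG_OP
+* conventionally generates in the ge::op namespace; the tensor handles here
+* (weight_att, dh_pre_t, ...) are illustrative placeholders, not part of this patch:
+* @code
+*   auto cell = ge::op::AUGRUHiddenGradCell("augru_hidden_grad_cell");
+*   cell.set_input_weight_att(weight_att)
+*       .set_input_dh_pre_t(dh_pre_t)
+*       .set_input_h(h)
+*       .set_input_dy(dy)
+*       .set_input_dh(dh);
+*   cell.set_attr_t_state(0);           // current time step
+*   cell.set_attr_gate_order("zrh");    // match the forward op's layout
+* @endcode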
+*/
+REG_OP(AUGRUHiddenGradCell)
+    .INPUT(weight_att, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(update_att, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dw_att_t, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(t_state, Int, 0)
+    .ATTR(gate_order, String, "zrh")
+    .OP_END_FACTORY_REG(AUGRUHiddenGradCell)
+
+/**
+* @brief: DynamicGRUV2Grad calculation.
+* @par Inputs:
+* fourteen inputs: \n
+* @li x:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li weight_input:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li y:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li update:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li reset:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask:A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Attributes:
+* @li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
+* @li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
+* @li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
+* @li num_proj:An integer identifying the num projection in the op. Defaults to 0.
+* @li time_major:A bool identifying the time major in the op. Defaults to true.
+* @li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
+* @li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true.
+
+* @par Outputs:
+* six outputs: \n
+* @li dw_input:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dw_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li db_input:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dx:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(DynamicGRUV2Grad)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -658,37 +1303,85 @@ REG_OP(DynamicGRUV2Grad)
     .ATTR(cell_clip, Float, -1.0)
     .ATTR(num_proj, Int, 0)
     .ATTR(time_major, Bool, true)
-    .ATTR(bias_type, String, "double_bias")
     .ATTR(gate_order, String, "zrh")
     .ATTR(reset_after, Bool, true)
     .OP_END_FACTORY_REG(DynamicGRUV2Grad)

 /**
-*@brief: GRUV2HiddenGrad calculation.
-*@par Inputs:
-*nine inputs: \n
-*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Attributes:
-*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
+* @brief: GRUV2HiddenGradCell calculation.
+* @par Inputs:
+* nine inputs: \n
+* @li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li update:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li reset:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length:A 1D Tensor. Must be one of the following types: float16, float32.
-*@par Outputs:
-*three outputs: \n
-*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+* @par Attributes:
+* @li t_state:An int identifying the current t state, in the range [0, 4]. Defaults to 0.
+* @li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
+
+* @par Outputs:
+* three outputs: \n
+* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
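+
+* @par Note:
+* Hedged orientation, assumed rather than quoted from the kernel: standard GRU backward
+* through \f$ h_t=(1-z_t)\odot n_t+z_t\odot h_{t-1} \f$ gives
+* \f$ dn_t = dh_t\odot(1-z_t) \f$, \f$ dz_t = dh_t\odot(h_{t-1}-n_t) \f$ and the direct
+* path \f$ dh_{t-1} \mathrel{+}= dh_t\odot z_t \f$. Presumably the Cell op computes one
+* such step, with t_state selecting the step and dgate_h packing the per-gate gradients
+* in gate_order (an assumption).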
 */
-REG_OP(GRUV2HiddenGrad)
-    .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+REG_OP(GRUV2HiddenGradCell)
+    .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(t_state, Int, 0)
+    .ATTR(gate_order, String, "zrh")
+    .OP_END_FACTORY_REG(GRUV2HiddenGradCell)
+
+/**
+* @brief: DynamicGRUCellGrad calculation.
+* @par Inputs:
+* eleven inputs: \n
+* @li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li update:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li reset:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND.
+* @li seq_length:A 1D Tensor. Must be one of the following types: float16, float32.
+
+* @par Attributes:
+* gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
+
+* @par Outputs:
+* three outputs: \n
+* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicGRUCellGrad)
+    .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -696,11 +1389,240 @@ REG_OP(GRUV2HiddenGrad)
     .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(t_state, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(gate_order, String, "zrh")
-    .OP_END_FACTORY_REG(GRUV2HiddenGrad)
+    .OP_END_FACTORY_REG(DynamicGRUCellGrad)
+
+/**
+* @brief Calculates the reversed outputs of the function "embedding". \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li grad: A mutable Tensor of word grad. Must be one of the following types:
+* float32.
+* @li indices: A mutable word index Tensor of the int32 type.\n
+
+* @par Attributes:
+* @li num_weights: An int attr indicating the number of words in the dictionary. \n
+
+* @li padding_idx: An int attr indicating which word index is filled with zeros. Defaults to "-1". \n
+
+* @li scale_grad_by_freq: An optional bool. Defaults to "False".
+* If "True", "grad_weight" will be scaled by word_frequency.
+* If "False", "grad_weight" will not be scaled by word_frequency. \n
+
+* @par Outputs:
+* y: A mutable output Tensor of new word grad. Has the same type as "grad". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator EmbeddingDenseGrad.
+*/
+REG_OP(EmbeddingDenseGrad)
+    .INPUT(grad, TensorType({ DT_FLOAT32 }))  /* "First operand." */
+    .INPUT(indices, TensorType({ DT_INT32 }))  /* "Second operand." */
+    .OUTPUT(y, TensorType({ DT_FLOAT32 }))  /* "Result, has same element type as two inputs" */
+    .REQUIRED_ATTR(num_weights, Int)
+    .ATTR(padding_idx, Int, -1)
+    .ATTR(scale_grad_by_freq, Bool, false)
+    .OP_END_FACTORY_REG(EmbeddingDenseGrad)
+
+/**
+* @brief CommonLSTM calculation.
+* @par Inputs:
+* eight inputs: \n
+* @li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32.
+* @li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
+* @li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
+* @li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+* @li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND.
+* @li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
+* @li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
+* @li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+
+* @par Attributes:
+* @li activation_alpha:Optional scaling values used by some activation functions. Only empty is currently supported.
+* @li activation_beta:Optional scaling values used by some activation functions. Only empty is currently supported.
+* @li activations:The list of activation functions. Only empty is currently supported.
+* @li clip:A float identifying the cell clip in the op. Defaults to -1.
+* @li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward(default), reverse, or bidirectional.
+* @li hidden_size:Number of neurons in the hidden layer. Reserved.
+* @li input_forget:Couple the input and forget gates if 1. Reserved.
+
+* @par Outputs:
+* three outputs: \n
+* @li y:The first dimension is time step, the second dimension is direction, and the rest is a 4D Tensor. Must be one of the following types: float16, float32.
+* @li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
+* @li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
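+
+* @par Example:
+* A minimal construction sketch, assuming the generated ge::op::CommonLSTM IR wrapper and its
+* conventional set_input_*/set_attr_* accessors (illustrative only; not defined in this header):
+*   auto op = ge::op::CommonLSTM("common_lstm");
+*   op.set_input_x(x).set_input_w(w).set_input_r(r);
+*   op.set_attr_hidden_size(256).set_attr_direction("forward");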
+*/
+
+REG_OP(CommonLSTM)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(activation_alpha, ListFloat, {})
+    .ATTR(activation_beta, ListFloat, {})
+    .ATTR(activations, ListString, {})
+    .ATTR(clip, Float, -1.0)
+    .ATTR(direction, String, "forward")
+    .REQUIRED_ATTR(hidden_size, Int)
+    .ATTR(input_forget, Int, 0)
+    .OP_END_FACTORY_REG(CommonLSTM)
+
+/**
+ * @brief Calculate the mask. According to hidden_size and num_step, convert seq_length to mask.
+ *
+ * @par Inputs:
+ * @li seq_length: A 1D Tensor. Must be one of the following types: int32. Records the current length of each batch. [batch_size].
+ * @li x: A 3D Tensor. Must be one of the following types: float16, float32. Records the num_step/batch_size/input_size. [num_step, batch_size, input_size].
+ * @li hidden_size: A required attribute of type int32, specifying the hidden size. \n
+ *
+ * @par Outputs:
+ * seq_mask: A 3D Tensor. Must be one of the following types: float16, float32, with the shape of [num_step, batch_size, hidden_size]. Has the same type as "x". \n
+ *
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(RnnGenMaskV2)
+    .INPUT(seq_length, TensorType({DT_INT32}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(hidden_size, Int)
+    .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(RnnGenMaskV2)
+
+/**
+* @brief Common GRU calculation.
+
+* @par Inputs:
+* Six inputs, including:
+* @li x: The input sequences packed (and potentially padded) into one 3D Tensor(float16).
+* @li w: The weight tensor for the gates is a 3D Tensor(float16).
+* @li r: The recurrence weight tensor is a 3D Tensor(float16).
+* @li b: The bias tensor for the gates. The format must be ND.
+* @li sequence_lens: Optional tensor specifying lengths of sequences(int32). The format must be ND.
+* @li initial_h: Optional initial value of the hidden(float16,float32).
+
+* @par Attributes:
+* @li activation_alpha: Optional scaling values used by some activation functions. \n
+* @li activation_beta: Optional scaling values used by some activation functions. \n
+* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n
+* @li clip: Cell clip threshold. \n
+* @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n
+* @li hidden_size: Number of neurons in the hidden layer. \n
+* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n
+
+* @par Outputs:
+* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32).
+* @li y_h: The last output value of the hidden(float16,float32).
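+
+* @par Note:
+* Shapes are not spelled out above; assuming the ONNX GRU convention this op mirrors,
+* "w" is [num_directions, 3*hidden_size, input_size] and "r" is
+* [num_directions, 3*hidden_size, hidden_size].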
+*/
+REG_OP(CommonGRU)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(activation_alpha, ListFloat, {})
+    .ATTR(activation_beta, ListFloat, {})
+    .ATTR(activations, ListString, {})
+    .ATTR(clip, Float, -1.0)
+    .ATTR(direction, String, "forward")
+    .REQUIRED_ATTR(hidden_size, Int)
+    .ATTR(linear_before_reset, Int, 0)
+    .OP_END_FACTORY_REG(CommonGRU)
+/**
+* @brief Calculates the reversed outputs of the function "embedding". \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li weight: A mutable Tensor of word grad. Must be one of the following types:
+* float32.
+* @li indices: A mutable word index Tensor of the int32 type.\n
+* @li offsets: A mutable word index Tensor of the int32 type.\n
+* @li per_sample_weights: An optional Tensor of weights; if not specified, all weights are taken to be 1.
+* If specified, per_sample_weights must have exactly the same shape as input
+* and is treated as having the same offsets, if those are not None.
+* Only supported for mode='sum'.\n
+
+* @par Attributes:
+* @li mode: A string attr, one of "sum", "mean" or "max". Specifies the way to reduce the bag. \n
+
+* @li scale_grad_by_freq: An optional bool. Defaults to "False".
+* If "True", "grad_weight" will be scaled by word_frequency.
+* If "False", "grad_weight" will not be scaled by word_frequency. \n
+* @li sparse: if True, the gradient w.r.t. the "weight" matrix will be a sparse tensor. \n
+* @li include_last_offset: if True, "offsets" has one additional element, where the last element
+* is equivalent to the size of indices. This matches the CSR format. \n
+
+* @par Outputs:
+* y: A mutable output Tensor of new word grad. Has the same type as "weight". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator EmbeddingBag.
+*/
+REG_OP(EmbeddingBag)
+    .INPUT(weight, TensorType({ DT_FLOAT32 }))
+    .INPUT(indices, TensorType({ DT_INT32 }))
+    .OPTIONAL_INPUT(offsets, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(per_sample_weights, TensorType({DT_FLOAT32}))
+    .OUTPUT(y, TensorType({ DT_FLOAT32 }))
+    .ATTR(mode, String, "mean")
+    .ATTR(scale_grad_by_freq, Bool, false)
+    .ATTR(sparse, Bool, false)
+    .ATTR(include_last_offset, Bool, false)
+    .OP_END_FACTORY_REG(EmbeddingBag)
+/**
+ * @brief: LSTMP calculation.
+ * @par Inputs:
+ * eight inputs:
+ * @li x:A required Tensor(seq, batch, dim). Must be one of the following types: float16, float32.
+ * @li real_mask:An optional Tensor(seq, batch). Must be one of the following types: float16, float32.
+ * @li init_h:An optional Tensor(batch, state). Must be one of the following types: float16, float32.
+ * @li init_c:An optional Tensor(batch, hidden). Must be one of the following types: float16, float32.
+ * @li wx:A required Tensor(4*hidden, dim). Must be one of the following types: float16, float32.
+ * @li wr:A required Tensor(4*hidden, state). Must be one of the following types: float16, float32.
+ * @li bias:An optional Tensor(hidden). Must be one of the following types: float16, float32. The format must be ND.
+ * @li project: An optional Tensor. Must be one of the following types: float16, float32.
+ *
+ * @par Outputs:
+ * three outputs:
+ * @li y:A Tensor.
Must be one of the following types: float16, float32. + * @li output_h:A Tensor. Must be one of the following types: float16, float32. + * @li output_c:A Tensor. Must be one of the following types: float16, float32. + * + *@par Attributes: + * time_major:An bool identifying the time major in the op. Default to false. + * @par Restrictions: + * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LSTMP) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(wx, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(wr, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(time_major, Bool, false) + .OP_END_FACTORY_REG(LSTMP) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index b7649a44..850b3e5a 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,12 +28,12 @@ namespace ge { * iou_threshold with higher scoring box according to their * intersection-over-union (IoU) . \n -*@par Input: -* @li box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and +* @par Inputs: +* box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and * corresponding confidence scores . \n * @par Attributes: -* @li iou_threshold: An optional float. The threshold for deciding whether boxes +* iou_threshold: An optional float. The threshold for deciding whether boxes * overlap too much with respect to IOU . \n * @par Outputs: diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 0ce473b7..5ce6c2e0 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index cbd9839d..601b360b 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,13 @@ namespace ge { *corresponding weights in sparse_weights. This field maybe omitted for the dense approach.It's a dynamic input. *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. 
 *@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.It's a dynamic input.
-*@li example_state_data: a list of vectors containing the example state data.
+*@li example_state_data: a list of vectors containing the example state data. \n
+
+*@par Attributes:
+*@li adaptive: A bool. Defaults to "false".
+*@li num_sparse_features: The number of sparse feature groups.
+*@li num_sparse_features_with_values: The number of sparse feature groups with values.
+*@li num_dense_features: The number of dense feature groups.
 *@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses.
 *@li l1: Symmetric l1 regularization strength.
 *@li l2: Symmetric l2 regularization strength.
@@ -53,10 +59,10 @@ namespace ge {
 *@li num_inner_iterations: Number of iterations per mini-batch . \n
 
 *@par Outputs:
-*y: A Returns a list of vectors containing the updated example state
-*data.a list of vectors where each value is the delta
-*weights associated with a sparse feature group.a list of vectors where the values are the delta
-*weights associated with a dense feature group . \n
+*@li out_example_state_data: a list of vectors containing the updated example state data.
+*@li out_delta_sparse_weights: a list of vectors where each value is the delta
+*weights associated with a sparse feature group.
+*@li out_delta_dense_weights: a list of vectors where the values are the delta
+*weights associated with a dense feature group . \n
 
 *@par Third-party framework compatibility
 * Compatible with tensorflow SdcaOptimizerV2 operator.
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index 2c99e82e..810d024b 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -178,7 +178,7 @@ REG_OP(GatherNd)
     .OP_END_FACTORY_REG(GatherNd)
 
 /**
-*@brief Gather slices from "x" according to "indices" by corresponding axis . \n
+*@brief Gather slices from "x" according to "indices" by corresponding axis .
 
 *@par Inputs:
 *Three inputs, including:
@@ -187,16 +187,19 @@ REG_OP(GatherNd)
 * uint16, complex128, float16, uint32, uint64, complex64, complex128.
 * @li indices: A Tensor of type int32 or int64.
 * @li axis: A Tensor of type as int32 or int64,
-* Must be in the range [-rank(input_tensor), rank(input_tensor)) . \n
+* Must be in the range [-rank(input_tensor), rank(input_tensor)) .
+
+*@par Attributes:
+* batch_dims: An optional int. Defaults to 0.
 
 *@par Outputs:
-*y: A Tensor. Has the same type as "x" . \n
+*y: A Tensor. Has the same type as "x" .
 
 *@attention Constraints:
 *Value in indices must be in range [0, x.shape[axis])
 
 *@par Third-party framework compatibility
-* Compatible with the TensorFlow operator GatherV2 . \n
+* Compatible with the TensorFlow operator GatherV2 .
 */
 
 REG_OP(GatherV2)
@@ -204,6 +207,7 @@ REG_OP(GatherV2)
     .INPUT(indices, TensorType::IndexNumberType())
     .INPUT(axis, TensorType::IndexNumberType())
     .OUTPUT(y, TensorType::BasicType())
+    .ATTR(batch_dims, Int, 0)
     .OP_END_FACTORY_REG(GatherV2)
 
 /**
@@ -240,6 +244,56 @@ REG_OP(GatherV2D)
     .OP_END_FACTORY_REG(GatherV2D)
 
 /**
+*@brief Gathers values along an axis specified by dim . \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
+*@li index: A Tensor. Must be one of the following types: int32, int64 . \n
+
+*@par Attributes:
+* dim: the axis along which to index . \n
+
+*@par Outputs:
+* y: A Tensor. Has the same type as "x" . \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator Gather.
+*/
+REG_OP(GatherElements)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32,
+                          DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64}))
+    .INPUT(index, TensorType({DT_INT32,DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32,
+                           DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64}))
+    .ATTR(dim, Int, 0)
+    .OP_END_FACTORY_REG(GatherElements)
+
+/**
+*@brief Gathers values along an axis specified by dim . \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8,
+* int64, uint16, float16, uint32, uint64, bool.
+*@li dim: A Tensor. Must be one of the following types: int32, int64.
+*@li index: A Tensor. Must be one of the following types: int32, int64 . \n
+
+*@par Outputs:
+* y: A Tensor. Has the same type as "x" . \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator Gather.
+*/
+REG_OP(GatherD)
+    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,
+                          DT_INT64, DT_UINT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(dim, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(index, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
+    .ATTR(dim, Int, 0)
+    .OP_END_FACTORY_REG(GatherD)
+
+/**
 *@brief Extracts a strided slice of a tensor. Roughly speaking, this op
 extracts a slice of size (end-begin)/stride from the given input tensor.
 Starting at the location specified by begin the slice continues by
@@ -275,8 +329,6 @@ REG_OP(GatherV2D)
 *@par Outputs:
 *y: A Tensor. Has the same type as "x" . \n
 
-*@attention Constraints:
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSlice.
 */
@@ -327,8 +379,6 @@ REG_OP(StridedSlice)
 *@par Outputs:
 *y: A Tensor. Has the same type as "x" . \n
 
-*@attention Constraints:
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSlice.
 
@@ -336,9 +386,9 @@ REG_OP(StridedSlice)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead.
 */
 REG_OP(StridedSliceD)
-    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8,
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8,
         DT_BOOL}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8,
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8,
         DT_BOOL}))
     .REQUIRED_ATTR(begin, ListInt)
     .REQUIRED_ATTR(end, ListInt)
@@ -385,8 +435,6 @@ REG_OP(StridedSliceD)
 *@par Outputs:
 *output: A Tensor. Has the same type as "dy" . \n
 
-*@attention Constraints:
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSliceGradD.
 
@@ -444,8 +492,6 @@ REG_OP(StridedSliceGradD)
 *@par Outputs:
 *output: A Tensor has the same type as "dy" . \n
 
-*@attention Constraints:
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSliceGrad.
 */
@@ -487,6 +533,38 @@ REG_OP(UnsortedSegmentSum)
     .OP_END_FACTORY_REG(UnsortedSegmentSum)
 
 /**
+*@brief Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to
+* end, inclusive, on a logarithmic scale with base base. \n
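+* Equivalently (illustrative formula, assuming the torch.logspace semantics this op mirrors):
+*   y[i] = base ^ (start + i * (end - start) / (steps - 1)), for i in [0, steps).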
+
+*@par Inputs:
+*One input, including:
+* assist: A tensor. Must be one of the following types:
+* float16, float32. \n
+
+* @par Attributes:
+* @li start: A required float. Used to select the start. \n
+* @li end: A required float. Used to select the end. \n
+* @li steps: An optional int. Defaults to 100. \n
+* @li base: An optional float. Defaults to 10.0. \n
+* @li dtype: An optional int. Defaults to 1. \n
+
+*@par Outputs:
+*y: A Tensor with the same type and shape as "assist". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator logspace. \n
+*/
+REG_OP(LogSpaceD)
+    .INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(start, Float)
+    .REQUIRED_ATTR(end, Float)
+    .ATTR(steps, Int, 100)
+    .ATTR(base, Float, 10.0)
+    .ATTR(dtype, Int, 1)
+    .OP_END_FACTORY_REG(LogSpaceD)
+
+/**
 *@brief Computes the sum along segments of a tensor . \n
 
 *@par Inputs:
@@ -649,6 +727,27 @@ REG_OP(SegmentMax)
     .OP_END_FACTORY_REG(SegmentMax)
 
 /**
+*@brief Computes the sum along segments of a tensor . \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x: A Tensor of type NumberType.
+* @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix
+* of "x.shape".
+
+*@par Outputs:
+*y: A Tensor of type NumberType . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator SegmentSum.
+*/
+REG_OP(SegmentSum)
+    .INPUT(x, TensorType::NumberType())
+    .INPUT(segment_ids, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .OP_END_FACTORY_REG(SegmentSum)
+
+/**
 *@brief: Computes the maximum along segments of a tensor.
 *Computes a tensor such that output[i]=(data[i]) where max is over j
 * such that segment_ids[j] == i.
@@ -797,6 +896,34 @@ REG_OP(SliceD)
     .OP_END_FACTORY_REG(SliceD)
 
 /**
+*@brief Extracts a slice from a tensor.
+* This operation extracts a slice of size "size" from a tensor "x"
+* starting at the location specified by "begin" . \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types:
+* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8,
+* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n
+*@li offsets: The starting location for the slice.
+
+*@par Attributes:
+*@li size: The tensor shape . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x". The slice extracted from the tensor.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead.
+*/
+REG_OP(SliceDV2)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(offsets, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .REQUIRED_ATTR(size, ListInt)
+    .OP_END_FACTORY_REG(SliceDV2)
+
+/**
 * @brief Finds values and indices of the "k" largest elements for the last
 * dimension . \n
 
@@ -813,24 +940,28 @@ REG_OP(SliceD)
 * @par Attributes:
 * @li k: A required int that is at least 0, specifying the number of top elements
 * to look for along the last dimension (along each row for matrices).
-* @li sorted: An optional bool. Defaults to true.
-* If true, the resulting "k" elements will be sorted by the values in descending
-* order.
+* @li sorted: An optional bool. Defaults to "True".
+* If "True", the returned "k" elements are themselves sorted.
+* If "False", the returned "k" elements are not sorted.
 * @li dim: An optional int. Defaults to -1. For reserved use.
-* @li largest: An optional bool. Defaults to true. For reserved use.
\n +* @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true. +* If "True", the "k" largest elements are returned in descending order. +* If "False", the "k" smallest elements are returned in ascending order. \n * @par Outputs: * @li values: A Tensor, specifying the sorted data. Has the same type as "input". * @li indices: A Tensor of type int32, specifying the indices of sorted data . \n * @attention Constraints: -* @li k =< 5120 +* @li k =< 4096 * @li Size of the last dimension =< 1458176 +* @li k =< 2048 under lhisi version +* @li Size of the last dimension =< 1040000 under lhisi version * @li sorted = true * @li It's unstable sorted indices on the platform of Ascend310 -* @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator TopK. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead. */ REG_OP(TopKD) .INPUT(x, TensorType::RealNumberType()) @@ -853,13 +984,62 @@ REG_OP(TopKD) * at least "k". * @li k: A 0D Tensor of type int32. * Number of top elements to look for along the last dimension (along each row +* for matrices) . +* @li assist_seq: A 1D tensor of type float16. +* with size of 2N, which "N" is the last dimension. +* The first N numbers is indices, and the next N numbers is deviation of casting +* int32 to float16. \n + +* @par Attributes: +* @li sorted: An optional bool. Defaults to "True". +* If "True", the returned "k" elements are themselves sorted. +* If "False", the returned "k" elements are not sorted. +* @li dim: An optional int. Defaults to -1. For reserved use. +* @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true. +* If "True", the "k" largest elements are returned in descending order. +* If "False", the "k" smallest elements are returned in ascending order. \n + + +* @par Outputs: +* @li values: A Tensor, specifying the sorted data. Has the same type as +* "input". +* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n + +* @see TopK() +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator TopKV2. +*/ +REG_OP(TopKV2D) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(k, TensorType({DT_INT32})) + .INPUT(assist_seq, TensorType({DT_FLOAT16})) + .OUTPUT(values, TensorType::RealNumberType()) + .OUTPUT(indices, TensorType({DT_INT32})) + .ATTR(sorted, Bool, true) + .ATTR(dim, Int, -1) + .ATTR(largest, Bool, true) + .OP_END_FACTORY_REG(TopKV2D) + +/** +* @brief Finds values and indices of the "k" largest elements for the last +* dimension . \n + +* @par Inputs: +* Two inputs, including: +* @li x: A 1D or higher tensor of type BasicType, with the last dimension +* at least "k". +* @li k: A 0D Tensor of type int32. +* Number of top elements to look for along the last dimension (along each row * for matrices) . \n * @par Attributes: -* @li sorted: An optional bool. Defaults to true. -* If true, the resulting "k" elements will be sorted by the values in descending -* order. -* @li T: Indicator of indices type . \n +* @li sorted: An optional bool. Defaults to "True". +* If "True", the returned "k" elements are themselves sorted. +* If "False", the returned "k" elements are not sorted. +* @li dim: An optional int. Defaults to -1. For reserved use. +* @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true. +* If "True", the "k" largest elements are returned in descending order. 
+* If "False", the "k" smallest elements are returned in ascending order. \n * @par Outputs: * @li values: A Tensor, specifying the sorted data. Has the same type as @@ -870,21 +1050,63 @@ REG_OP(TopKD) * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator TopKV2. */ +REG_OP(TopKV2) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(k, TensorType({DT_INT32})) + .OUTPUT(values, TensorType::RealNumberType()) + .OUTPUT(indices, TensorType({DT_INT32})) + .ATTR(sorted, Bool, true) + .ATTR(dim, Int, -1) + .ATTR(largest, Bool, true) + .OP_END_FACTORY_REG(TopKV2) + +/** +* @brief Finds values and indices of the "k" largest elements for the last +* dimension . \n + +* @par Inputs: +* Two inputs, including: +* @li x: A 1D or higher tensor of type BasicType, with the last dimension +* at least "k". +* @li k: A 0D Tensor of type int32. +* Number of top elements to look for along the last dimension (along each row +* for matrices) . \n + +* @par Attributes: +* @li sorted: An optional bool. Defaults to "True". +* If "True", the returned "k" elements are themselves sorted. +* If "False", the returned "k" elements are not sorted. +* @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true. +* If "True", the "k" largest elements are returned in descending order. +* If "False", the "k" smallest elements are returned in ascending order. +* @li dim:0-D. Number of top elements to look for along the last dimension (along each row for matrices). \n + +* @par Outputs: +* @li values: A Tensor, specifying the sorted data. Has the same type as +* "input". +* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n + +* @see TopK() +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator TopKV2. +*/ REG_OP(TopK) .INPUT(x, TensorType::RealNumberType()) .INPUT(k, TensorType({DT_INT32})) .OUTPUT(values, TensorType::RealNumberType()) .OUTPUT(indices, TensorType({DT_INT32})) .ATTR(sorted, Bool, true) + .ATTR(largest, Bool, true) + .ATTR(dim, Int, -1) .OP_END_FACTORY_REG(TopK) /** *@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n *@par Inputs: *Inputs including: -* @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8. -* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8. -* @li shape: A required list of int32, specifying the output shape. +* @li indices: A required index tensor. Must be one of the following types: int32 or int64. +* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8... +* @li shape: A required list of int32 or int64, specifying the output shape. *@par Outputs: *y:A output Tensor with same datatype as "updates" . \n @@ -895,7 +1117,7 @@ REG_OP(TopK) * Compatible with the TensorFlow operator ScatterNd. */ REG_OP(ScatterNd) - .INPUT(indices, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) .INPUT(x, TensorType::BasicType()) .INPUT(shape, TensorType::IndexNumberType()) .OUTPUT(y, TensorType::BasicType()) @@ -908,11 +1130,11 @@ REG_OP(ScatterNd) *@par Inputs: *Inputs including: * @li indices: A required index tensor. Must be one of the following types: - * float, float16, int32, int16. format:ND. + * int32 or int64. format:ND. 
* @li x: A required slice tensor. Must be one of the following types: - * float, float16, int32, int16. format:ND. + * float16, float, int32, int8, uint8. format:ND. *@par Attributes: -* @li shape: A required list of int32, specifying the output shape. +* @li shape: A required list of int32 or int64, specifying the output shape. *@par Outputs: *y: A Tensor. Has the same type as "x". format:ND . \n @@ -927,8 +1149,8 @@ REG_OP(ScatterNd) */ REG_OP(ScatterNdD) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) .REQUIRED_ATTR(shape, ListInt) .OP_END_FACTORY_REG(ScatterNdD) @@ -969,7 +1191,6 @@ REG_OP(InTopKD) * @brief Says whether the targets are in the top "k" predictions . \n * @par Inputs: -* Two inputs, including: * @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. * @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. * @li k: A 1D Tensor of the same type as "x2". @@ -1100,19 +1321,20 @@ REG_OP(StridedSliceAssignD) * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * int64, qint8, quint8, qint32, qint16, quint16, uint16, * float16, uint32, uint64, complex64, complex128. -* @li indices: A Tensor of type int32 or int64 . \n +* @li indices: A Tensor of type int32 or int64 . *@par Attributes: -*validate_indices: A bool specifying whether to verify the argument of "indice" . \n +* @li validate_indices: A bool specifying whether to verify the argument of "indice" . +* @li batch_dims: An optional int. Defaults to 0. *@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +*y: A Tensor. Has the same type as "x" . *@attention Constraints: -* "indices" is in the range [0, x.shape[0]) . \n +* "indices" is in the range [0, x.shape[0]) . *@par Third-party framework compatibility -* Compatible with the TensorFlow operator Gather . \n +* Compatible with the TensorFlow operator Gather . */ REG_OP(Gather) @@ -1120,6 +1342,7 @@ REG_OP(Gather) .INPUT(indices, TensorType::IndexNumberType()) .OUTPUT(y, TensorType::BasicType()) .ATTR(validate_indices, Bool, true) + .ATTR(batch_dims, Int, 0) .OP_END_FACTORY_REG(Gather) /** @@ -1184,8 +1407,7 @@ REG_OP(CumprodD) *@par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, -* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +*@li x: A Tensor. Must be one of the following types: float32, int32, uint8, int8, float16. *@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". * *@par Attributes: @@ -1211,8 +1433,7 @@ REG_OP(Cumsum) * *@par Inputs: * One input: -*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, -* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +*x: A Tensor. Must be one of the following types: float32, int32, uint8, int8, float16. * *@par Attributes: *@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". @@ -1502,12 +1723,12 @@ REG_OP(UnsortedSegmentMinD) * y: A Tensor of type RealNumberType . \n * @attention Constraints: -* @li segment_ids must be non-negative tensor. +* segment_ids must be non-negative tensor. 
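+* For example (illustrative only, not from the original header): with x = [1, 3, 2, 5],
+* segment_ids = [0, 0, 1, 1] and num_segments = 2, y = [max(1, 3), max(2, 5)] = [3, 5].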
* @see UnsortedSegmentSum(), UnsortedSegmentProd(),
 
 * @par Third-party framework compatibility
-* @li Compatible with the TensorFlow operator UnsortedSegmentMax.
+* Compatible with the TensorFlow operator UnsortedSegmentMax.
 */
 REG_OP(UnsortedSegmentMax)
     .INPUT(x, TensorType::RealNumberType())
@@ -1753,27 +1974,84 @@ REG_OP(Crop)
     .OP_END_FACTORY_REG(Crop)
 
 /**
+*@brief Returns a namedtuple (values, indices) where values is the cumulative
+* minimum of elements of input in the dimension dim,
+* and indices is the index location of each minimum value found in the dimension dim. \n
+
+*@par Inputs:
+*One input, including:
+* x: A tensor. Must be one of the following types:
+* float16, float32, int32, uint32, int8, uint8. \n
+
+*@par Attributes:
+* axis: Axis along which to cummin. \n
+
+*@par Outputs:
+* @li y: A Tensor with the same type and shape of x's.
+* @li indices: A Tensor with the int32 type and the same shape of x's. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Cummin. \n
+*/
+REG_OP(Cummin)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OUTPUT(indices, TensorType::BasicType())
+    .REQUIRED_ATTR(axis, Int)
+    .OP_END_FACTORY_REG(Cummin)
+
+/**
+*@brief Returns a namedtuple (values, indices) where values is the cumulative
+* maximum of elements of input in the dimension dim,
+* and indices is the index location of each maximum value found in the dimension dim. \n
+
+*@par Inputs:
+*One input, including:
+* x: A tensor. Must be one of the following types:
+* float16, float32, int32, uint32, int8, uint8. \n
+
+*@par Attributes:
+* dim: Axis along which to cummax. \n
+
+*@par Outputs:
+* @li y: A Tensor with the same type and shape of x's.
+* @li indices: A Tensor with the int32/int64 type and the same shape of x's. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Cummax. \n
+*/
+REG_OP(Cummax)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OUTPUT(indices, TensorType::BasicType())
+    .REQUIRED_ATTR(dim, Int)
+    .OP_END_FACTORY_REG(Cummax)
+
+/**
 *@brief Extends the input with copies of data along a specified dimension. For example:
-*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2);
-*(2) axis = 1;
-*(3) tiles = 2;
-*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], with shape (2, 6, 2) . \n
+*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); \n
+*(2) axis = 1; \n
+*(3) tiles = 2; \n
+*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8],
+* [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]],
+* with shape (2, 6, 2) . \n
 
 *@par Inputs:
 * One input:
 *input_x: A Tensor with any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n
 
-*@par Attributes:
-*@li axis: An optional int32, specifying the axis to tile. Defaults to 1.
-*@li tiles: A required int32, specifying the number of copies (tiles) to output . \n
+* @par Attributes:
+* @li axis: An optional int32, specifying the axis to tile. Defaults to 1.
+* @li tiles: A required int32, specifying the number of copies (tiles) to output . \n
 
 *@par Outputs:
-*output_y: A Tensor of any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n
+* output_y: A Tensor of any format.
Must be one of the following types: +* float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n -*@attention Constraints: -*@li "axis" must be within the rank of the input tensor. -*@li "tiles" must be greater than 1. -*@par Third-party framework compatibility +* @attention Constraints: +* @li "axis" must be within the rank of the input tensor. +* @li "tiles" must be greater than 1. +* @par Third-party framework compatibility * Compatible with the Caffe operator Tile. */ REG_OP(TileWithAxis) @@ -1786,17 +2064,17 @@ REG_OP(TileWithAxis) .OP_END_FACTORY_REG(TileWithAxis) /** -*@brief Read data with offset and stride . \n +* @brief Read data with offset and stride . -*@par Inputs: -*One input: -*x: A Tensor. Must be one of the following types: float16, int8 . \n +* @par Inputs: +* One input: +* x: A Tensor. Must be one of the following types: float16, int8 . \n -*@par Attributes: -*@li stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]" . \n +* @par Attributes: +* stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]" . \n -*@par Outputs: -*y: A Tensor of the same type as "x". +* @par Outputs: +* y: A Tensor of the same type as "x". *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1808,10 +2086,10 @@ REG_OP(ReadSelect) .OP_END_FACTORY_REG(ReadSelect) /** -*@brief: Write data with offset . \n +* @brief: Write data with offset . -*@par Inputs: -*x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -1825,17 +2103,14 @@ REG_OP(WriteSelect) .OP_END_FACTORY_REG(WriteSelect) /** -*@brief Read data by stride . \n +* @brief Read data by stride. -*@par Inputs: -*One input: -*x: A Tensor. Must be one of the following types: float16, int8 . \n - -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to read by stride . \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: float16, int8. \n -*@par Attributes: -*@li stride: A required int32, specifying the value of reading stride . \n +* @par Attributes: +* @li axis: A required int32, specifying the index of axis to read by stride. +* @li stride: A required int32, specifying the value of reading stride. \n *@par Outputs: *y: A Tensor of the same type as "x". @@ -1848,16 +2123,14 @@ REG_OP(StridedRead) .OP_END_FACTORY_REG(StridedRead) /** -*@brief: Write data by stride . \n - -*@par Inputs: -*x: A Tensor. Must be one of the following types: float16, int8 . \n +* @brief Write data by stride. -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to write by stride . \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: float16, int8. \n -*@par Attributes: -*@li stride: A required int32, specifying the value of writing stride . \n +* @par Attributes: +* @li axis: A required int32, specifying the index of axis to write by stride. +* @li stride: A required int32, specifying the value of writing stride. \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -1870,20 +2143,20 @@ REG_OP(StridedWrite) .OP_END_FACTORY_REG(StridedWrite) /** -*@brief Computes the cumulative log sum exp of the tensor "x" along "axis" . \n +* @brief Computes the cumulative log sum exp of the tensor "x" along "axis" . -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. 
Must be one of the following types: float32, float16. -*@li axis A Tensor of type int32 or int16. Defaults to "0". +* @li x: A Tensor. Must be one of the following types: float32, float16. +* @li axis A Tensor of type int32 or int16. Defaults to "0". * *@par Attributes: *@li exclusive: If "False", performs inclusive CumulativeLogsumexp, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive CumulativeLogsumexp. *@li reverse: A bool. Defaults to "False". * -*@par Outputs: -*@li y: A Tensor. Has the same type as "x". -*@par Third-party framework compatibility +* @par Outputs: +* y: A Tensor. Has the same type as "x". +* @par Third-party framework compatibility * Compatible with the TensorFlow operator Cumsum. */ REG_OP(CumulativeLogsumexp) @@ -1899,7 +2172,7 @@ REG_OP(CumulativeLogsumexp) * *@par Inputs: * One input: -*x: A Tensor. Must be one of the following types: float32, float16. +* x: A Tensor. Must be one of the following types: float32, float16. * *@par Attributes: *@li axis A Tensor of type int32 or int16. Defaults to "0". @@ -1921,6 +2194,458 @@ REG_OP(CumulativeLogsumexpD) .ATTR(exclusive, Bool, false) .ATTR(reverse, Bool, false) .OP_END_FACTORY_REG(CumulativeLogsumexpD) + +/** +* @brief Add updates to var according to axis and indices. + +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor. Must be one of the following types: +* float16, float32, int16, int32, int8, uint8. +* @li indices: A Tensor of the indices, type should be int32. +* @li updates: A Tensor of the same type as "var". \n + +* @par Attributes: +* axis: An required int to specify the axis to perform indices add. \n + +* @par Outputs: +* var: A Tensor. Same as input "var". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator index_add_. +*/ +REG_OP(InplaceIndexAdd) + .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(InplaceIndexAdd) + +/** +* @brief Replace the value of X with value according to mask. + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor of dtype is float16 or float32 or int64 or int32 or int8. +* @li mask: A Tensor of dtype bool. +* @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8. \n + +* @par Outputs: +* y: A tensor. Must be one of the following dtypes: +* float16, float32, int64, int32, int8. +*/ +REG_OP(MaskedFill) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) + .INPUT(mask, TensorType({DT_BOOL})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(MaskedFill) + +/** +* @brief Choose the value of X with value according to mask. + +* @par Inputs: +* two inputs, including: +* @li x: A Tensor of dtype is float16 or float32. +* @li mask: A Tensor of dtype is bool. \n + +* @par Outputs: +* y: A tensor with the same type as x. \n + +* @par Third-party framework compatibility +* Compatible with the Numpy operator select. 
+* Replaces the pytorch operator masked_select in some scenarios.\n +*/ +REG_OP(MaskedSelectV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(mask, TensorType({DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(MaskedSelectV2) + +/** +* @brief update the value of X with value according to mask. + +* @par Inputs: +* three inputs, including: +* @li x: A Tensor of dtype is float16 or float32 or float64 or int64 or int32 or int16 or int8 or uint8. +* @li mask: A Tensor of dtype is bool. +* @li updates: A tensor with the same type as x. \n + +* @par Outputs: +* @li y: A tensor with the same type as x. \n +*/ +REG_OP(MaskedScatter) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .INPUT(mask, TensorType({DT_BOOL})) + .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(MaskedScatter) + +/** +* @brief Slice a tensor at its last dim, e.x. a[..., begin:end:stride]. \n + +* @par Inputs: +* One inputs, including: +* x: A Tensor. Must be one of the following types: float16, float32, int16, int32. + +* @par Attributes: +* @li start: An attribute of type Int, start index of last dim. \n +* @li end: An attribute of type Int, end index of last dim. \n +* @li stride: An attribute of type Int, stride of slice. \n + +* @par Outputs: +* y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* No compatibility +*/ +REG_OP(SliceLastDim) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .REQUIRED_ATTR(start, Int) + .REQUIRED_ATTR(end, Int) + .ATTR(stride, Int, 1) + .OP_END_FACTORY_REG(SliceLastDim) + +/** +* @brief Extracts a strided slice of a tensor. Roughly speaking, this op +* extracts a slice of size (end-begin)/stride from the given input tensor. +* Starting at the location specified by begin the slice continues by +* adding stride to the index until all dimensions are not less than end. \n +* +* @par Inputs: +* Five inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select. +* @li end: A Tensor of type int32 or int64, for the index of the last value to select. +* @li axes: A Tensor of type int32 or int64, indicate axis to be select. +* @li strides: A Tensor of type int32 or int64, for the increment. \n +* +* @par Attributes: +* @li begin_mask: A Tensor of type int32. +* A bitmask where a bit "i" being "1" means to ignore the begin +* value and instead use the largest interval possible. +* @li end_mask: A Tensor of type int32. +* Analogous to "begin_mask". +* @li ellipsis_mask: A Tensor of type int32. +* A bitmask where bit "i" being "1" means the "i"th position +* is actually an ellipsis. +* @li new_axis_mask: A Tensor of type int32. +* A bitmask where bit "i" being "1" means the "i"th +* specification creates a new shape 1 dimension. +* @li shrink_axis_mask: A Tensor of type int32. 
+* A bitmask where bit "i" implies that the "i"th +* specification should shrink the dimensionality. \n +* +* @par Outputs: +* y: A Tensor. Has the same type as "x". +* +* @attention Constraints: +* +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSliceV2. +*/ +REG_OP(StridedSliceV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(StridedSliceV2) + +/** +* @brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. + +* @par Inputs: +* Three inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32, int32. \n +* @li assist1: A tensor. Must be one of the following types: +* float16, float32, int32. \n +* @li assist2: A tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Attributes: +* dim: A required int. Used to select the dimension of this tensor. \n + +* @par Outputs: +* y: A Tensor with the same type and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator IndexFill. \n +*/ +REG_OP(IndexFillD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(dim, Int) + .OP_END_FACTORY_REG(IndexFillD) + +/** +* @brief For each row r of this and for each column c, do (*this)(r, c) += src(j, c), \n +* where j ranges from indexes[r].first through indexes[r].second - 1. \n +* In general indexes must be >= 0 and < src.NumRows(); \n +* but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor. Must be one of the following types: +* float16, float32. +* @li indices: A Tensor of the indices, type should be int32. +* @li src: A Tensor of the same type as "x". \n + +* @par Outputs: +* @li x: A Tensor. Same as input "x". + +* @par Third-party framework compatibility +* Compatible with the kaldi operator AddRowRanges. +*/ +REG_OP(AddRowRanges) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AddRowRanges) + +/** +* @brief masked fill tensor along with one axis by range. +* boxes. It is a customized masked fill range operator . \n + +* @par Inputs: +* Four inputs, including: +* @li x: input tensor. A ND Tensor of float32/float16/int32/int8 with shapes +* 1-D (D,), 2-D(N, D), 3-D(N, C, D) +* @li start: masked fill start pos. A 3D Tensor of int32 with +* shape (num, N). "num" indicates the number of loop masked fill, and the value N +* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. +* @li end: masked fill end pos. A 3D Tensor of int32 with +* shape (num, N). "num" indicates the number of loop masked fill, and the value N +* indicates the batch of ND Tensor. +* @li value: masked fill value. 
A 2D Tensor of float32/float16/int32/int8 with +* shape (num,). "num" indicates the number of loop masked fill. \n + +* @par Attributes: +* @li axis: axis with masked fill of int32. Defaults to -1. + +* @par Outputs: +* y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) + +*@attention Constraints: +* Warning: input shape's length must not be bigger than 1024 * 1024 * 1024. +*/ +REG_OP(MaskedFillRange) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) + .INPUT(start, TensorType({DT_INT32})) + .INPUT(end, TensorType({DT_INT32})) + .INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(MaskedFillRange) + +/** +* @brief After a set of sorted data and a new set of data are re-sorted, get the first k data. \n +* +* @par Inputs: +* Six inputs, including: +* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation. +* Must be one of the following types: float32, float16. +* @li topk_pq_index: A Tensor of type int32, index corresponding to topk_pq_distance. +* @li topk_pq_ivf: A Tensor of type int32 , the bucket number corresponding to topk_pq_distance. +* @li pq_distance: A Tensor of type float32 or float16, +* the new data set will be reordered with topk_pq_distance and updated to topk_pq_distance. +* @li pq_index: A Tensor of type int32, index corresponding to pq_distance. +* @li pq_ivf: A scalar of type int32 , the bucket number corresponding to pq_distance. \n +* +* @par Attributes: +* @li order: A string, indicates the sorting method of topk_pq_distance. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(InplaceTopKDistance) + .INPUT(topk_pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(topk_pq_index, TensorType({DT_INT32})) + .INPUT(topk_pq_ivf, TensorType({DT_INT32})) + .INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(pq_index, TensorType({DT_INT32})) + .INPUT(pq_ivf, TensorType({DT_INT32})) + .ATTR(order, String, "asc") + .OP_END_FACTORY_REG(InplaceTopKDistance) + +/** +* @brief After a set of sorted data and a new set of data are re-sorted, get the first k data. \n +* +* @par Inputs: +* @li sorted_distance: A sorted Tensor, Will be updated after calculation. Must be one of the following types: float16. +* @li pq_ivf: A Tensor of type int32, index corresponding to sorted_distance. +* @li pq_index: A Tensor of type int32 , the bucket number corresponding to sorted_distance. \n +* +*@par Outputs: +* @li topk_distance: A Tensor of type float16, the new data set will be reordered with sorted_distance and updated to topk_distance. +* @li topk_ivf: A Tensor of type int32, index corresponding to topk_distance. +* @li topk_index: A scalar of type int32 , the bucket number corresponding to topk_distance. \n +* +* @par Attributes: +* k: get the first k data of sorted_distance. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(TopKPQDistanceMerge) + .INPUT(sorted_distance, TensorType({DT_FLOAT16})) + .INPUT(pq_ivf, TensorType({DT_INT32})) + .INPUT(pq_index, TensorType({DT_INT32})) + .OUTPUT(topk_distance, TensorType({DT_FLOAT16})) + .OUTPUT(topk_ivf, TensorType({DT_INT32})) + .OUTPUT(topk_index, TensorType({DT_INT32})) + .REQUIRED_ATTR(k, Int) + .OP_END_FACTORY_REG(TopKPQDistanceMerge) + +/** +*@brief Extracts a strided slice of a tensor. 
Roughly speaking, this op
+    extracts a slice of size (end-begin)/stride from the given input tensor.
+    Starting at the location specified by begin the slice continues by
+    adding stride to the index until all dimensions are not less than end.
+
+*@par Inputs:
+*Four inputs, including:
+* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8,
+* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
+* complex128, float16, uint32, uint64.
+* @li begin: A Tensor of type int32 or int64, for the index of the first value to select . \n
+
+* @li end: A Tensor of type int32 or int64, for the index of the last value to select . \n
+
+* @li strides: A Tensor of type int32 or int64, for the increment . \n
+
+* @li axes: A Tensor of type int32 or int64, indicating the axes that "begin", "end" and "strides" apply to . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x" . \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(StridedSliceV3)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(begin, TensorType::IndexNumberType())
+    .INPUT(end, TensorType::IndexNumberType())
+    .OPTIONAL_INPUT(axes, TensorType::IndexNumberType())
+    .OPTIONAL_INPUT(strides, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(StridedSliceV3)
+
+/**
+* @brief Sum "alpha" according to "offset" and "ksize",
+  and multiply the result by the sigmoid of "energy". \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li alpha: A Tensor. Must be one of the following types: float32, float16.
+* @li energy: A Tensor. Must be one of the following types: float32, float16.
+* @li offset: A Tensor of type int32. \n
+
+*@par Outputs:
+* y: A Tensor with same type as "alpha". \n
+*
+* @par Attributes:
+* ksize: An int.
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(MovingSumWithSigmoid)
+    .INPUT(alpha, TensorType::BasicType())
+    .INPUT(energy, TensorType::BasicType())
+    .INPUT(offset, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType::BasicType())
+    .REQUIRED_ATTR(ksize, Int)
+    .OP_END_FACTORY_REG(MovingSumWithSigmoid)
+
+/**
+* @brief Choose the value of X with value according to mask.
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor of BasicType.
+* @li mask: A Tensor of dtype bool. \n
+
+* @par Outputs:
+* y: A tensor with the same type as x. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Numpy operator select.\n
+*/
+REG_OP(MaskedSelect)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(mask, TensorType({DT_BOOL}))
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(MaskedSelect)
+
+/**
+* @brief Sum X1 and X2 according to the offset recorded in seq_len1 and seq_len2. \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li x1: A Tensor. Support BasicType.
+* @li x2: A Tensor. Support BasicType.
+* @li seq_len1: A Tensor. Support int32.
+* @li seq_len2: A Tensor. Support int32. \n
+
+* @par Outputs:
+* y: A Tensor with same type as "x1". \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynSeqOuter)
+    .INPUT(x1, TensorType::BasicType())
+    .INPUT(x2, TensorType::BasicType())
+    .INPUT(seq_len1, TensorType({DT_INT32}))
+    .INPUT(seq_len2, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(DynSeqOuter)
+
+/**
+* @brief Returns sliced data based on max nmsed_num. \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li input_nmsed_boxes: A Tensor.
Must be the following types: float16. +* @li input_nmsed_score: A Tensor. Must be the following types: float16. +* @li input_nmsed_class: A Tensor. Must be the following types: float16. +* @li input_nmsed_num: A Tensor. Must be the following types: int32. \n + +* @par Outputs: +* output_nmsed_boxes: A Tensor. Must be the following type: float. +* output_nmsed_score: A Tensor. Must be the following type: float. +* output_nmsed_class: A Tensor. Must be the following type: float. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(NonMaxSuppressionBucketize) + .INPUT(input_nmsed_boxes, TensorType({DT_FLOAT16})) + .INPUT(input_nmsed_score, TensorType({DT_FLOAT16})) + .INPUT(input_nmsed_class, TensorType({DT_FLOAT16})) + .INPUT(input_nmsed_num, TensorType({DT_INT32})) + .OUTPUT(output_nmsed_boxes, TensorType({DT_FLOAT})) + .OUTPUT(output_nmsed_score, TensorType({DT_FLOAT})) + .OUTPUT(output_nmsed_class, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(NonMaxSuppressionBucketize) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index 1d02fa15..04e04f1b 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index d7512790..bf0f670a 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -281,9 +281,9 @@ REG_OP(SparseSliceGrad) * @li size: A 1D Tensor of type int64. The size of the slice . \n *@par Outputs: -*y_indices: A Tensor of type int64. -*y_values: A Tensor. Has the same type as "values". -*y_values: A Tensor of type int64 . \n +*@li y_indices: A Tensor of type int64. +*@li y_values: A Tensor. Has the same type as "values". +*@li y_shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseSlice. @@ -313,8 +313,8 @@ REG_OP(SparseSlice) * @li sum_indices: A 2D Tensor of type int64. The indices of the sum SparseTensor, with size [nnz(sum), ndims] . \n *@par Outputs: -*x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". -*x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n +*@li x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". +*@li x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseAddGrad. @@ -363,7 +363,7 @@ REG_OP(SparseFillEmptyRowsGrad) *@par Inputs: * @li x1_indices: A 2D Tensor of type int32 or int64. -* @li The indices of the matrix "SparseTensor", with size [nnz, 2]. +*The indices of the matrix "SparseTensor", with size [nnz, 2]. * @li x1_values: A 1D Tensor. The values of the SparseTensor, with size [nnz]. * @li x1_shape: A 1D Tensor of type int64. The shape of the SparseTensor, with size [2]. 
* @li x2: A dense matrix Tensor of the same type as "x1_values". 2D . \n @@ -373,9 +373,9 @@ REG_OP(SparseFillEmptyRowsGrad) *@par Attributes: *@li adjoint_a: An optional bool. Defaults to "False".Use the adjoint of A in the matrix multiply. -*@li If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). +*If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). *@li adjoint_b: An optional bool. Defaults to "False".Use the adjoint of B in the matrix multiply. -*@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n +*If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseTensorDenseMatMul. @@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad) REG_OP(SparseTensorDenseMatMul) .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ - DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16})) + DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64})) .INPUT(x1_shape, TensorType({DT_INT64})) - .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ + .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ DT_COMPLEX128, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ DT_COMPLEX128, DT_FLOAT16})) .ATTR(adjoint_a, Bool, false) .ATTR(adjoint_b, Bool, false) @@ -400,9 +400,13 @@ REG_OP(SparseTensorDenseMatMul) * @li indices: A 0D, 1D, or 2D Tensor of type int32 or int64. * @li output_shape: A 1D Tensor of the same type as "sparse_indices". The shape of the dense output tensor. * @li values: A 1D Tensor. Values corresponding to each row of "sparse_indices", -* @li or a scalar value to be used for all sparse indices. +or a scalar value to be used for all sparse indices. * @li default_value: A Tensor of the same type as "sparse_values" . \n +*@par Attributes: +*validate_indices: If true, indices are checked to make sure they are sorted in +lexicographic order and that there are no repeats. \n + *@par Outputs: *y: A Tensor. Has the same type as "values" . \n @@ -427,7 +431,6 @@ REG_OP(SparseToDense) *Concatenation is with respect to the dense versions of these sparse tensors . \n *@par Inputs: -*3 or 5 inputs,contains: * @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D. *Indices of each input `SparseTensor`.It's a dynamic input. * @li values:A list with the same length as `indices` of `Tensor` objects with the same type. @@ -700,7 +703,6 @@ REG_OP(SparseReduceMaxSparse) *@brief Computes the sum of elements across dimensions of a SparseTensor . \n *@par Inputs: -*4 or 5 inputs, including: * @li x_indices: A 2D Tensor of type int64. *"N x R" matrix with the indices of non-empty values in a *SparseTensor, possibly not in canonical ordering. @@ -711,13 +713,11 @@ REG_OP(SparseReduceMaxSparse) *A length-"K" vector containing the reduction axes . \n *@par Attributes: -* keep_dims: An optional bool. Defaults to "False". +*keep_dims: An optional bool. Defaults to "False". *If true, retains reduced dimensions with length 1 . \n *@par Outputs: -* @li y_indices: A Tensor of type int64. -* @li y_values: A Tensor. Has the same type as "input_values". -* @li y_shape: A Tensor of type int64 . \n +*y: A Tensor. Has the same type as "x_values". 
\n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseReduceSum. @@ -818,7 +818,6 @@ REG_OP(SparseSplit) *@brief Generates sparse cross from a list of sparse and dense tensors . \n *@par Inputs: -*8 or 10 inputs, including: * @li indices: A list of 2D Tensor objects of type int64. * Indices of each input SparseTensor.It's a dynamic input. * @li values: A list of 1D Tensor objects of type int64 or string. @@ -899,9 +898,8 @@ REG_OP(AddManySparseToTensorsMap) *@brief Reads SparseTensors from a "SparseTensorsMap" and concatenate them . \n *@par Inputs: -*2 or 4 inputs, including: * handles: A 1D Tensor of type int64. -* The "N" serialized SparseTensor objects . \n +*The "N" serialized SparseTensor objects . \n *@par Attributes: * @li dtype: A tf.DType. The "dtype" of the SparseTensor objects stored in the "SparseTensorsMap". @@ -911,9 +909,9 @@ REG_OP(AddManySparseToTensorsMap) *The shared name for the "SparseTensorsMap" read by this op . \n *@par Outputs: -* @li indices: A Tensor of type int64. -* @li values: A Tensor of type "dtype". -* @li shape: A Tensor of type int64 . \n +* @li indices: A Tensor of type int64.2-D. The `indices` of the minibatch `SparseTensor`. +* @li values: A Tensor of type "dtype". 1-D. The `values` of the minibatch `SparseTensor`. +* @li shape: A Tensor of type int64 . 1-D. The `shape` of the minibatch `SparseTensor`. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TakeManySparseFromTensorsMap. @@ -953,7 +951,7 @@ REG_OP(SerializeSparse) DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) .INPUT(shape, TensorType({DT_INT64})) - .OUTPUT(serialized_sparse, TensorType({DT_STRING})) + .OUTPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) .ATTR(out_type, Type, DT_STRING) .OP_END_FACTORY_REG(SerializeSparse) @@ -981,7 +979,7 @@ REG_OP(SerializeManySparse) DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) .INPUT(shape, TensorType({DT_INT64})) - .OUTPUT(serialized_sparse, TensorType({DT_STRING})) + .OUTPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) .ATTR(out_type, Type, DT_STRING) .OP_END_FACTORY_REG(SerializeManySparse) @@ -989,8 +987,7 @@ REG_OP(SerializeManySparse) *@brief Deserializes SparseTensor objects . \n *@par Inputs: -*Two inputs, including: -* serialized_sparse: A Tensor. The serialized SparseTensor objects. +*serialized_sparse: A Tensor. The serialized SparseTensor objects. *The last dimension must have 3 columns . \n *@par Attributes: @@ -1005,7 +1002,7 @@ REG_OP(SerializeManySparse) * Compatible with the TensorFlow operator DeserializeSparse. 
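+*
+*@par Example
+* An illustrative usage sketch only. The accessor names below assume the
+* standard REG_OP-generated setters (set_input_<name>/set_attr_<name>) and
+* the "dtype" attribute documented above; they are not part of this patch:
+* @code
+*   ge::op::DeserializeSparse op("deserialize");
+*   op.set_input_serialized_sparse(serialized)  // string or variant tensor
+*     .set_attr_dtype(ge::DT_FLOAT);            // dtype of the stored values
+* @endcode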
*/ REG_OP(DeserializeSparse) - .INPUT(serialized_sparse, TensorType({DT_STRING})) + .INPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) .OUTPUT(indices, TensorType({DT_INT64})) .OUTPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index 64fa7814..ab9e1dec 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,24 @@ namespace ge { /** +*@brief Computes the inverse 1-dimensional discrete Fourier transform over the +inner-most dimension of `x`. \n + +*@par Inputs: +*x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*y: A complex tensor of the same rank as `x`. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IFFT operator. +*/ +REG_OP(IFFT) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(IFFT) + +/** *@brief Real-valued fast Fourier transform . \n *@par Inputs: @@ -34,7 +52,7 @@ namespace ge { *@li fft_length: An int32 tensor of shape [1]. The FFT length . \n *@par Outputs: -*@li y: A complex64 tensor of the same rank as `input`. The inner-most +*y: A complex64 tensor of the same rank as `input`. The inner-most dimension of `input` is replaced with the `fft_length / 2 + 1` unique frequency components of its 1D Fourier transform . \n @@ -47,6 +65,84 @@ REG_OP(RFFT) .OUTPUT(y, TensorType({DT_COMPLEX64})) .OP_END_FACTORY_REG(RFFT) +/** +*@brief Inverse real-valued fast Fourier transform. \n + +*@par Inputs: +*@li x: A complex64 tensor. +*@li fft_length: An int32 tensor of shape [1]. The FFT length. \n + +*@par Outputs: +* y: A float32 tensor of the same rank as `input`. The inner-most + dimension of `input` is replaced with the `fft_length` samples of its inverse + 1D Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IRFFT operator. +*/ +REG_OP(IRFFT) + .INPUT(x, TensorType({DT_COMPLEX64})) + .INPUT(fft_length, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(IRFFT) + + +/** +*@brief 2D fast Fourier transform. \n + +*@par Inputs: +*x: A complex64 tensor. + +*@par Outputs: +*y: A complex64 tensor of the same shape as `input`. The inner-most 2 + dimensions of `input` are replaced with their 2D Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow FFT2D operator. +*/ +REG_OP(FFT2D) + .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(FFT2D) + +/** +*@brief Calculate the one-dimensional discrete Fourier transform on the +innermost dimension of the input. \n + +*@par Inputs: +*x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*y: A complex tensor with the same shape as input. The innermost dimension +of the input is replaced by its 1-dimensional Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow FFT operator. 
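+*
+*@par Example
+* An illustrative sketch only, assuming the usual REG_OP-generated setters
+* (not part of this patch):
+* @code
+*   ge::op::FFT fft("fft");
+*   fft.set_input_x(complex_input);  // complex64/complex128 tensor
+* @endcode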
+*/
+REG_OP(FFT)
+    .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(FFT)
+
+/**
+*@brief Calculate the inverse 2-dimensional discrete Fourier transform over the
+inner-most 2 dimensions of the input. \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: complex64, complex128. \n
+
+*@par Outputs:
+*y: A complex tensor with the same shape as input. The inner-most 2 dimensions
+of the input are replaced by their inverse 2-dimensional Fourier transform. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow IFFT2D operator.
+*/
+REG_OP(IFFT2D)
+    .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(IFFT2D)
+
} // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h
index efe4715d..2915590d 100644
--- a/third_party/fwkacllib/inc/ops/split_combination_ops.h
+++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -62,8 +62,8 @@ REG_OP(Split)
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@par Attributes:
-*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
-*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n

*@par Outputs:
*y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n
@@ -94,12 +94,12 @@ REG_OP(SplitD)
*@par Inputs:
* Three inputs, including:
*@li x: An ND Tensor.
-*Must be one of the following types:
-*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
-*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n
+*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+*@li size_splits: Must be one of the following types: int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: Must be of type int32. Specifies the dimension along which to split . \n

*@par Attributes:
-*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*num_split: A required int32. Specifies the number of output tensors. No default value . \n

*@par Outputs:
*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
@@ -129,9 +129,9 @@ REG_OP(SplitV)
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@par Attributes:
-*@li size_splits: A required list of int8, int16, int32, or int64.
Specifies a list containing the sizes of each output tensor along the split dimension.
-*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
-*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n

*@par Outputs:
*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
@@ -161,14 +161,11 @@ REG_OP(SplitVD)
/**
*@brief Concatenates a list of N tensors along the first dimension.
*@par Inputs:
-* Two inputs, including:
-* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32,
+* One input, including:
+* values: A list of Tensors. Must be one of the following types: int8, int16, int32,
* int64, uint8, uint16, uint32, uint64, float16, float32.
* Tensors to be concatenated. All must have size 1 in the first dimension and same shape.
-* It's a dynamic input.
-* @li shape: A Tensor of the same type as "x".
-* The final shape of the result. Should be equal to the shapes of any input
-* but with the number of input values in the first dimension . \n
+* It's a dynamic input. \n

*@par Attributes:
* @li shape: A required list of ints.
@@ -191,7 +188,7 @@ REG_OP(ParallelConcat)

*@par Inputs:
* One input:
-*x: Dynamic input.An NC1HWC0 or ND Tensor.
+*x: Dynamic input. An ND Tensor.
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@par Attributes:
@@ -222,7 +219,7 @@ REG_OP(ConcatV2D)

*@par Inputs:
* Two inputs, including:
-*@li Dynamic input "x" is An NC1HWC0 or ND Tensor.
+*@li Dynamic input "x" is an ND Tensor.
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate . \n
@@ -250,7 +247,7 @@ REG_OP(ConcatV2)

*@par Inputs:
* One input:
-*x:Dynamic input. An NC1HWC0 or ND Tensor.
+*x:Dynamic input. An ND Tensor.
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@par Attributes:
@@ -281,7 +278,7 @@ REG_OP(ConcatD)

*@par Inputs:
* Two inputs, including:
-*@li x: Dynamic input.An NC1HWC0 or ND Tensor.
+*@li x: Dynamic input. An ND Tensor.
*Must be one of the following types: float16, float32, double, int32,
* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16,
* complex128, uint32, uint64, qint16, quint16.
@@ -301,8 +298,8 @@ REG_OP(ConcatD)
* Compatible with the TensorFlow operator Concat.
*/
REG_OP(Concat)
-    .DYNAMIC_INPUT(x, TensorType::BasicType())
    .INPUT(concat_dim, TensorType::IndexNumberType())
+    .DYNAMIC_INPUT(x, TensorType::BasicType())
    .OUTPUT(y, TensorType::BasicType())
    .ATTR(N, Int, 1)
    .OP_END_FACTORY_REG(Concat)
@@ -317,15 +314,15 @@ REG_OP(Concat)
* x: A list of N Tensors. Must be one of the following types: int8, int16, int32,
* int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n

*@par Attributes:
-*@li axis: A optional int, defaultvalue is 0.
+*@li axis: An optional int, default value is 0.
* Dimension along which to pack. The range is [-(R+1), R+1).
*@li N: A required int. Number of tensors . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x".
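+*
+*@par Example
+* An illustrative sketch of packing two tensors along axis 0. The dynamic
+* input helpers below assume the standard REG_OP code generation
+* (create_dynamic_input_<name>/set_dynamic_input_<name>) and are not part
+* of this patch:
+* @code
+*   ge::op::Pack pack("pack");
+*   pack.create_dynamic_input_x(2);   // two stacked inputs
+*   pack.set_dynamic_input_x(0, a);
+*   pack.set_dynamic_input_x(1, b);
+*   pack.set_attr_N(2);
+*   pack.set_attr_axis(0);
+* @endcode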
+
*@par Third-party framework compatibility
-*Compatible with the TensorFlow operator Pack.
-It's a dynamic output.
+* Compatible with the TensorFlow operator Pack.
*/
REG_OP(Pack)
    .DYNAMIC_INPUT(x, TensorType::BasicType())
@@ -384,6 +381,30 @@ REG_OP(ConcatOffsetD)
    .REQUIRED_ATTR(concat_dim, Int)
    .REQUIRED_ATTR(N, Int)
    .OP_END_FACTORY_REG(ConcatOffsetD)
+
+/**
+*@brief Computes combinations of length "r" of the given tensor. \n
+
+*@par Inputs:
+*x: A list of 1D Tensor objects. \n
+
+*@par Attributes:
+*@li r: An optional int indicating the number of elements to combine. Defaults to 2.
+*@li with_replacement: An optional bool indicating whether duplication is allowed
+*in the combinations. Defaults to "False". \n
+
+*@par Outputs:
+*y: A Tensor list with same type as "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the PyTorch operator Combinations.
+*/
+REG_OP(Combinations)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .ATTR(r, Int, 2)
+    .ATTR(with_replacement, Bool, false)
+    .OP_END_FACTORY_REG(Combinations)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h
index db1f5353..d1ec00b5 100644
--- a/third_party/fwkacllib/inc/ops/state_ops.h
+++ b/third_party/fwkacllib/inc/ops/state_ops.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -104,7 +104,7 @@ REG_OP(DestroyTemporaryVariable)
*@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n

*@par Inputs:
-*x: A tensor . \n
+*x: A Tensor of type float16, float32, double, bool, int8, uint8, uint16, int16, int32, uint32, uint64, int64.

*@par Outputs:
*y: A tensor, indicating whether "x" has been initialized . \n
diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h
index 366112d6..e776c416 100644
--- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h
+++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,7 +32,10 @@ namespace ge {
*@par Inputs:
*This op may use some OS-provided source of non-determinism (e.g. an RNG),
*so each execution will give different results. Inputs included:
-*@li shape: The shape of the output tensor . \n
+*shape: The shape of the output tensor . \n
+
+*@par Attributes:
+*dtype: A required attribute, specifying the output data type. \n

*@par Outputs:
*y:A Returns Non-deterministic integer values with specified shape . \n
@@ -54,13 +57,10 @@ REG_OP(NonDeterministicInts)
*counter is an unspecified implementation detail . \n

*@par Inputs:
-*@li resource: The handle of the resource variable that stores the state of the RNG.
+*@li x: The handle of the resource variable that stores the state of the RNG.
*@li algorithm: The RNG algorithm.
*@li delta: The amount of advancement . \n

-*@par Outputs:
-*y:A Returns the created operation . \n
-
*@par Third-party framework compatibility
* Compatible with tensorflow RngSkip operator.
*/
@@ -81,11 +81,16 @@ power of two.
The bias is small for values of `maxval - minval` significantly smaller than the range of the output (either `2^32` or `2^64`) . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor. -*@li minval: Minimum value (inclusive, scalar). -*@li maxval: Maximum value (exclusive, scalar) . \n +*@li counts: A 0/1-D Tensor or Python value. The counts of the binomial +distribution. Must be broadcastable with the leftmost dimension defined by `shape`. +*@li probs: A 0/1-D Tensor or Python value. The probability of success for the +binomial distribution. Must be broadcastable with the leftmost dimension defined by `shape`.\n + +*@par Attributes: +*dtype: required, type. \n *@par Outputs: *y:A Returns Random values with specified shape . \n @@ -109,7 +114,7 @@ REG_OP(StatefulRandomBinomial) *The generated values will have mean 0 and standard deviation 1 . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -134,7 +139,7 @@ REG_OP(StatefulStandardNormalV2) *deviations from the mean are dropped and re-picked . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -158,7 +163,7 @@ The generated values follow a uniform distribution in the range `[0, 1)`. The lower bound 0 is included in the range, while the upper bound 1 is excluded. *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -181,7 +186,7 @@ REG_OP(StatefulUniform) The generated values are uniform integers covering the whole range of `dtype` . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -209,7 +214,7 @@ power of two. The bias is small for values of `maxval - minval` significantly smaller than the range of the output (either `2^32` or `2^64`) . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor. *@li minval: Minimum value (inclusive, scalar). @@ -231,6 +236,30 @@ REG_OP(StatefulUniformInt) .OUTPUT(y, TensorType({DT_INT64})) .OP_END_FACTORY_REG(StatefulUniformInt) +/** +* @brief Advance the counter of a counter-based RNG. The state of the RNG after +* `rng_skip(n)` will be the same as that after `stateful_uniform([n])` +* (or any other distribution). The actual increment added to the +* counter is an unspecified implementation detail . \n + +* @par Inputs: +* @li value: Stores the state of the RNG. +* @li algorithm: The RNG algorithm. +* @li delta: The amount of advancement . 
\n
+
+* @par Outputs:
+* value: The old state of the RNG before the advancement . \n
+
+* @par Third-party framework compatibility
+* Compatible with tensorflow RngReadAndSkipV2 operator.
+*/
+
+REG_OP(RngReadAndSkipV2)
+    .INPUT(value, TensorType({DT_INT64}))
+    .INPUT(algorithm, TensorType({DT_INT32}))
+    .INPUT(delta, TensorType({DT_UINT64}))
+    .OUTPUT(value, TensorType({DT_INT64}))
+    .OP_END_FACTORY_REG(RngReadAndSkipV2)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h
index dad3c379..5ff3e4fb 100644
--- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h
+++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -79,6 +79,452 @@ REG_OP(StatelessRandomUniformInt)
    .OUTPUT(y, TensorType({DT_INT32, DT_INT64}))
    .OP_END_FACTORY_REG(StatelessRandomUniformInt)

-} // namespace ge
+/**
+* @brief Outputs random values from a truncated normal distribution. \n

+* @par Inputs:
+* Inputs include:
+* @li shape: A Tensor. Must be one of the following types: int32, int64.
  The shape of the output tensor. Batches are indexed by the 0th dimension.
+* @li seed: 2 seeds (shape [2]).
+* @li means: A Tensor. Must be one of the following types: half, bfloat16, float32, float64.
+* @li stdevs: A Tensor. Must have the same type as means.
+* @li min: A Tensor. Must have the same type as means. The minimum cutoff. May be -infinity.
+* @li max: A Tensor. Must have the same type as means. The maximum cutoff. \n

+* @par Outputs:
+* y: A Tensor. Has the same type as means. \n

+* @attention Constraints:
+* The implementation for StatelessParameterizedTruncatedNormal on Ascend uses AICPU, with poor performance. \n

+* @par Third-party framework compatibility
+* Compatible with the TensorFlow StatelessParameterizedTruncatedNormal operator.
+*/

+REG_OP(StatelessParameterizedTruncatedNormal)
+    .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(seed, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(means, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(stdevs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(min, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(max, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(StatelessParameterizedTruncatedNormal)

+/**
+* @brief Generate a single randomly distorted bounding box for an image . \n

+* @par Inputs:
+* Input images must be a 4-D tensor. Inputs include:
+* @li image_size: 1-D, containing [height, width, channels].
+* @li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding
  boxes associated with the image.
+* @li min_object_covered: The cropped area of the image must contain at least
  this fraction of any bounding box supplied. The value of this parameter should
  be non-negative. In the case of 0, the cropped area does not need to overlap
  any of the bounding boxes supplied .
+* @li seed: A shape [2] Tensor, the seed to the random number generator. \n

+* @par Attributes:
+* @li aspect_ratio_range: The cropped area of the image must have an aspect
  ratio = width / height within this range.
+* @li area_range: An optional list of `floats`.
Defaults to `[0.05, 1]`. The
+ cropped area of the image must contain a fraction of the supplied image
+ within this range.
+* @li max_attempts: Number of attempts at generating a cropped region of the
+ image of the specified constraints. After max_attempts failures, return the
+ entire image.
+* @li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes
+ supplied. If true, assume an implicit bounding box covering the whole input.
+ If false, raise an error . \n
+
+* @par Outputs:
+* @li begin: 1-D, containing [offset_height, offset_width, 0].
+* @li size: 1-D, containing [target_height, target_width, -1].
+* @li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n
+
+* @attention Constraints:
+* Input images can be of different types but output images are always float . \n
+
+* @par Third-party framework compatibility
+* Compatible with tensorflow StatelessSampleDistortedBoundingBox operator.
+*/
+
+REG_OP(StatelessSampleDistortedBoundingBox)
+    .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .INPUT(bounding_boxes, TensorType({ DT_FLOAT }))
+    .INPUT(min_object_covered, TensorType({ DT_FLOAT }))
+    .INPUT(seed, TensorType({ DT_INT32, DT_INT64 }))
+    .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .OUTPUT(bboxes, TensorType({ DT_FLOAT }))
+    .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f })
+    .ATTR(area_range, ListFloat, { 0.05f, 1.0f })
+    .ATTR(max_attempts, Int, 100)
+    .ATTR(use_image_if_no_bounding_boxes, Bool, false)
+    .OP_END_FACTORY_REG(StatelessSampleDistortedBoundingBox)
+
+/**
+* @brief Outputs random values from a truncated normal distribution. \n
+
+* @par Inputs:
+* Inputs include:
+* @li shape: A Tensor. Must be one of the following types: int32, int64. \n
+* @li key: Key of RNG algorithm. Shape[1]. \n
+* @li counter: Counter of RNG algorithm. Shape[2] for philox, shape[1] for threefry. \n
+* @li alg: RNG algorithm. 1:philox 2:threefry. \n
+
+* @par Attributes:
+* @li dtype: An optional attr, specifying the output data type. Defaults to "DT_FLOAT". \n
+
+* @par Outputs:
+* y: A Tensor of types: float16, float32, double. A tensor of the specified shape
  filled with random truncated normal values. \n
+
+* @attention Constraints:
+* The implementation for StatelessTruncatedNormalV2 on Ascend uses AICPU, with poor performance.
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow StatelessTruncatedNormalV2 operator.
+*/
+
+REG_OP(StatelessTruncatedNormalV2)
+    .INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
+    .INPUT(key, TensorType({ DT_UINT64 }))
+    .INPUT(counter, TensorType({ DT_UINT64 }))
+    .INPUT(alg, TensorType({ DT_INT32 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
+    .ATTR(dtype, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(StatelessTruncatedNormalV2)
+
+/**
+* @brief Outputs deterministic pseudorandom random numbers from a gamma distribution. \n
+
+* @par Inputs:
+* @li shape: The shape of the output tensor.
+* @li seed: 2 seeds (shape [2]).
+* @li alpha: The concentration of the gamma distribution. Shape must match the rightmost dimensions of shape. \n
+
+* @par Outputs:
+* y: A Tensor. Has the same type as alpha. \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow StatelessRandomGammaV2 operator.
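+*
+*@par Example
+* An illustrative usage sketch only, assuming the usual REG_OP-generated
+* setters (not part of this patch):
+* @code
+*   ge::op::StatelessRandomGammaV2 gamma("gamma");
+*   gamma.set_input_shape(shape)   // e.g. a const op holding [2, 3]
+*        .set_input_seed(seed)     // shape [2]
+*        .set_input_alpha(alpha);  // gamma concentration parameter
+* @endcode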
+*/ + +REG_OP(StatelessRandomGammaV2) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(seed, TensorType({DT_INT32, DT_INT64})) + .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) + .OP_END_FACTORY_REG(StatelessRandomGammaV2) + +/** +* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n + +* @par Inputs: +* @li shape: The shape of the output tensor. +* @li seed: 2 seeds (shape [2]). \n + +* @par Attributes: +* dtype:Output data type . \n + +* @par Outputs: +* y: Returns Random values with specified shape . \n + +* @par Third-party framework compatibility +* Compatible with TensorFlow StatelessRandomUniformFullInt operator. +*/ + +REG_OP(StatelessRandomUniformFullInt) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(seed, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) + .ATTR(dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(StatelessRandomUniformFullInt) + +/** +* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n + +* @par Inputs: +* @li shape: The shape of the output tensor. +* @li key: Key for the counter-based RNG algorithm. +* @li counter: Initial counter for the counter-based RNG algorithm. +* @li alg: 0-D. The RNG algorithm. \n + +* @par Attributes: +* dtype:Output data type . \n + +* @par Outputs: +* y: Returns Random values with specified shape . \n + +* @par Third-party framework compatibility +* Compatible with TensorFlow StatelessRandomUniformFullIntV2 operator. +*/ + +REG_OP(StatelessRandomUniformFullIntV2) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(key, TensorType({DT_UINT64})) + .INPUT(counter, TensorType({DT_UINT64})) + .INPUT(alg, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) + .ATTR(dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(StatelessRandomUniformFullIntV2) + +/** +* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n + +* @par Inputs: +* @li shape: The shape of the output tensor. +* @li key: Key for the counter-based RNG algorithm. +* @li counter: Initial counter for the counter-based RNG algorithm. +* @li alg: 0-D. The RNG algorithm. +* @li minval: Minimum value (inclusive, scalar). +* @li maxval: Maximum value (exclusive, scalar) . \n + +* @par Outputs: +* y: Returns Random values with specified shape . \n + +* @par Third-party framework compatibility +* Compatible with TensorFlow StatelessRandomUniformIntV2 operator. +*/ + +REG_OP(StatelessRandomUniformIntV2) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(key, TensorType({DT_UINT64})) + .INPUT(counter, TensorType({DT_UINT64})) + .INPUT(alg, TensorType({DT_INT32})) + .INPUT(minval, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) + .INPUT(maxval, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) + .OP_END_FACTORY_REG(StatelessRandomUniformIntV2) + +/** +* @brief Outputs deterministic pseudorandom random integers from a binomial distribution. \n + +* @par Inputs: +* @li shape: The shape of the output tensor. +* @li seed: 2 seeds (shape [2]). +* @li counts: The counts of the binomial distribution. Must be broadcastable with probs, +* and broadcastable with the rightmost dimensions of shape. +* @li probs: The probability of success for the binomial distribution. 
+* Must be broadcastable with counts and broadcastable with the rightmost dimensions of shape. \n
+
+* @par Attributes:
+* @li dtype: An optional type, specifying the output data type. Defaults to "DT_INT32". \n
+
+* @par Outputs:
+* @li y: Returns random values with the specified shape. \n
-#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_
\ No newline at end of file
+* @par Third-party framework compatibility
+* Compatible with TensorFlow StatelessRandomBinomial operator.
+*/
+REG_OP(StatelessRandomBinomial)
+    .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(seed, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(counts, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
+    .INPUT(probs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(StatelessRandomBinomial)
+
+/**
+* @brief Outputs deterministic pseudorandom random numbers from a Poisson distribution . \n
+
+* @par Inputs:
+* @li shape: The shape of the output tensor.
+* @li seed: 2 seeds (shape [2]).
+* @li lam: The mean value of the Poisson distribution . \n
+
+* @par Attributes:
+* dtype:Output data type . \n
+
+* @par Outputs:
+* y: Returns random values with the specified shape . \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow StatelessRandomPoisson operator.
+*/
+
+REG_OP(StatelessRandomPoisson)
+    .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(seed, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(lam, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(dtype, Type)
+    .OP_END_FACTORY_REG(StatelessRandomPoisson)
+
+/**
+* @brief Picks the best counter-based RNG algorithm based on device. \n
+
+* @par Outputs:
+* @li alg: The RNG algorithm. \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow StatelessRandomGetAlg operator.
+*/
+REG_OP(StatelessRandomGetAlg)
+    .OUTPUT(alg, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(StatelessRandomGetAlg)
+
+/**
+* @brief This op picks the best counter-based RNG algorithm based on device, and
+* scrambles a shape-[2] seed into a key and a counter, both needed by the
+* counter-based algorithm. \n
+
+* @par Inputs:
+* @li seed: 2 seeds (shape [2]). \n
+
+* @par Outputs:
+* @li key: Key for the counter-based RNG algorithm.
+* @li counter: Initial counter for the counter-based RNG algorithm. \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow StatelessRandomGetKeyCounter operator.
+*/
+REG_OP(StatelessRandomGetKeyCounter)
+    .INPUT(seed, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(key, TensorType({DT_UINT64}))
+    .OUTPUT(counter, TensorType({DT_UINT64}))
+    .OP_END_FACTORY_REG(StatelessRandomGetKeyCounter)
+
+/**
+* @brief This op picks the best counter-based RNG algorithm based on device, and
+* scrambles a shape-[2] seed into a key and a counter, both needed by the
+* counter-based algorithm. \n
+
+* @par Inputs:
+* @li seed: 2 seeds (shape [2]). \n
+
+* @par Outputs:
+* @li key: Key for the counter-based RNG algorithm.
+* @li counter: Initial counter for the counter-based RNG algorithm.
+* @li alg: The RNG algorithm. \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow StatelessRandomGetKeyCounterAlg operator.
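+*
+*@par Example
+* An illustrative sketch of wiring the derived key/counter/alg into a
+* counter-based stateless op. The two-argument setter overloads
+* (set_input_<name>(src, "output_name")) assume the standard REG_OP code
+* generation and are not part of this patch:
+* @code
+*   ge::op::StatelessRandomGetKeyCounterAlg kca("kca");
+*   kca.set_input_seed(seed);  // shape [2], int32/int64
+*   ge::op::StatelessRandomUniformV2 uniform("uniform");
+*   uniform.set_input_shape(shape)
+*          .set_input_key(kca, "key")
+*          .set_input_counter(kca, "counter")
+*          .set_input_alg(kca, "alg");
+* @endcode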
+*/
+REG_OP(StatelessRandomGetKeyCounterAlg)
+    .INPUT(seed, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(key, TensorType({DT_UINT64}))
+    .OUTPUT(counter, TensorType({DT_UINT64}))
+    .OUTPUT(alg, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(StatelessRandomGetKeyCounterAlg)
+
+/**
+* @brief Outputs deterministic pseudorandom values from a normal distribution. \n
+
+* @par Inputs:
+* @li shape: The shape of the output tensor.
+* @li key: Key for the counter-based RNG algorithm.
+* @li counter: Initial counter for the counter-based RNG algorithm.
+* @li alg: The RNG algorithm. \n
+
+* @par Attributes:
+* @li dtype: Output data type . \n
+
+* @par Outputs:
+* @li y: Returns random values with the specified shape . \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow StatelessRandomNormalV2 operator.
+*/
+REG_OP(StatelessRandomNormalV2)
+    .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(key, TensorType({DT_UINT64}))
+    .INPUT(counter, TensorType({DT_UINT64}))
+    .INPUT(alg, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(dtype, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(StatelessRandomNormalV2)
+
+/**
+* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n
+
+* @par Inputs:
+* @li shape: The shape of the output tensor.
+* @li key: Key for the counter-based RNG algorithm.
+* @li counter: Initial counter for the counter-based RNG algorithm.
+* @li alg: 0-D. The RNG algorithm. \n
+
+* @par Attributes:
+* dtype: Output data type . \n
+
+* @par Outputs:
+* y: Returns random values with the specified shape . \n
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow StatelessRandomUniformV2 operator.
+*/
+
+REG_OP(StatelessRandomUniformV2)
+    .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(key, TensorType({DT_UINT64}))
+    .INPUT(counter, TensorType({DT_UINT64}))
+    .INPUT(alg, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE}))
+    .ATTR(dtype, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(StatelessRandomUniformV2)
+
+/**
+* @brief Create a random number seed generator . \n
+
+* @par Inputs:
+* include:
+* @li seed: 1-D Tensor, the seed used to generate random numbers.
+* Must be one of the following types: int32, int64.
+* @li seed2: 1-D Tensor, the seed used to generate random numbers.
+* Must be one of the following types: int32, int64.
+* @li reshuffle: 1-D Tensor of type bool. Seed selection: True for a random seed,
+* False for a fixed seed. \n
+
+* @par Outputs:
+* @li handle: Handle to the random number generator.
+* @li deleter: Handle to the deleter, used when deleting the random number
+* seed generator. \n
+
+* @see AnonymousSeedGenerator()
+
+* @par Third-party framework compatibility
+* Compatible with the AnonymousSeedGenerator op of TensorFlow.
+*/
+REG_OP(AnonymousSeedGenerator)
+    .INPUT(seed, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(seed2, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(reshuffle, TensorType({DT_BOOL}))
+    .OUTPUT(handle, TensorType({DT_RESOURCE}))
+    .OUTPUT(deleter, TensorType({DT_VARIANT}))
+    .OP_END_FACTORY_REG(AnonymousSeedGenerator)
+
+/**
+* @brief DeleteSeedGenerator . \n
+
+* @par Inputs:
+* @li handle: A Tensor of type resource.
+* @li deleter: A Tensor of type variant.
+
+* @par Third-party framework compatibility
+* Compatible with TensorFlow DeleteSeedGenerator operator.
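+*
+*@par Example
+* An illustrative sketch pairing the generator with its deleter (setter
+* names assume the usual REG_OP code generation; not part of this patch):
+* @code
+*   ge::op::AnonymousSeedGenerator gen("gen");
+*   gen.set_input_seed(seed)
+*      .set_input_seed2(seed2)
+*      .set_input_reshuffle(reshuffle);
+*   ge::op::DeleteSeedGenerator del("del");
+*   del.set_input_handle(gen, "handle")
+*      .set_input_deleter(gen, "deleter");
+* @endcode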
+*/
+REG_OP(DeleteSeedGenerator)
+    .INPUT(handle, TensorType({DT_RESOURCE}))
+    .INPUT(deleter, TensorType({DT_VARIANT}))
+    .OP_END_FACTORY_REG(DeleteSeedGenerator)
+
+/**
+* @brief Creates a placeholder handle that is rewritten and passed on
+* for use during the graph compilation phase. \n
+
+* @par Outputs:
+* handle: A Tensor of type resource, the placeholder handle . \n
+*/
+REG_OP(DummySeedGenerator)
+    .OUTPUT(handle, TensorType({ DT_RESOURCE }))
+    .OP_END_FACTORY_REG(DummySeedGenerator)
+
+} // namespace ge
+#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h
index 4a88bc79..a78d63a1 100644
--- a/third_party/fwkacllib/inc/ops/string_ops.h
+++ b/third_party/fwkacllib/inc/ops/string_ops.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,6 +25,235 @@
#include "graph/operator_reg.h"

namespace ge {
+/**
+*@brief Creates ngrams from ragged string data . \n
+
+*@par Inputs:
+include:
+*@li data:1-D.The values tensor of the ragged string tensor to make ngrams out of.
+*@li data_splits:The splits tensor of the ragged string tensor to make ngrams out of . \n
+
+*@par Attributes:
+* separator:The string to append between elements of the token. Use "" for no separator.
+* ngram_widths:The sizes of the ngrams to create.
+* left_pad:The string to use to pad the left side of the ngram sequence. Only used if pad_width != 0.
+* right_pad:The string to use to pad the right side of the ngram sequence. Only used if pad_width != 0.
+* pad_width:The number of padding elements to add to each side of each sequence.
+* preserve_short_sequences: Preserve short sequences. \n
+
+*@par Outputs:
+*@li ngrams:The values tensor of the output ngrams ragged tensor.
+*@li ngrams_splits:The splits tensor of the output ngrams ragged tensor. \n
+
+*@see StringNGrams()
+
+*@par Third-party framework compatibility
+*compatible with StringNGrams op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(StringNGrams)
+    .INPUT(data, TensorType({DT_STRING}))
+    .INPUT(data_splits, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(ngrams, TensorType({DT_STRING}))
+    .OUTPUT(ngrams_splits, TensorType({DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(separator, String)
+    .ATTR(ngram_widths, ListInt, {})
+    .REQUIRED_ATTR(left_pad, String)
+    .REQUIRED_ATTR(right_pad, String)
+    .REQUIRED_ATTR(pad_width, Int)
+    .REQUIRED_ATTR(preserve_short_sequences, Bool)
+    .OP_END_FACTORY_REG(StringNGrams)
+
+/**
+*@brief Decodes each string in `input` into a sequence of Unicode code points . \n
+
+*@par Inputs:
+include:
+*@li input:The text to be decoded. Can have any shape. Note that the output is flattened
+to a vector of char values. \n
+
+*@par Attributes:
+* input_encoding:Text encoding of the input strings. This is any of the encodings supported
+by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+* errors:Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce an InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint.
A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+* replacement_char:The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is 0xFFFD, the Unicode replacement character
+(U+FFFD, decimal 65533).
+* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the
+`replacement_char`. Default is false. \n
+
+*@par Outputs:
+*@li row_splits:A 1D tensor containing the row splits.
+*@li char_values:A 1D tensor containing the decoded codepoints.
+*@li char_to_byte_starts:A 1D int64 Tensor containing the byte index in the input string where each
+character in `char_values` starts. \n
+
+*@see UnicodeDecodeWithOffsets()
+
+*@par Third-party framework compatibility
+*compatible with UnicodeDecodeWithOffsets op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(UnicodeDecodeWithOffsets)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(row_splits, TensorType({DT_INT64}))
+    .OUTPUT(char_values, TensorType({DT_INT32}))
+    .OUTPUT(char_to_byte_starts, TensorType({DT_INT64}))
+    .REQUIRED_ATTR(input_encoding, String)
+    .ATTR(errors, String, "replace")
+    .ATTR(replacement_char, Int, 65533)
+    .ATTR(replace_control_characters, Bool, false)
+    .ATTR(Tsplits, Type, DT_INT64)
+    .OP_END_FACTORY_REG(UnicodeDecodeWithOffsets)
+
+/**
+*@brief Decodes each string in `input` into a sequence of Unicode code points. \n
+
+*@par Inputs:
+include:
+*@li input:The text to be decoded. Can have any shape. Note that the output is flattened
+to a vector of char values. \n
+
+*@par Attributes:
+* input_encoding:Text encoding of the input strings. This is any of the encodings supported
+by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+* errors:Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce an InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint. A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+* replacement_char:The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is 0xFFFD, the Unicode replacement character
+(U+FFFD, decimal 65533).
+* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the
+`replacement_char`. Default is false. \n
+
+*@par Outputs:
+*@li row_splits:A 1D tensor containing the row splits.
+*@li char_values:A 1D tensor containing the decoded codepoints. \n
+
+*@see UnicodeDecode()
+
+*@par Third-party framework compatibility
+*compatible with UnicodeDecode op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
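+*
+*@par Example
+* An illustrative usage sketch only, assuming the usual REG_OP-generated
+* setters (not part of this patch):
+* @code
+*   ge::op::UnicodeDecode dec("decode");
+*   dec.set_input_input(strings)
+*      .set_attr_input_encoding("UTF-8")
+*      .set_attr_errors("replace");
+* @endcode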
+*/
+REG_OP(UnicodeDecode)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(row_splits, TensorType({DT_INT64}))
+    .OUTPUT(char_values, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(input_encoding, String)
+    .ATTR(errors, String, "replace")
+    .ATTR(replacement_char, Int, 65533)
+    .ATTR(replace_control_characters, Bool, false)
+    .ATTR(Tsplits, Type, DT_INT64)
+    .OP_END_FACTORY_REG(UnicodeDecode)
+
+/**
+*@brief Transcode the input text from a source encoding to a destination encoding. \n
+
+*@par Inputs:
+include:
+*@li input:The text to be processed. Can have any shape. \n
+
+*@par Attributes:
+* input_encoding:Text encoding of the input strings. This is any of the encodings supported
+by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`.
+Multi-byte encodings will be big-endian.
+* errors:Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce an InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint. A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+* replacement_char:The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is 0xFFFD, the Unicode replacement character
+(U+FFFD, decimal 65533).
+* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the
+`replacement_char`. Default is false. \n
+
+*@par Outputs:
+*@li output:A string tensor containing unicode text encoded using `output_encoding`. \n
+
+*@see UnicodeTranscode()
+
+*@par Third-party framework compatibility
+*compatible with UnicodeTranscode op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(UnicodeTranscode)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .REQUIRED_ATTR(input_encoding, String)
+    .ATTR(output_encoding, String, "UTF-8")
+    .ATTR(errors, String, "replace")
+    .ATTR(replacement_char, Int, 65533)
+    .ATTR(replace_control_characters, Bool, false)
+    .OP_END_FACTORY_REG(UnicodeTranscode)
+
+/**
+*@brief Encode a tensor of ints into unicode strings. \n
+
+*@par Inputs:
+include:
+*@li input_values:A 1D tensor containing the unicode codepoints that should be encoded.
+*@li input_splits:A 1D tensor specifying how the unicode codepoints should be split into strings. \n
+
+*@par Attributes:
+* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`.
+Multi-byte encodings will be big-endian.
+* errors:Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce an InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint. A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+* replacement_char:The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is 0xFFFD, the Unicode replacement character
+(U+FFFD, decimal 65533). \n
+
+*@par Outputs:
+*@li output:The 1-D Tensor of strings encoded from the provided unicode codepoints. \n
+
+*@see UnicodeEncode()
+
+*@par Third-party framework compatibility
+*compatible with UnicodeEncode op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(UnicodeEncode)
+    .INPUT(input_values, TensorType({DT_INT32}))
+    .INPUT(input_splits, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(errors, String, "replace")
+    .ATTR(output_encoding, String, "UTF-8")
+    .ATTR(replacement_char, Int, 65533)
+    .OP_END_FACTORY_REG(UnicodeEncode)

/**
*@brief Split elements of input based on delimiter into a SparseTensor . \n
@@ -62,6 +291,116 @@ REG_OP(StringSplit)
    .OP_END_FACTORY_REG(StringSplit)

/**
+*@brief Replaces the match of pattern in input with rewrite. \n
+
+*@par Inputs:
+include:
+*input:A Tensor of type string. The text to be processed. \n
+
+*@par Attributes:
+*@li pattern:A string. The regular expression to match the input.
+*@li rewrite:A string. The rewrite to be applied to the matched expression.
+*@li replace_global:An optional bool. Defaults to True. If True, the replacement is global,
+otherwise the replacement is done only on the first match.
+
+*@par Outputs:
+*output: A Tensor of type string.
+*/
+REG_OP(StaticRegexReplace)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(pattern, String, "")
+    .ATTR(rewrite, String, "")
+    .ATTR(replace_global, Bool, true)
+    .OP_END_FACTORY_REG(StaticRegexReplace)
+
+/**
+*@brief The input is a string tensor of any shape. The pattern is the
+*regular expression to be matched with every element of the input tensor.
+*The boolean values (True or False) of the output tensor indicate
+*if the input matches the regex pattern provided.
+
+*@par Inputs:
+include:
+*input:A Tensor of type string. The text to be processed.
+
+*@par Attributes:
+*pattern:A string. The regular expression to match the input.
+
+*@par Outputs:
+*output: A bool tensor with the same shape as `input`.
+*/
+REG_OP(StaticRegexFullMatch)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_BOOL}))
+    .ATTR(pattern, String, "")
+    .OP_END_FACTORY_REG(StaticRegexFullMatch)
+
+/**
+*@brief Joins the elements of "input" based on "segment_ids". \n
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The text to be processed.
+*@li segment_ids:A Tensor. Must be one of the following types: int32, int64.
+*A tensor whose shape is a prefix of data.shape. Negative segment ids are not supported.
+*@li num_segments:A Tensor. Must be one of the following types: int32, int64. A scalar.
+
+*@par Attributes:
+*separator:An optional string. Defaults to "". The separator to use when joining.
+
+*@par Outputs:
+*output: A Tensor of type string.
+*/
+REG_OP(UnsortedSegmentJoin)
+    .INPUT(input, TensorType({DT_STRING}))
+    .INPUT(segment_ids, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(num_segments, TensorType({DT_INT32,DT_INT64}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(separator, String, "")
+    .OP_END_FACTORY_REG(UnsortedSegmentJoin)
+
+/**
+*@brief Converts each string in the input tensor to lowercase.
+
+/**
+*@brief Converts each string in the input tensor to its lowercase equivalent. \n
+
+*@par Inputs:
+include:
+*input:A Tensor of type string. The text to be processed.
+
+*@par Attributes:
+*encoding:An optional string. Defaults to "".
+
+*@par Outputs:
+*output: A Tensor of type string.
+*/
+REG_OP(StringLower)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(encoding, String, "")
+    .OP_END_FACTORY_REG(StringLower)
+
+/**
+*@brief Converts each string in the input tensor to its uppercase equivalent. \n
+
+*@par Inputs:
+include:
+*input:A Tensor of type string. The text to be processed.
+
+*@par Attributes:
+*encoding:An optional string. Defaults to "".
+
+*@par Outputs:
+*output: A Tensor of type string.
+*/
+REG_OP(StringUpper)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(encoding, String, "")
+    .OP_END_FACTORY_REG(StringUpper)
+
+/**
 *@brief Split elements of source based on sep into a SparseTensor . \n
 
 *@par Inputs:
@@ -488,7 +827,7 @@ include:
 */
 REG_OP(AsString)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \
-                          DT_DOUBLE, DT_BOOL}))
+                          DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128}))
     .OUTPUT(y, TensorType({DT_STRING}))
     .ATTR(precision, Int, -1)
     .ATTR(scientific, Bool, false)
@@ -557,6 +896,45 @@ REG_OP(DecodeBase64)
     .INPUT(x, TensorType({DT_STRING}))
     .OUTPUT(y, TensorType({DT_STRING}))
     .OP_END_FACTORY_REG(DecodeBase64)
+
+/**
+*@brief StringNormalization performs string operations for basic cleaning . \n
+
+*@par Inputs:
+*input: only accepts [C] or [1, C] UTF-8 strings tensor . \n
+
+*@par Outputs:
+*output: UTF-8 strings tensor after cleaning . \n
+
+*@par Attributes:
+*@li stopwords : list of strings (default is empty).
+*List of stop words. If not set, no word will be removed from the input strings
+tensor.
+
+*@li is_case_sensitive : bool (default is false).
+*Boolean. Whether the identification of stop words in the input strings tensor is
+case-sensitive. Default is false.
+
+*@li case_change_action : string (default is "NONE").
+*String enum that causes the output to be lowercased/uppercased/unchanged. Valid
+values are "LOWER", "UPPER", "NONE". Default is "NONE".
+
+*@li locale : string (default is "C").
+*Environment-dependent string that denotes the locale according to which the output
+strings need to be upper/lowercased. Defaults to "C" or the platform-specific
+equivalent, as decided by the implementation. \n
+
+*@attention Constraints:
+*input can be either a 1-D or 2-D tensor; the shape of a 2-D tensor must be [1, C].
+*/
+REG_OP(StringNormalizer)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(stopwords, ListString, {})
+    .ATTR(is_case_sensitive, Bool, false)
+    .ATTR(case_change_action, String, "NONE")
+    .ATTR(locale, String, "C")
+    .OP_END_FACTORY_REG(StringNormalizer)
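+
+/* Worked example (illustrative, following the ONNX StringNormalizer semantics):
+ * with input = ["Monday", "tuesday", "WEDNESDAY"], stopwords = ["monday"],
+ * is_case_sensitive = false and case_change_action = "UPPER", the
+ * case-insensitive stop word removes "Monday" and the remainder is uppercased:
+ * output = ["TUESDAY", "WEDNESDAY"].
+ */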
 } // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h
index a1bf4f8b..6e8eaac3 100644
--- a/third_party/fwkacllib/inc/ops/swap_co_ops.h
+++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h
index 9c61f2c9..d9dea358 100644
--- a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h
+++ b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,7 +39,7 @@ namespace ge {
 *input_format: A required string, specifying the input format. \n
 
 *@par Outputs:
-*y: The output tensor of type uint8, format only support NC1HWC0_C04.
+*y: The output tensor of type uint8.
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 *
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h
index 64e18fc7..eaaed8e4 100644
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,15 +29,15 @@ namespace ge {
 
 *@par Inputs:
 *The input handle must have the resource type. Inputs include:
-*@li x:A list of Tensor objects. One or more tensors from which
+*x:A list of Tensor objects. One or more tensors from which
 the enqueued tensors should be taken . \n
 
 *@par Outputs:
-*@li y:A list of Tensor objects. One or more tensors from which
+*y:A list of Tensor objects. One or more tensors from which
 the enqueued tensors should be taken . \n
 
 *@par Attributes:
-*@li type: An optional ge::DataType. It refers to the target data type of outputs . \n
+*type: An optional ge::DataType. It refers to the target data type of outputs . \n
 
 *@par Third-party framework compatibility
 *Compatible with tensorflow QueueIsClosed operator.
@@ -60,7 +60,10 @@ REG_OP(Bitcast)
 *x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN . \n
 
 *@par Outputs:
-*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0.
+*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. \n
+
+*@attention Constraints:
+*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
 */
 REG_OP(DepthwiseWeight4DTo6D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
@@ -77,7 +80,10 @@ REG_OP(DepthwiseWeight4DTo6D)
 *channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN . \n
 
 *@par Outputs:
-*y: A 4D Tensor. Has the same type as "x", with format HWCN.
+*y: A 4D Tensor. Has the same type as "x", with format HWCN. \n
+
+*@attention Constraints:
+*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
 */
 REG_OP(DepthwiseWeight6DTo4D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
@@ -130,31 +136,60 @@ REG_OP(Transpose)
     .OP_END_FACTORY_REG(Transpose)
 
 /**
-*@brief Doing format_transfer for various data format only
-support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW"
-"NCHW" to "FRACTAL_Zn" or "FRACTAL_Zn" to "NCHW".
-"HWCN" to "FRACTAL_Zn" or "FRACTAL_Zn" to "HWCN" . \n
+*@brief Do format transfer for various data formats.
+* In general, the framework will insert it automatically. \n
 
 *@par Inputs:
-*src: A Tensor dtype of all types . \n
+*src: A Tensor. For all branches can be types: float16, float32, int32, int8, bool.
+* For branches without padding also can be types: int16, int64, uint8, uint16, uint32, uint64 . \n
 
 *@par Attributes:
-*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc.
-*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc.
-*@li group: A required int32, default value is 1. \n
+*@li src_format: A string source data format, can be "NHWC", "NCHW" etc.
+*@li dst_format: A string target data format, can be "NCHW" etc.
+*@li src_subformat: An optional int32 for source sub-format, default value is 0.
+*@li dst_subformat: An optional int32 for target sub-format, default value is 0.
+*@li groups: An optional int32, default value is 1. \n
 
 *@par Outputs:
-*dst: A Tensor dtype of all types.
+*dst: A Tensor. Has the same type as "src".
 */
 REG_OP(TransData)
     .INPUT(src, TensorType::BasicType())
     .OUTPUT(dst, TensorType::BasicType())
     .REQUIRED_ATTR(src_format, String)
     .REQUIRED_ATTR(dst_format, String)
-    .ATTR(group, Int, 1)
+    .ATTR(src_subformat, Int, 0)
+    .ATTR(dst_subformat, Int, 0)
+    .ATTR(groups, Int, 1)
     .OP_END_FACTORY_REG(TransData)
 
 /**
+*@brief Do format transfer for various data formats, only supporting
+"ND" to "ND_RNN_BIAS" and "ND" to "FRACTAL_ZN_RNN".
+
+*@par Inputs:
+*src: A Tensor. For all branches can be types: float16, float32, int32, int8, bool.
+* For branches without padding also can be types: int16, int64, uint8, uint16, uint32, uint64 . \n
+
+*@par Attributes:
+*@li src_format: A string source data format, can be "ND", "ND_RNN_BIAS", "FRACTAL_ZN_RNN" etc.
+*@li dst_format: A string target data format, can be "ND", "ND_RNN_BIAS", "FRACTAL_ZN_RNN" etc.
+*@li input_size: A required int32.
+*@li hidden_size: A required int32.
+
+*@par Outputs:
+*dst: A Tensor. Has the same type as "src".
+*/
+REG_OP(TransDataRNN)
+    .INPUT(src, TensorType::BasicType())
+    .OUTPUT(dst, TensorType::BasicType())
+    .REQUIRED_ATTR(src_format, String)
+    .REQUIRED_ATTR(dst_format, String)
+    .REQUIRED_ATTR(input_size, Int)
+    .REQUIRED_ATTR(hidden_size, Int)
+    .OP_END_FACTORY_REG(TransDataRNN)
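+
+/* Usage sketch (illustrative; assumes the set_input_/set_attr_ accessors that
+ * REG_OP generates, and a previously built operator "src_op"):
+ *
+ *   auto trans = ge::op::TransData("trans_data_1");
+ *   trans.set_input_src(src_op);
+ *   trans.set_attr_src_format("NHWC");
+ *   trans.set_attr_dst_format("NC1HWC0");
+ *   trans.set_attr_groups(1);
+ */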
+
+/**
 *@brief Permutes the dimensions according to order.
 The returned tensor's dimension i will correspond to the input dimension order[i] . \n
 
@@ -174,21 +209,27 @@ REG_OP(Permute)
     .OP_END_FACTORY_REG(Permute)
 
 /**
-*@brief Flattens the inputs. Reserves axis 0 and flattens the input tensors
-* along axis 1 . \n
+*@brief Flattens the input tensor into a 2D matrix. If the input tensor has shape (d_0, d_1, ..., d_n),
+* then the output will have shape (d_0 X d_1 ... d_(axis-1), d_axis X d_(axis + 1) ... X d_n). \n
 
 *@par Inputs:
-*One input:
-*x: A multi-dimensional Tensor. Must be one of the following types:
-* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . \n
+* One input:
+* x: A multi-dimensional Tensor. Must be one of the following types:
+* int8, uint8, int16, uint16, int32, uint32, int64, uint64, float16, float32.
 
 *@par Outputs:
-*y: A 2D flattened Tensor (Reserves axis 0 and flattens the input tensors
-* along axis 1). Must be one of the following data types: int8, uint8, int16,
-* uint16, int32, uint32, int64,uint64, float16, float32 . \n
+* y: A 2D flattened Tensor with the contents of the input tensor, with input dimensions up to axis flattened
+* to the outer dimension of the output and remaining input dimensions flattened into the inner dimension of the output.
+* Must be one of the following data types: int8, uint8, int16, uint16, int32, uint32, int64, uint64, float16, float32.
+
+*@par Attributes:
+* axis: An optional int32, default value is 1. Indicates up to which input dimensions (exclusive) should be flattened
+* to the outer dimension of the output. The value for axis must be in the range [-r, r], where r is the rank of
+* the input tensor. A negative value means counting dimensions from the back. When axis = 0, the shape of
+* the output tensor is (1, d_0 X d_1 ... d_n), where the shape of the input tensor is (d_0, d_1, ... d_n).
 
 *@par Third-party framework compatibility
-* Compatible with TensorFlow operator Flatten.
+* Compatible with TensorFlow / ONNX operator Flatten.
 */
 REG_OP(Flatten)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64,
@@ -197,6 +238,7 @@ REG_OP(Flatten)
     .OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64,
                            DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64,
                            DT_FLOAT, DT_FLOAT16}))
+    .ATTR(axis, Int, 1)
     .OP_END_FACTORY_REG(Flatten)
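+
+/* Worked example (illustrative): for an input of shape (2, 3, 4, 5),
+ * axis = 1 (default) gives output shape (2, 60); axis = 2 gives (6, 20);
+ * axis = 0 gives (1, 120); axis = -1 resolves to axis 3 and gives (24, 5).
+ */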
 
 /**
@@ -204,13 +246,13 @@ REG_OP(Flatten)
 
 *@par Inputs:
 * Three inputs, including:
-*@li x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0.
+*@li x: A 5D Tensor of type float16 or int8 or uint8.
 *@li block_shape: A 1D list or tuple of int32 or int64.
 *@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to
 *crop from start and end dimensions after permutation . \n
 
 *@par Outputs:
-*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n
+*y: A Tensor. Has the same type as input "x" . \n
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator BatchToSpaceND.
@@ -227,7 +269,7 @@ REG_OP(BatchToSpaceND)
 
 *@par Inputs:
 * One input:
-*x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0 . \n
+*x: A 5D Tensor of type float16 or int8 or uint8. \n
 
 *@par Attributes:
 *@li block_shape: A required 1D list or tuple of int32 or int64.
 *@li crops: A required 2D list or tuple of int32 or int64. Specifies the amount to crop
 * from the start and end dimensions after permutation . \n
 
 *@par Outputs:
-*y: A Tensor with format NC1HWC0. Has the same type as input "x".
+*y: A Tensor. Has the same type as input "x".
 
 *@par Third-party framework compatibility
@@ -256,12 +298,12 @@ REG_OP(BatchToSpaceNDD)
 
 *@par Inputs:
 * Three inputs, including:
-*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0.
+*@li x: A 5D Tensor of type float16 or float32.
 *@li block_shape: A 1D list or tuple of int32 or int64.
 *@li paddings: A 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n
 
 *@par Outputs:
-*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n
+*y: A Tensor. Has the same type as input "x" . \n
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator SpaceToBatchND.
@@ -278,14 +320,14 @@ REG_OP(SpaceToBatchND)
 
 *@par Inputs:
 * One input:
-*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n
+*x: A 5D Tensor of type float16 or float32. \n
 
 *@par Attributes:
 *@li block_shape: A required 1D list or tuple of int32 or int64.
 *@li paddings: A required 2D list or tuple of int32 or int64. Specifies the padding for the start and
 end dimensions after permutation . \n
 
 *@par Outputs:
-*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n
+*y: A Tensor. Has the same type as input "x" . \n
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator SpaceToBatchND.
@@ -336,8 +378,9 @@ REG_OP(SpaceToDepth)
 * complex128, uint32, uint64
 
 *@par Attributes:
-*Two attributes, including:
+*Three attributes, including:
 * @li block_size: An int >= 2, specifying the size of the spatial block.
+* @li mode: An optional string, specifying the mode. Defaults to "DCR".
 * @li data_format: An optional string, specifying the data format. Defaults to "NHWC" . \n
 
 *@par Outputs:
@@ -350,6 +393,7 @@ REG_OP(DepthToSpace)
     .INPUT(x, TensorType::BasicType())
     .OUTPUT(y, TensorType::BasicType())
     .REQUIRED_ATTR(block_size, Int)
+    .ATTR(mode, String, "DCR")
     .ATTR(data_format, String, "NHWC")
     .OP_END_FACTORY_REG(DepthToSpace)
 
@@ -357,7 +401,7 @@ REG_OP(DepthToSpace)
 *@brief Permutes data into spatial data blocks and then prunes them . \n
 
 *@par Inputs:
-*@li x: A 4D Tensor with format NHWC.
+*@li x: A 4D Tensor. Must set the format, the supported format list is ["NCHW", "NHWC"].
 *@li crops: A 1D list or tuple of int32 or int64 . \n
 
 *Must be one of the following types: float16, float32
@@ -418,12 +462,8 @@ REG_OP(BatchToSpace)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead.
 */
 REG_OP(BatchToSpaceD)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8,
-                          DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64,
-                          DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8,
-                           DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64,
-                           DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
     .REQUIRED_ATTR(block_size, Int)
     .REQUIRED_ATTR(crops, ListInt)
     .OP_END_FACTORY_REG(BatchToSpaceD)
@@ -434,9 +474,10 @@ REG_OP(BatchToSpaceD)
 
 *@par Inputs:
 * Two inputs, including:
-*@li x: An NHWC Tensor. Must be one of the following types:
+*@li x: A 4D Tensor. Must be one of the following types:
 * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8,
 * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+* Must set the format, the supported format list is ["NCHW", "NHWC"].
 *@li paddings: A 2D tensor of type int, specifying the input . \n
 
 *@par Attributes:
@@ -485,7 +526,7 @@ REG_OP(SpaceToBatchD)
 * tensors . \n
 
 * @par Inputs:
-* x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0 . \n
+* x: A rank-R tensor (R > 0) of type BasicType. \n
 
 * @par Attributes:
 * @li num: A required int, specifying the number of tensors to be unpacked to.
@@ -498,8 +539,7 @@ REG_OP(SpaceToBatchD)
 
 * @attention Constraints:
 * @li If "num" is not specified, it is inferred from the shape of "x".
-* @li For the ND format, "axis" is in the range [-R, R); For the NC1HWC0 format,
-* "axis" must not be 2, 3, -2, or -3 . \n
+* @li For the ND format, "axis" is in the range [-R, R). \n
 
 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator Unpack.
@@ -518,7 +558,8 @@ REG_OP(Unpack)
 
 * @par Inputs:
 * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the
 * following types:float32, double, int32, uint8, int16, int8, int64, uint16,
-* float16, uint32, uint64
+* float16, uint32, uint64. The inputs must have data_format with one of the
+* following: NHWC, NCHW.
 
 * @par Attributes:
 * @li ksizes: A required list or tuple. The size of the sliding window for each
@@ -533,7 +574,6 @@ REG_OP(Unpack)
 * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
 * @li padding: A required string. The type of padding algorithm to use,
 support "SAME" or "VALID". \n
-* @li data_format: A required string. The format of input, only supported NHWC. \n
 
 * @par Outputs:
 * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *
@@ -554,7 +594,6 @@ REG_OP(ExtractImagePatches)
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(rates, ListInt)
     .REQUIRED_ATTR(padding, String)
-    .ATTR(data_format, String, "NHWC")
     .OP_END_FACTORY_REG(ExtractImagePatches)
 
 /**
@@ -563,6 +602,7 @@ REG_OP(ExtractImagePatches)
 
 * @par Inputs:
 * x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n
+* The inputs must have data_format with one of the following: NDHWC, NCDHW. \n
 
 * @par Attributes:
 * @li ksizes: A required list or tuple. The size of the sliding window for each
@@ -571,7 +611,6 @@ REG_OP(ExtractImagePatches)
 * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1].
 * @li padding: A required string. The type of padding algorithm to use ,
 * support "SAME" or "VALID" . \n
-* @li data_format: An optional string. The format of input, only supported NDHWC. \n
 
 * @par Outputs:
 * Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes *
@@ -590,7 +629,6 @@ REG_OP(ExtractVolumePatches)
     .REQUIRED_ATTR(ksizes, ListInt)
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(padding, String)
-    .ATTR(data_format, String, "NDHWC")
     .OP_END_FACTORY_REG(ExtractVolumePatches)
 
 /**
@@ -717,6 +755,215 @@ REG_OP(CompressFcOp)
     .OUTPUT(compress_index, TensorType({DT_INT8}))
     .REQUIRED_ATTR(compress_parameters, ListInt)
     .OP_END_FACTORY_REG(CompressFcOp)
+
+/**
+*@brief Performs Col2im for each batch entry. \n
+
+*@par Inputs:
+*@li x: The Col Tensor. 4-D, shape: `(n, c, kernel_h*kernel_w, ho*wo)`,
+where ho/wo is computed as do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1,
+with d standing for h or w.
+*@li output_size: The img shape Tensor. 1-D, shape:`(2)`, value: (output_h, output_w). \n
+
+*@par Outputs:
+*y: The img Tensor. 4-D, shape: `(n, c, output_h, output_w)`. \n
+
+*@par Attributes:
+*@li kernel_size: ListInt, value: `(kernel_h, kernel_w)`, the shape of the kernel in convolution.
+*@li dilation: ListInt, value: `(dilation_h, dilation_w)`, the dilation in convolution.
+*@li padding: ListInt, value: `(padding_h, padding_w)`, the padding in convolution.
+*@li stride: ListInt, value: `(stride_h, stride_w)`, the stride in convolution. \n
+
+*@par Third-party framework compatibility
+* Compatible with Pytorch col2im/im2col_backward operator.
+*/
+REG_OP(Col2im)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(output_size, TensorType({DT_INT32, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(kernel_size, ListInt)
+    .REQUIRED_ATTR(dilation, ListInt)
+    .REQUIRED_ATTR(padding, ListInt)
+    .REQUIRED_ATTR(stride, ListInt)
+    .OP_END_FACTORY_REG(Col2im)
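+
+/* Worked example (illustrative): with output_size = (4, 4), kernel_size = (2, 2),
+ * stride = (1, 1), dilation = (1, 1) and padding = (0, 0):
+ *   ho = wo = (4 + 2*0 - 1*(2 - 1) - 1)//1 + 1 = 3,
+ * so x has shape (n, c, 2*2, 3*3) = (n, c, 4, 9) and y has shape (n, c, 4, 4).
+ */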
+
+/**
+* @brief Performs Im2col for each batch entry. \n
+
+* @par Inputs:
+* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth]. Must be one of the
+* following types: float32, int8, float16. The inputs must have data_format with
+* one of the following: NHWC, NCHW.
+
+* @par Attributes:
+* @li ksizes: A required list or tuple. The size of the sliding window for each
+* dimension of images.
+* @li strides: An optional list or tuple. How far the centers of two consecutive
+* patches are in the images. Defaults to "{1}".
+* @li dilations: An optional list or tuple. Defaults to "{1}".
+* This is the input stride, specifying how far two consecutive patch
+* samples are in the input. Equivalent to extracting patches
+* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *
+* (dilations - 1), followed by subsampling them spatially by a factor of dilations.
+* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
+* @li padding_mode: An optional string. The type of padding algorithm to use,
+* supporting "SAME", "VALID" and "CALCULATED". Among the three modes, only
+* "CALCULATED" uses the pads attribute below. Defaults to "CALCULATED".
+* @li pads: An optional list or tuple. The pad distance. Defaults to "{0}". \n
+
+* @par Outputs:
+* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *
+* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols
+* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols"
+* are the dimensions of the output patches . \n
+
+* @attention Constraints:
+* "ksizes", "strides", "dilations" and "pads" are lists of integers . \n
+
+* @par Third-party framework compatibility
+* Compatible with Pytorch Im2col operator.
+*/
+REG_OP(Im2col)
+    .INPUT(x, TensorType::RealNumberType())
+    .OUTPUT(y, TensorType::RealNumberType())
+    .REQUIRED_ATTR(ksizes, ListInt)
+    .ATTR(strides, ListInt, {1})
+    .ATTR(dilations, ListInt, {1})
+    .ATTR(padding_mode, String, "CALCULATED")
+    .ATTR(pads, ListInt, {0})
+    .OP_END_FACTORY_REG(Im2col)
+
+/**
+*@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine
+matrices theta. \n
+
+*@par Inputs:
+*Input theta must be float16 or float, output_size must be int32 type. Inputs
+include:
+*@li theta: input batch of affine matrices with shape (N,2,3) for 2D or (N,3,4)
+for 3D
+*@li output_size: the target output image size. (N×C×H×W for 2D or N×C×D×H×W for
+3D) Example: torch.Size((32, 3, 24, 24)) . \n
+
+*@par Attributes:
+*align_corners: if True, consider -1 and 1 to refer to the centers of the corner
+pixels rather than the image corners. Refer to grid_sample() for a more complete
+description. A grid generated by affine_grid() should be passed to grid_sample()
+with the same setting for this option. Default: False \n
+
+*@par Outputs:
+*@li y: A tensor holding the sampling grid, of shape (N, H, W, 2) for 2D or
+(N, D, H, W, 3) for 3D. Has the same dtype as "theta". \n
+
+*@attention Constraints:
+*Input theta must be float16 or float, output_size must be int32 type .
+The current implementation of the AffineGrid AI Core operator adopts
+BatchMatMul's FP16 fusion operator scheme, and the accuracy will
+decrease when the theta range exceeds [-10,10]. If the model requires
+high accuracy of AffineGrid, it is recommended to use AICPU. \n
+
+*@par Third-party framework compatibility
+*Compatible with Pytorch affine_grid operator.
+*/
+
+REG_OP(AffineGrid)
+    .INPUT(theta, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(output_size, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(AffineGrid)
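+
+/* Usage sketch (illustrative; follows PyTorch's affine_grid convention, and
+ * "theta_op"/"size_op" are placeholder producers): for a batch of 32 2D affine
+ * matrices, theta has shape (32, 2, 3), output_size holds (32, 3, 24, 24), and
+ * the resulting grid y has shape (32, 24, 24, 2).
+ *
+ *   auto grid = ge::op::AffineGrid("affine_grid_1");
+ *   grid.set_input_theta(theta_op);
+ *   grid.set_input_output_size(size_op);
+ *   grid.set_attr_align_corners(false);
+ */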
+
+/**
+*@brief Make memory of a view be contiguous. \n
+
+*@par Inputs:
+*Four inputs, including:
+*@li x: The input tensor.
+*@li size: The shape of the output tensor.
+*@li stride: The stride of the output tensor.
+*@li storage_offset: The offset in the underlying storage of the output tensor. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x" . \n
+
+*@par Third-party framework compatibility
+*Compatible with the pytorch operator as_strided.
+*/
+REG_OP(AsStrided)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(size, TensorType::IndexNumberType())
+    .INPUT(stride, TensorType::IndexNumberType())
+    .INPUT(storage_offset, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(AsStrided)
+
+/**
+*@brief This transform extracts n-grams from the input sequence and saves them as a
+vector. \n
+
+*@par Inputs:
+*@li input: can be either a 1-D or 2-D tensor for n-gram extraction. It is either strings (UTF-8) or int32/int64 . \n
+
+*@par Attributes:
+*@li max_gram_length : int (required)
+*Maximum n-gram length. If this value is 3, 3-grams will be used to generate the output .
+*@li max_skip_count : int (required)
+*Maximum number of items (integers/strings) to be skipped when constructing an n-gram from X.
+If max_skip_count=1, min_gram_length=2, max_gram_length=3, this operator may generate 2-grams
+with skip_count=0 and skip_count=1, and 3-grams with skip_count=0 and skip_count=1.
+*@li min_gram_length : int (required)
+*Minimum n-gram length. If this value is 2 and max_gram_length is 3, output may contain counts of
+2-grams and 3-grams.
+*@li mode : string (required)
+*The weighting criteria. It can be one of "TF" (term frequency), "IDF" (inverse document frequency),
+and "TFIDF" (the combination of TF and IDF).
+*@li ngram_counts : list of ints (required)
+*The starting indexes of 1-grams, 2-grams, and so on in pool. It is useful when determining the boundary
+between two consecutive collections of n-grams. For example, if ngram_counts is [0, 17, 36],
+the first indexes (zero-based) of 1-grams/2-grams/3-grams in pool are 0/17/36. This format is essentially identical
+to CSR (or CSC) sparse matrix format, and we choose to use this due to its popularity.
+*@li ngram_indexes : list of ints (required)
+*list of int64s (type: AttributeProto::INTS). This list is parallel to the specified 'pool_*' attribute. The i-th element
+in ngram_indexes indicates the coordinate of the i-th n-gram in the output tensor.
+*@li pool_int64s : list of ints
+*List of int64 n-grams learned from the training set. Either this or the pool_strings attribute must be present, but not both.
+It is a 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. The i-th element
+in pool stores the n-gram that should be mapped to coordinate ngram_indexes[i] in the output vector.
+*@li pool_strings : list of strings
+*List of string n-grams learned from the training set. Either this or the pool_int64s attribute must be present, but not both.
+It is a 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. The i-th element
+in pool stores the n-gram that should be mapped to coordinate ngram_indexes[i] in the output vector.
+*@li weights : list of floats
+*list of floats. This attribute stores the weight of each n-gram in pool. The i-th element in weights is the weight of
+the i-th n-gram in pool. Its length equals the size of ngram_indexes. By default, weights is an all-one tensor. This attribute
+is used when mode is "IDF" or "TFIDF" to scale the associated word counts. \n
+
+*@par Outputs:
+*@li output: tensor(float)
+*For 1-D input, output is the n-gram representation of that input. For 2-D input, the output is also a 2-D tensor
+whose i-th row is the n-gram representation of the i-th input row. More specifically, if input shape is [C], the corresponding
+output shape would be [max(ngram_indexes) + 1]. If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1] tensor. \n
+
+*@attention Constraints:
+*@li input can be either a 1-D or 2-D tensor, shape is [C] or [N, C].
+*@li max(ngram_indexes) + 1 == len(weights), len(y) == len(weights).
+*@li ngram_counts and pool (pool_int64s or pool_strings) must match.
+*@li either the pool_strings or the pool_int64s attribute must be present, but not both.
+*/
+
+REG_OP(TfIdfVectorizer)
+    .INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING}))
+    .OUTPUT(output, TensorType({DT_FLOAT}))
+    .REQUIRED_ATTR(max_gram_length, Int)
+    .REQUIRED_ATTR(max_skip_count, Int)
+    .REQUIRED_ATTR(min_gram_length, Int)
+    .REQUIRED_ATTR(mode, String)
+    .REQUIRED_ATTR(ngram_counts, ListInt)
+    .REQUIRED_ATTR(ngram_indexes, ListInt)
+    .ATTR(pool_int64s, ListInt, {})
+    .ATTR(pool_strings, ListString, {})
+    .ATTR(weights, ListFloat, {})
+    .OP_END_FACTORY_REG(TfIdfVectorizer)
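+
+/* Worked example (illustrative, following the ONNX operator's semantics): with
+ * ngram_counts = [0, 4], pool_int64s = [2, 3, 5, 4, 5, 6, 7, 8] (four 1-grams
+ * followed by the 2-grams (5,6) and (7,8)), ngram_indexes = [0, 1, 2, 3, 4, 5],
+ * min_gram_length = 1, max_gram_length = 2, max_skip_count = 0 and mode = "TF",
+ * the input [5, 6, 7, 8] produces output [0., 0., 1., 0., 1., 1.]: one hit for
+ * the 1-gram 5, none for 2/3/4, and one each for the 2-grams (5,6) and (7,8).
+ */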
 } // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/vector_search.h b/third_party/fwkacllib/inc/ops/vector_search.h
new file mode 100644
index 00000000..425eb5d6
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/vector_search.h
@@ -0,0 +1,251 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file vector_search.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+* @brief Generate ADC (asymmetric distance computation) tables. \n
+*
+* @par Inputs:
+* Four inputs, including:
+* @li query: A Tensor. Must be one of the following types: float16, float32.
+* @li code_book: A Tensor. Must be one of the following types: float16, float32.
+* @li centroids: A Tensor. Must be one of the following types: float16, float32.
+* @li bucket_list: A Tensor. Must be one of the following types: int32, int64.
+*
+* @par Outputs:
+* adc_tables: A Tensor. Must be one of the following types: float16, float32.
+*
+* @par Attributes:
+* distance_type: A string indicating the distance type of the ADC tables. Examples: `"l2sqr", "inner_product"`.
+The default value is "l2sqr".
+*/
+REG_OP(GenADC)
+    .INPUT(query, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(code_book, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(centroids, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(distance_type, String, "l2sqr")
+    .OP_END_FACTORY_REG(GenADC)
+
+/**
+* @brief Finds values and indices of the "k" largest or smallest elements for the last dimension. \n
+*
+* @par Inputs:
+* Dynamic inputs, including:
+* @li actual_count: A Tensor of type int32, the actual number of pq_distance.
+* @li pq_distance: A Tensor, will be updated after calculation. Must be one of the following types: float32, float16.
+* @li grouped_extreme_distance: A Tensor, the extremum in each group. Must be one of the following types: float32, float16. +* @li pq_index: A Tensor of type int32, index corresponding to pq_distance. +* @li pq_ivf: A Tensor of type int32 , the bucket number corresponding to pq_distance. +* +* @par Attributes: +* @li order: A string, indicates the sorting method of topk_pq_distance. \n +* @li k: Int, k maximum or minimum values. \n +* @li group_size: Int, the group size of the extremum. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(TopKPQDistance) + .DYNAMIC_INPUT(actual_count, TensorType({DT_INT32})) + .DYNAMIC_INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(grouped_extreme_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(pq_ivf, TensorType({DT_INT32})) + .DYNAMIC_INPUT(pq_index, TensorType({DT_INT32})) + .OUTPUT(topk_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(topk_ivf, TensorType({DT_INT32})) + .OUTPUT(topk_index, TensorType({DT_INT32})) + .ATTR(order, String, "ASC") + .REQUIRED_ATTR(k, Int) + .REQUIRED_ATTR(group_size, Int) + .OP_END_FACTORY_REG(TopKPQDistance) + +/** +* @brief Calculate PQ distance. \n +* +* @par Inputs: +* Six inputs, including: +* @li ivf: A Tensor, dtype is uint8. +* @li bucket_list: A Tensor, dtype is int32. +* @li bucket_base_distance: A Tensor, dtype is float16. +* @li bucket_limits: A Tensor, dtype is int32. +* @li bucket_offsets: A Tensor, dtype is int32. +* @li adc_tables: A Tensor. dtype is float16. \n +* +* @par Outputs: +* Five outputs, including: +* @li actual_count: A Tensor, dtype is int32, the first element means the length of processed ivf. +* @li pq_distance: A Tensor, dtype is float16. +* @li grouped_extreme_distance: A Tensor, dtype is float16. +* @li pq_ivf: A Tensor, dtype is int32. +* @li pq_index: A Tensor, dtype is int32. \n +* +* @par Attributes: +* Five attributes, including: +* @li group_size: A Scalar, indicates the group size when compute grouped_extreme_distance. +* @li total_limit: A Scalar, indicates the total length of the outputs. +* @li extreme_mode: A Scalar, indicates the type of extremum, 0 means minimum, and 1 means maximum. +* @li split_count: A Scalar. +* @li split_index: A Scalar. \n +* +*/ +REG_OP(ScanPQCodes) + .INPUT(ivf, TensorType({DT_UINT8})) + .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) + .INPUT(bucket_base_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bucket_limits, TensorType({DT_INT32})) + .INPUT(bucket_offsets, TensorType({DT_INT64})) + .INPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(actual_count, TensorType({DT_INT32})) + .OUTPUT(pq_distance, TensorType({DT_FLOAT16})) + .OUTPUT(grouped_extreme_distance, TensorType({DT_FLOAT16})) + .OUTPUT(pq_ivf, TensorType({DT_INT32})) + .OUTPUT(pq_index, TensorType({DT_INT32})) + .REQUIRED_ATTR(total_limit, Int) + .ATTR(group_size, Int, 64) + .ATTR(extreme_mode, Int, 0) + .ATTR(split_count, Int, 1) + .ATTR(split_index, Int, 0) + .OP_END_FACTORY_REG(ScanPQCodes) + +/** +* @brief Calculate buckets limit and offset. \n + +* @par Inputs: +* Three inputs, including: +* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n +* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n +* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. 
 \n
+
+* @par Attributes:
+* total_limit: An int64 maximum value for the sum of the ivf_counts corresponding to bucket_list. \n
+
+* @par Outputs:
+* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n
+* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n
+*/
+REG_OP(CalcBucketsLimitAndOffset)
+    .INPUT(bucket_list, TensorType({DT_INT32}))
+    .INPUT(ivf_counts, TensorType({DT_INT32}))
+    .INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(buckets_limit, TensorType({DT_INT32}))
+    .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(total_limit, Int)
+    .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset)
+
+/**
+*@brief Get a block tensor according to the base addr tensor, for hccl remote read to use.
+*@par Inputs:
+*@li base_addr: A Tensor of type int64/uint64. \n
+*@li x: A Tensor of type int64/uint64, carrying the row and col index of the block. \n
+
+*@par Outputs:
+*addrs_table: list of [rank id, host addr, device addr, read size]
+
+*@par Attributes:
+*@li ori_shape: A required list of ints. Shape of the base tensor.
+*@li block_size: A required list of ints. Shape of the split block tensor.
+*@li ori_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to
+"Matrix". Currently only Matrix storage is supported.
+*@li block_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to
+"Matrix". Currently only Matrix storage is supported.
+*@li rank_id: An optional int of rank id. Defaults to 0.
+*@li dtype: An optional Type of the base tensor. Defaults to DT_FLOAT.
+*/
+REG_OP(IndexToAddr)
+    .INPUT(base_addr, TensorType({DT_INT64, DT_UINT64}))
+    .INPUT(x, TensorType({DT_INT64, DT_UINT64}))
+    .OUTPUT(addrs_table, TensorType({DT_INT64, DT_UINT64}))
+    .REQUIRED_ATTR(ori_shape, ListInt)
+    .REQUIRED_ATTR(block_size, ListInt)
+    .ATTR(ori_storage_mode, String, "Matrix")
+    .ATTR(block_storage_mode, String, "Matrix")
+    .ATTR(rank_id, Int, 0)
+    .ATTR(dtype, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(IndexToAddr)
+
+/**
+*@brief Convert one-dimensional coordinates to two-dimensional coordinates.
+*@par Inputs:
+*@li x: A Tensor of type int32/int64/uint64. One-dimensional coordinates.
+*@li shape: A Tensor of type int32/int64/uint64. 4D tensor [N,C,H,W].
+*@par Outputs:
+*@li row: the row of the two-dimensional coordinates.
+*@li col: the col of the two-dimensional coordinates.
+*@li n: the column count of the two-dimensional shape.
+*/
+REG_OP(Coordinates1DTo2D)
+    .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
+    .INPUT(shape, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
+    .OUTPUT(row, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
+    .OUTPUT(col, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
+    .OUTPUT(n, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
+    .OP_END_FACTORY_REG(Coordinates1DTo2D)
+
+/**
+*@brief x[0] is i, x[1] is j and x[2] is k when the algorithm is LU:
+y = 0 when i >= k && j < k,
+y = 1 when i == k && j == k,
+y = 2 when i > k && j == k,
+y = 3 when i == k && j > k,
+y = 4 when i > k && j > k,
+otherwise y = 5.
+Used for LU decomposition.
+*@par Inputs:
+*x: A Tensor of type int32/int64/uint64. \n
+
+*@par Attributes:
+*algorithm: A string; only "LU" is supported now.
+*@par Outputs:
+*y: A Tensor of type int32
+*/
+REG_OP(CaseCondition)
+    .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
+    .OUTPUT(y, TensorType({DT_INT32}))
+    .ATTR(algorithm, String, "LU")
+    .OP_END_FACTORY_REG(CaseCondition)
+
+/**
+*@brief Write tensor "value" into tensor "x" at offset "begin".
+*@par Inputs:
+*x: A Tensor of type float16/float/double/int32/int64. \n
+*begin: A Tensor of type int32/int64.
\n +*value: A Tensor of type float16/float/double/int32/int64. +*@par Outputs: +*x: same tensor with input x +*/ +REG_OP(SliceWrite) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ + DT_INT32, DT_INT64})) + .INPUT(begin, TensorType({DT_INT32, DT_INT64})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ + DT_INT32, DT_INT64})) + .OUTPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ + DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(SliceWrite) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index e19cbd7c..8ef69d8b 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/opt_info/opt_info.h b/third_party/fwkacllib/inc/opt_info/opt_info.h index 4dff695b..f04aa84c 100644 --- a/third_party/fwkacllib/inc/opt_info/opt_info.h +++ b/third_party/fwkacllib/inc/opt_info/opt_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2021 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/register/op_kernel_registry.h deleted file mode 100644 index 35fcc857..00000000 --- a/third_party/fwkacllib/inc/register/op_kernel_registry.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_ -#define INC_REGISTER_OP_KERNEL_REGISTRY_H_ -#include -#include -#include "register/register_types.h" -#include "register.h" - -namespace ge { -class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { - public: - using CreateFn = HostCpuOp* (*)(); - ~OpKernelRegistry(); - - static OpKernelRegistry& GetInstance(); - - bool IsRegistered(const std::string &op_type); - - void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn); - - std::unique_ptr CreateHostCpuOp(const std::string &op_type); - - private: - OpKernelRegistry(); - class OpKernelRegistryImpl; - /*lint -e148*/ - std::unique_ptr impl_; -}; -} // namespace ge - -#endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h deleted file mode 100644 index f7e37390..00000000 --- a/third_party/fwkacllib/inc/register/op_registry.h +++ /dev/null @@ -1,97 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_REGISTER_OP_REGISTRY_H_ -#define INC_REGISTER_OP_REGISTRY_H_ - -#include -#include -#include -#include -#include -#include - -#include "register/register.h" - -namespace domi { -enum RemoveInputType { - OMG_MOVE_TYPE_DTYPE = 0, - OMG_MOVE_TYPE_VALUE, - OMG_MOVE_TYPE_SHAPE, - OMG_MOVE_TYPE_FORMAT, - OMG_MOVE_TYPE_AXIS, - OMG_MOVE_TYPE_SCALAR_VALUE, - OMG_REMOVE_TYPE_WITH_COND = 1000, - OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE, - OMG_INPUT_REORDER, -}; - -struct RemoveInputConfigure { - int inputIdx = INT_MAX; - std::string attrName; - RemoveInputType moveType; - bool attrValue = false; - std::string originalType; - std::vector input_order; -}; - -class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { - public: - static OpRegistry *Instance(); - - std::vector registrationDatas; - - bool Register(const OpRegistrationData ®_data); - - domi::ImplyType GetImplyType(const std::string &op_type); - - void GetOpTypeByImplyType(std::vector &vec_op_type, const domi::ImplyType &imply_type); - - domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type); - - domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type); - - domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type); - - domi::FusionParseParamByOpFunc GetFusionParseParamByOpFunc(const std::string &op_type, - const std::string &ori_type); - - domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); - - Status GetParseSubgraphPostFunc(const std::string &op_type, domi::ParseSubgraphFuncV2 &parse_subgraph_func); - - domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); - - const std::vector &GetRemoveInputConfigure(const std::string &ori_optype) const; - - bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string &om_type); - - ParseOpToGraphFunc 
GetParseOpToGraphFunc(const std::string &op_type, const std::string &ori_type); - - private: - std::unordered_map op_run_mode_map_; - std::unordered_map op_parse_params_fn_map_; - std::unordered_map parse_params_by_op_func_map_; - std::unordered_map fusion_op_parse_params_fn_map_; - std::unordered_map fusion_parse_params_by_op_fn_map_; - std::unordered_map op_types_to_parse_subgraph_post_func_; - std::unordered_map> remove_input_configure_map_; - std::map origin_type_to_om_type_; - std::unordered_map parse_op_to_graph_fn_map_; - std::unordered_map op_types_to_parse_subgraph_post_func_v2_; -}; -} // namespace domi -#endif // INC_REGISTER_OP_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 7fc1cdea..8650f9c4 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -1,26 +1,16 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: base.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_BASE_H__ -#define __CCE_RUNTIME_BASE_H__ +#ifndef CCE_RUNTIME_BASE_H +#define CCE_RUNTIME_BASE_H #include #include "toolchain/prof_callback.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -36,6 +26,28 @@ extern "C" { typedef int32_t rtError_t; static const int32_t RT_ERROR_NONE = 0; // success +#ifndef char_t +typedef char char_t; +#endif + +#ifndef float32_t +typedef float float32_t; +#endif + +#ifndef float64_t +typedef double float64_t; +#endif + +/** + * @ingroup dvrt_base + * @brief device mode. + */ +typedef enum tagRtDeviceMode { + RT_DEVICE_MODE_SINGLE_DIE = 0, + RT_DEVICE_MODE_MULTI_DIE, + RT_DEVICE_MODE_RESERVED +} rtDeviceMode; + /** * @ingroup dvrt_base * @brief runtime exception numbers. 
@@ -73,28 +85,42 @@ typedef enum tagRtSwitchDataType {
 
 typedef enum tagRtStreamFlagType {
   RT_HEAD_STREAM = 0,  // first stream
-  RT_INVALID_FLAG = 0xFFFFFFFF,
+  RT_INVALID_FLAG = 0x7FFFFFFF,
 } rtStreamFlagType_t;
 
 typedef enum tagRtLimitType {
   RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0,  // timeout for power down , ms
 } rtLimitType_t;
 
+typedef enum tagRtFloatOverflowMode {
+    RT_OVERFLOW_MODE_SATURATION = 0,
+    RT_OVERFLOW_MODE_INFNAN,
+    RT_OVERFLOW_MODE_UNDEF,
+} rtFloatOverflowMode_t;
+
 typedef struct rtExceptionInfo {
   uint32_t taskid;
   uint32_t streamid;
   uint32_t tid;
   uint32_t deviceid;
   uint32_t retcode;
-} rtExceptionInfo;
+} rtExceptionInfo_t;
 
 typedef void (*rtErrorCallback)(rtExceptionType);
 
-typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo);
+typedef void (*rtTaskFailCallback)(rtExceptionInfo_t *exceptionInfo);
 
 typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen);
 
 /**
+ * @ingroup profiling_base
+ * @brief dataType: rtProfCtrlType_t
+ * @brief data: data switch or reporter function
+ * @brief dataLen: length of data
+ */
+typedef rtError_t (*rtProfCtrlHandle)(uint32_t dataType, void *data, uint32_t dataLen);
+
+/**
  * @ingroup dvrt_base
 * @brief stream handle.
 */
@@ -118,47 +144,72 @@ typedef void *rtLabel_t;
 */
 typedef void *rtModel_t;
 
+#define RT_PROF_MAX_DEV_NUM 64
+
+#define PATH_LEN_MAX 1023
+#define PARAM_LEN_MAX 4095
+typedef struct rtCommandHandleParams {
+    uint32_t pathLen;
+    uint32_t storageLimit;  // MB
+    uint32_t profDataLen;
+    char_t path[PATH_LEN_MAX + 1];
+    char_t profData[PARAM_LEN_MAX + 1];
+} rtCommandHandleParams_t;
+
 /**
  * @ingroup profiling_base
- * @brief runtime handle.
+ * @brief profiling command info
 */
-RTS_API rtError_t rtSetProfDirEx(const char *profDir, const char *address, const char *jobCtx);
+typedef struct rtProfCommandHandle {
+    uint64_t profSwitch;
+    uint64_t profSwitchHi;
+    uint32_t devNums;
+    uint32_t devIdList[RT_PROF_MAX_DEV_NUM];
+    uint32_t modelId;
+    uint32_t type;
+    rtCommandHandleParams_t commandHandleParams;
+} rtProfCommandHandle_t;
 
 /**
  * @ingroup profiling_base
- * @brief init profiler object.
+ * @brief type of app register profiling switch or reporter callback
 */
-RTS_API rtError_t rtProfilerInit(const char *profDir, const char *address, const char *jobCtx);
+typedef enum {
+    RT_PROF_CTRL_INVALID = 0,
+    RT_PROF_CTRL_SWITCH,
+    RT_PROF_CTRL_REPORTER,
+    RT_PROF_CTRL_BUTT
+} rtProfCtrlType_t;
 
 /**
  * @ingroup profiling_base
- * @brief config rts profiler.
+ * @brief runtime handle.
 */
-RTS_API rtError_t rtProfilerConfig(uint16_t type);
+RTS_API rtError_t rtSetProfDirEx(const char_t *profDir, const char_t *address, const char_t *jobCtx);
 
 /**
  * @ingroup profiling_base
- * @brief start rts profiler.
+ * @brief init profiler object.
 */
-RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList);
+RTS_API rtError_t rtProfilerInit(const char_t *profDir, const char_t *address, const char_t *jobCtx);
 
 /**
  * @ingroup profiling_base
- * @brief stop rts profiler.
+ * @brief config rts profiler.
 */
-RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList);
+RTS_API rtError_t rtProfilerConfig(uint16_t profConfig);
 
 /**
  * @ingroup profiling_base
 * @brief ts send keypoint profiler log.
 */
-RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream);
+RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stm);
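+
+/* Usage sketch (illustrative; "MY_MODULE_ID" is a placeholder module id):
+ * registering a control callback that reacts to profiling switch changes,
+ * using rtProfRegisterCtrlCallback declared further below.
+ *
+ *   static rtError_t ProfCtrl(uint32_t dataType, void *data, uint32_t dataLen) {
+ *       if (dataType == RT_PROF_CTRL_SWITCH) {
+ *           const rtProfCommandHandle_t *cmd = (const rtProfCommandHandle_t *)data;
+ *           // inspect cmd->profSwitch, cmd->devNums, cmd->devIdList here
+ *       }
+ *       return RT_ERROR_NONE;
+ *   }
+ *   // later: (void)rtProfRegisterCtrlCallback(MY_MODULE_ID, ProfCtrl);
+ */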
 
 /**
  * @ingroup profiling_base
 * @brief ts send keypoint profiler log.
 */
-RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream);
+RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stm);
 
 /**
  * @ingroup profiling_base
@@ -167,16 +218,65 @@ RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagI
 RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback);
 
 /**
- * @ingroup dvrt_base
- * @brief Returns the last error from a runtime call.
+ * @ingroup profiling_base
+ * @brief add the map of deviceId and GE model index, called by ge
+ * @param [in] geModelIdx The index of GE model
+ * @param [in] deviceId The id of device
+ * @return RT_ERROR_NONE for ok
+ * @return ACL_ERROR_RT_PARAM_INVALID for error input
 */
-RTS_API rtError_t rtGetLastError();
+RTS_API rtError_t rtSetDeviceIdByGeModelIdx(uint32_t geModelIdx, uint32_t deviceId);
 
 /**
- * @ingroup dvrt_base
- * @brief Returns the last error from a runtime call.
+ * @ingroup profiling_base
+ * @brief del the map of deviceId and GE model index, called by ge
+ * @param [in] geModelIdx The index of GE model
+ * @param [in] deviceId The id of device
+ * @return RT_ERROR_NONE for ok
+ * @return ACL_ERROR_RT_PARAM_INVALID for error input
 */
-RTS_API rtError_t rtPeekAtLastError();
+RTS_API rtError_t rtUnsetDeviceIdByGeModelIdx(uint32_t geModelIdx, uint32_t deviceId);
+
+/**
+ * @ingroup profiling_base
+ * @brief find deviceId by GE model index, called by profiling
+ * @param [in] geModelIdx The index of GE model
+ * @param [out] deviceId The id of device
+ * @return RT_ERROR_NONE for ok
+ * @return ACL_ERROR_RT_PARAM_INVALID for error input
+ * @return ACL_ERROR_RT_INTERNAL_ERROR when deviceId cannot be found by geModelIdx
+ */
+RTS_API rtError_t rtGetDeviceIdByGeModelIdx(uint32_t geModelIdx, uint32_t *deviceId);
+
+/**
+ * @ingroup profiling_base
+ * @brief set profiling switch, called by profiling
+ * @param [in] data rtProfCommandHandle
+ * @param [out] len length of data
+ * @return RT_ERROR_NONE for ok
+ * @return ACL_ERROR_RT_PARAM_INVALID for error input
+ */
+RTS_API rtError_t rtProfSetProSwitch(void *data, uint32_t len);
+
+/**
+ * @ingroup profiling_base
+ * @brief register callback of upper app, called by ge or acl
+ * @param [in] moduleId of APP
+ * @param [in] callback function when switch or reporter change
+ * @return RT_ERROR_NONE for ok
+ * @return ACL_ERROR_RT_PARAM_INVALID for error input
+ */
+RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle callback);
+
+/**
+ * @ingroup profiling_base
+ * @brief set profiling switch, called by profiling
+ * @param [in] data rtProfilingCommandHandle
+ * @param [in] len length of data
+ * @return RT_ERROR_NONE for ok
+ * @return ACL_ERROR_RT_PARAM_INVALID for error input
+ */
+RTS_API rtError_t rtProfilingCommandHandle(uint32_t type, void *data, uint32_t len);
 
 /**
  * @ingroup dvrt_base
@@ -202,7 +302,7 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback);
 * @param [out] NA
 * @return RT_ERROR_NONE for ok
 */
-RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback);
+RTS_API rtError_t rtRegDeviceStateCallback(const char_t *regName, rtDeviceStateCallback callback);
 
 /**
  * @ingroup dvrt_base
@@ -212,7 +312,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal
 * @param [out] NA
 * @return RT_ERROR_NONE for ok
 */
-RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallback callback);
+RTS_API 
rtError_t rtRegTaskFailCallbackByModule(const char_t *moduleName, rtTaskFailCallback callback); /** * @ingroup dvrt_base @@ -223,142 +323,150 @@ typedef void *rtNotify_t; /** * @ingroup dvrt_base * @brief create label instance - * @param [out] label created label + * @param [out] lbl created label * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelCreate(rtLabel_t *label); +RTS_API rtError_t rtLabelCreate(rtLabel_t *lbl); /** * @ingroup dvrt_base * @brief create label instance - * @param [out] label created label - * @param [in] model label set model + * @param [out] lbl created label + * @param [in] mdl label set model * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelCreateV2(rtLabel_t *label, rtModel_t model); +RTS_API rtError_t rtLabelCreateV2(rtLabel_t *lbl, rtModel_t mdl); /** * @ingroup dvrt_base * @brief set label and stream instance - * @param [in] label set label - * @param [in] stream set stream + * @param [in] lbl set label + * @param [in] stm set stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelSet(rtLabel_t label, rtStream_t stream); +RTS_API rtError_t rtLabelSet(rtLabel_t lbl, rtStream_t stm); /** * @ingroup dvrt_base * @brief destroy label instance - * @param [in] label label to destroy + * @param [in] lbl label to destroy * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelDestroy(rtLabel_t label); +RTS_API rtError_t rtLabelDestroy(rtLabel_t lbl); /** * @ingroup dvrt_base * @brief label switch instance * @param [in] ptr address to get value compared * @param [in] condition - * @param [in] value to compare + * @param [in] val to compare * @param [in] true_label goto label - * @param [in] stream to submit label_switch task + * @param [in] stm to submit label_switch task * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelSwitch(void *ptr, rtCondition_t condition, uint32_t value, rtLabel_t trueLabel, - rtStream_t stream); +RTS_API rtError_t rtLabelSwitch(void *ptr, rtCondition_t condition, uint32_t val, rtLabel_t trueLabel, + rtStream_t stm); /** * @ingroup dvrt_base * @brief goto label instance - * @param [in] label goto label - * @param [in] stream to submit label_goto task + * @param [in] lbl goto label + * @param [in] stm to submit label_goto task * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelGoto(rtLabel_t label, rtStream_t stream); +RTS_API rtError_t rtLabelGoto(rtLabel_t lbl, rtStream_t stm); /** * @ingroup dvrt_base * @brief name label instance - * @param [in] label instance + * @param [in] lbl instance * @param [in] name label name * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameLabel(rtLabel_t label, const char *name); +RTS_API rtError_t rtNameLabel(rtLabel_t lbl, const char_t *name); /** * @ingroup dvrt_base * @brief label switch by index * @param [in] ptr index value ptr - * @param [in] max index max value + * @param [in] maxValue index max value * @param [in] labelInfoPtr label content info ptr - * @param [in] stream set stream + * @param [in] stm set stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelSwitchByIndex(void *ptr, uint32_t max, void *labelInfoPtr, rtStream_t stream); 
+RTS_API rtError_t rtLabelSwitchByIndex(void *ptr, uint32_t maxValue, void *labelInfoPtr, rtStream_t stm); /** * @ingroup dvrt_base * @brief stream goto label - * @param [in] label goto label - * @param [in] stream stream to submit label_goto task + * @param [in] lbl goto label + * @param [in] stm stream to submit label_goto task * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelGotoEx(rtLabel_t label, rtStream_t stream); +RTS_API rtError_t rtLabelGotoEx(rtLabel_t lbl, rtStream_t stm); /** * @ingroup dvrt_base * @brief labels to dev info - * @param [in] label model label list + * @param [in] lbl model label list * @param [in] labelNumber label number * @param [in] dst device ptr * @param [in] dstMax dst size * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *dst, uint32_t dstMax); +RTS_API rtError_t rtLabelListCpy(rtLabel_t *lbl, uint32_t labelNumber, void *dst, uint32_t dstMax); /** * @ingroup dvrt_base * @brief labels to dev info - * @param [out] label created label handle - * @param [in] stream label bind stream + * @param [out] lbl created label handle + * @param [in] stm label bind stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); +RTS_API rtError_t rtLabelCreateEx(rtLabel_t *lbl, rtStream_t stm); /** * @ingroup dvrt_base * @brief labels to dev info - * @param [out] label created label handle - * @param [in] model label bind model - * @param [in] stream label bind stream + * @param [out] lbl created label handle + * @param [in] mdl label bind model + * @param [in] stm label bind stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); +RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *lbl, rtModel_t mdl, rtStream_t stm); /** * @ingroup dvrt_base * @brief get current thread last stream id and task id - * @param [out] stream id and task id + * @param [out] stm id and task id * @param [in] null * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for input null ptr */ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +/** + * @ingroup dvrt_base + * @brief get max model num + * @param [out] max model num + * @param [in] null + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetMaxModelNum(uint32_t *maxModelCount); +#if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_BASE_H__ +#endif // CCE_RUNTIME_BASE_H diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index a244c793..f39574b3 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -1,49 +1,43 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
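The base.h changes above drop rtGetLastError()/rtPeekAtLastError() in favour of profiling entry points keyed by GE model index. For reference, a minimal round-trip through the new deviceId/GE-model-index map might look like the following sketch (the model index and device id are illustrative, and error handling is abbreviated):

    #include "runtime/base.h"

    /* Sketch only: register, query, and remove a GE-model-index -> deviceId mapping. */
    static rtError_t MapModelToDevice(void)
    {
        const uint32_t geModelIdx = 5U;   /* illustrative GE model index */
        uint32_t deviceId = 0U;
        rtError_t ret = rtSetDeviceIdByGeModelIdx(geModelIdx, 0U);
        if (ret != RT_ERROR_NONE) {
            return ret;
        }
        ret = rtGetDeviceIdByGeModelIdx(geModelIdx, &deviceId); /* deviceId should read back as 0 */
        if (ret != RT_ERROR_NONE) {
            return ret;
        }
        return rtUnsetDeviceIdByGeModelIdx(geModelIdx, deviceId);
    }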
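The label APIs in the same hunk are renames only (label/stream/model become lbl/stm/mdl); behaviour is unchanged. A usage sketch, assuming a valid model and stream already exist and that a set-then-goto sequence on one stream is meaningful for the caller:

    #include "runtime/base.h"

    /* Sketch: create a label inside a model, mark it on a stream, then jump to it. */
    static rtError_t UseLabel(rtModel_t mdl, rtStream_t stm)
    {
        rtLabel_t lbl = NULL;
        rtError_t ret = rtLabelCreateV2(&lbl, mdl);
        if (ret != RT_ERROR_NONE) {
            return ret;
        }
        ret = rtLabelSet(lbl, stm);           /* submit a label_set task */
        if (ret == RT_ERROR_NONE) {
            ret = rtLabelGotoEx(lbl, stm);    /* submit a label_goto task */
        }
        (void)rtLabelDestroy(lbl);
        return ret;
    }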
- * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: config.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_CONFIG_H__ -#define __CCE_RUNTIME_CONFIG_H__ +#ifndef CCE_RUNTIME_CONFIG_H +#define CCE_RUNTIME_CONFIG_H #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif -#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) -#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) -#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) -#define PLAT_GET_VER(type) (type & 0xff) +#define PLAT_COMBINE(arch, chip, ver) (((arch) << 16U) | ((chip) << 8U) | (ver)) +#define PLAT_GET_ARCH(type) (((type) >> 16U) & 0xffffU) +#define PLAT_GET_CHIP(type) (((type) >> 8U) & 0xffU) +#define PLAT_GET_VER(type) ((type) & 0xffU) typedef enum tagRtArchType { ARCH_BEGIN = 0, ARCH_V100 = ARCH_BEGIN, - ARCH_V200, - ARCH_END, + ARCH_V200 = 1, + ARCH_V300 = 2, + ARCH_END = 3, } rtArchType_t; typedef enum tagRtChipType { CHIP_BEGIN = 0, CHIP_MINI = CHIP_BEGIN, - CHIP_CLOUD, - CHIP_MDC, - CHIP_LHISI, - CHIP_DC, - CHIP_CLOUD_V2, - CHIP_END, + CHIP_CLOUD = 1, + CHIP_MDC = 2, + CHIP_LHISI = 3, + CHIP_DC = 4, + CHIP_CLOUD_V2 = 5, + CHIP_NO_DEVICE = 6, + CHIP_MINI_V3 = 7, + CHIP_5612 = 8, /* 1911T */ + CHIP_END = 9, } rtChipType_t; typedef enum tagRtAicpuScheType { @@ -53,34 +47,39 @@ typedef enum tagRtAicpuScheType { } rtAicpuScheType; typedef enum tagRtDeviceCapabilityType { - RT_SCHEDULE_SOFTWARE = 0, // SoftWare Schedule - RT_SCHEDULE_SOFTWARE_OPT, - RT_SCHEDULE_HARDWARE, // HWTS Schedule - RT_AICPU_BLOCKING_OP_NOT_SUPPORT, - RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation + RT_SCHEDULE_SOFTWARE = 0, // Software Schedule + RT_SCHEDULE_SOFTWARE_OPT, + RT_SCHEDULE_HARDWARE, // HWTS Schedule + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, + RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation + RT_MODE_NO_FFTS, // no ffts + RT_MODE_FFTS, // 1981 get ffts work mode, ffts + RT_MODE_FFTS_PLUS, // 1981 get ffts work mode, ffts plus } rtDeviceCapabilityType; typedef enum tagRtVersion { VER_BEGIN = 0, VER_NA = VER_BEGIN, - VER_ES, - VER_CS, - VER_SD3403, - VER_END, + VER_ES = 1, + VER_CS = 2, + VER_SD3403 = 3, + VER_END = 4, } rtVersion_t; /* match rtChipType_t */ typedef enum tagRtPlatformType { PLATFORM_BEGIN = 0, PLATFORM_MINI_V1 = PLATFORM_BEGIN, - PLATFORM_CLOUD_V1, - PLATFORM_MINI_V2, - PLATFORM_LHISI_ES, - PLATFORM_LHISI_CS, - PLATFORM_DC, - PLATFORM_CLOUD_V2, - PLATFORM_LHISI_SD3403, - PLATFORM_END, + PLATFORM_CLOUD_V1 = 1, + PLATFORM_MINI_V2 = 2, + PLATFORM_LHISI_ES = 3, + PLATFORM_LHISI_CS = 4, + PLATFORM_DC = 5, + PLATFORM_CLOUD_V2 = 6, + PLATFORM_LHISI_SD3403 = 7, + PLATFORM_MINI_V3 = 8, + PLATFORM_MINI_5612 = 9, + PLATFORM_END = 10, } rtPlatformType_t; typedef enum tagRtCubeFracMKNFp16 { @@ -188,11 +187,20 @@ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); /** * @ingroup + * @brief get float overflow mode + * @param [out] floatOverflowMode + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetFloatOverflowMode(rtFloatOverflowMode_t * const floatOverflowMode); + +/** + * @ingroup * @brief get l2 buffer Info,virtual baseaddr,Size - * @param [in] stream + * @param [in] stm * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t 
rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); +RTS_API rtError_t rtMemGetL2Info(rtStream_t stm, void **ptr, uint32_t *size); /** * @ingroup @@ -211,11 +219,11 @@ RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); * @param [in] deviceId * @param [in] moduleType * @param [in] featureType - * @param [out] value + * @param [out] val * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value); +RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *val); /** * @ingroup @@ -235,8 +243,18 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); */ RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +/** + * @ingroup + * @brief get is Heterogenous. + * @param [out] heterogenous=1 Heterogenous Mode: read isHeterogenous=1 in ini file. + * @param [out] heterogenous=0 NOT Heterogenous Mode: + * 1:not found ini file, 2:error when reading ini, 3:Heterogenous value is not 1 + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetIsHeterogenous(int32_t *heterogenous); + +#if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // CCE_RUNTIME_CONFIG_H diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index e95d4c89..378470c7 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -1,25 +1,15 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: context.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_CONTEXT_H__ -#define __CCE_RUNTIME_CONTEXT_H__ +#ifndef CCE_RUNTIME_CONTEXT_H +#define CCE_RUNTIME_CONTEXT_H #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -53,46 +43,57 @@ typedef struct tagRtGroupInfo { /** * @ingroup rt_context * @brief create context and associates it with the calling thread - * @param [out] ctx created context + * @param [out] createCtx created context + * @param [in] flags context creation flag. set to 0. + * @param [in] devId device to create context on + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxCreate(rtContext_t *createCtx, uint32_t flags, int32_t devId); + +/** + * @ingroup rt_context + * @brief create context and associates it with the calling thread + * @param [out] createCtx created context * @param [in] flags context creation flag. set to 0. 
- * @param [in] device device to create context on + * @param [in] devId device to create context on + * @param [in] deviceMode the device mode * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxCreate(rtContext_t *ctx, uint32_t flags, int32_t device); +RTS_API rtError_t rtCtxCreateV2(rtContext_t *createCtx, uint32_t flags, int32_t devId, rtDeviceMode deviceMode); /** * @ingroup rt_context * @brief create context and associates it with the calling thread - * @param [out] ctx created context + * @param [out] createCtx created context * @param [in] flags context creation flag. set to 0. - * @param [in] device device to create context on + * @param [in] devId device to create context on * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxCreateEx(rtContext_t *ctx, uint32_t flags, int32_t device); +RTS_API rtError_t rtCtxCreateEx(rtContext_t *createCtx, uint32_t flags, int32_t devId); /** * @ingroup rt_context * @brief destroy context instance - * @param [in] ctx context to destroy + * @param [in] destroyCtx context to destroy * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxDestroy(rtContext_t ctx); +RTS_API rtError_t rtCtxDestroy(rtContext_t destroyCtx); /** * @ingroup rt_context * @brief destroy context instance - * @param [in] ctx context to destroy + * @param [in] destroyCtx context to destroy * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxDestroyEx(rtContext_t ctx); +RTS_API rtError_t rtCtxDestroyEx(rtContext_t destroyCtx); /** * @ingroup rt_context * @brief binds context to the calling CPU thread. - * @param [in] ctx context to bind. if NULL, unbind current context. + * @param [in] currentCtx context to bind. if NULL, unbind current context. * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxSetCurrent(rtContext_t ctx); +RTS_API rtError_t rtCtxSetCurrent(rtContext_t currentCtx); /** * @ingroup rt_context @@ -104,26 +105,26 @@ RTS_API rtError_t rtCtxSynchronize(void); /** * @ingroup rt_context * @brief returns the context bound to the calling CPU thread. - * @param [out] ctx returned context + * @param [out] currentCtx returned context * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxGetCurrent(rtContext_t *ctx); +RTS_API rtError_t rtCtxGetCurrent(rtContext_t *currentCtx); /** * @ingroup rt_context * @brief returns the primary context of device. 
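In config.h above, the macro arguments are now parenthesized and the shift amounts are unsigned, which makes the pack/unpack round trip safe for compound expressions. A small sketch of the intended use (the casts to uint32_t are illustrative):

    #include "runtime/config.h"

    /* Sketch: pack an (arch, chip, ver) triple into one word and unpack it again. */
    static void PlatRoundTrip(void)
    {
        const uint32_t plat = PLAT_COMBINE((uint32_t)ARCH_V200, (uint32_t)CHIP_CLOUD_V2, (uint32_t)VER_CS);
        const uint32_t arch = PLAT_GET_ARCH(plat); /* ARCH_V200 (1) from bits 16..31 */
        const uint32_t chip = PLAT_GET_CHIP(plat); /* CHIP_CLOUD_V2 (5) from bits 8..15 */
        const uint32_t ver  = PLAT_GET_VER(plat);  /* VER_CS (2) from bits 0..7 */
        (void)arch; (void)chip; (void)ver;
    }

The explicit enumerator values (ARCH_V300, CHIP_NO_DEVICE, and so on) also pin the wire values, so inserting a new member can no longer silently renumber the ones after it.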
- * @param [out] ctx returned context + * @param [out] primaryCtx returned context * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t device, rtContext_t *ctx); +RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t devId, rtContext_t *primaryCtx); /** * @ingroup rt_context * @brief returns the device ID for the current context - * @param [out] device returned device id + * @param [out] devId returned device id * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxGetDevice(int32_t *device); +RTS_API rtError_t rtCtxGetDevice(int32_t *devId); /** * @ingroup @@ -139,7 +140,7 @@ RTS_API rtError_t rtSetGroup(int32_t groupId); * @param [in] groupid count * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t count); +RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t cnt); /** * @ingroup @@ -147,19 +148,19 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint * @param [in] groupid count * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtGetGroupCount(uint32_t *count); +RTS_API rtError_t rtGetGroupCount(uint32_t *cnt); /** * @ingroup rt_context * @brief set context INF mode - * @param [in] mode + * @param [in] infMode * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtSetCtxINFMode(bool mode); +RTS_API rtError_t rtSetCtxINFMode(bool infMode); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_CONTEXT_H__ +#endif // CCE_RUNTIME_CONTEXT_H diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 18d837eb..b1157c2b 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -1,30 +1,21 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
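For the renamed context.h entry points, a minimal lifecycle sketch; that rtCtxCreate leaves the new context current is assumed from the "associates it with the calling thread" wording above:

    #include "runtime/context.h"

    /* Sketch: create a context on device 0, verify it is current, then destroy it. */
    static rtError_t ContextLifecycle(void)
    {
        rtContext_t createCtx = NULL;
        rtContext_t currentCtx = NULL;
        rtError_t ret = rtCtxCreate(&createCtx, 0U, 0);  /* flags must be 0 per the doc */
        if (ret != RT_ERROR_NONE) {
            return ret;
        }
        ret = rtCtxGetCurrent(&currentCtx);
        if ((ret == RT_ERROR_NONE) && (currentCtx == createCtx)) {
            ret = rtCtxDestroy(createCtx);
        }
        return ret;
    }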
+ * Description: dev.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_DEVICE_H__ -#define __CCE_RUNTIME_DEVICE_H__ +#ifndef CCE_RUNTIME_DEVICE_H +#define CCE_RUNTIME_DEVICE_H #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif -#define RT_CAPABILITY_SUPPORT (0x1) -#define RT_CAPABILITY_NOT_SUPPORT (0x0) +#define RT_CAPABILITY_SUPPORT (0x1U) +#define RT_CAPABILITY_NOT_SUPPORT (0x0U) +#define MEMORY_INFO_TS_4G_LIMITED (0x0U) // for compatibility typedef struct tagRTDeviceInfo { uint8_t env_type; // 0: FPGA 1: EMU 2: ESL @@ -45,27 +36,28 @@ typedef struct tagRTDeviceInfo { typedef enum tagRtRunMode { RT_RUN_MODE_OFFLINE = 0, - RT_RUN_MODE_ONLINE = 1, - RT_RUN_MODE_AICPU_SCHED = 2, + RT_RUN_MODE_ONLINE, + RT_RUN_MODE_AICPU_SCHED, RT_RUN_MODE_RESERVED } rtRunMode; typedef enum tagRtAicpuDeployType { AICPU_DEPLOY_CROSS_OS = 0x0, - AICPU_DEPLOY_CROSS_PROCESS = 0x1, - AICPU_DEPLOY_CROSS_THREAD = 0x2, + AICPU_DEPLOY_CROSS_PROCESS, + AICPU_DEPLOY_CROSS_THREAD, AICPU_DEPLOY_RESERVED } rtAicpuDeployType_t; typedef enum tagRtFeatureType { FEATURE_TYPE_MEMCPY = 0, - FEATURE_TYPE_MEMORY = 1, + FEATURE_TYPE_MEMORY, FEATURE_TYPE_RSV } rtFeatureType_t; typedef enum tagRtDeviceFeatureType { FEATURE_TYPE_SCHE, FEATURE_TYPE_BLOCKING_OPERATOR, + FEATURE_TYPE_FFTS_MODE, FEATURE_TYPE_END, } rtDeviceFeatureType_t; @@ -75,29 +67,44 @@ typedef enum tagMemcpyInfo { } rtMemcpyInfo_t; typedef enum tagMemoryInfo { - MEMORY_INFO_TS_4G_LIMITED = 0, + MEMORY_INFO_TS_LIMITED = 0, MEMORY_INFO_RSV } rtMemoryInfo_t; typedef enum tagRtDeviceModuleType { - RT_MODULE_TYPE_SYSTEM = 0, - RT_MODULE_TYPE_AICPU, - RT_MODULE_TYPE_CCPU, - RT_MODULE_TYPE_DCPU, - RT_MODULE_TYPE_AICORE, - RT_MODULE_TYPE_TSCPU, - RT_MODULE_TYPE_PCIE, - RT_MODULE_TYPE_VECTOR_CORE -} tagRtDeviceModuleType_t; + RT_MODULE_TYPE_SYSTEM = 0, /**< system info*/ + RT_MODULE_TYPE_AICPU, /** < aicpu info*/ + RT_MODULE_TYPE_CCPU, /**< ccpu_info*/ + RT_MODULE_TYPE_DCPU, /**< dcpu info*/ + RT_MODULE_TYPE_AICORE, /**< AI CORE info*/ + RT_MODULE_TYPE_TSCPU, /**< tscpu info*/ + RT_MODULE_TYPE_PCIE, /**< PCIE info*/ + RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/ +} rtDeviceModuleType_t; + +typedef enum tagRtMemRequestFeature { + MEM_REQUEST_FEATURE_DEFAULT = 0, + MEM_REQUEST_FEATURE_OPP, + MEM_REQUEST_FEATURE_RESERVED +} rtMemRequestFeature_t; + +// used for rtGetDevMsg callback function +typedef void (*rtGetMsgCallback)(const char_t *msg, uint32_t len); + +typedef enum tagGetDevMsgType { + RT_GET_DEV_ERROR_MSG = 0, + RT_GET_DEV_RUNNING_STREAM_SNAPSHOT_MSG, + RT_GET_DEV_MSG_RESERVE +} rtGetDevMsgType_t; /** * @ingroup dvrt_dev * @brief get total device number. 
- * @param [in|out] count the device number + * @param [in|out] cnt the device number * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetDeviceCount(int32_t *count); +RTS_API rtError_t rtGetDeviceCount(int32_t *cnt); /** * @ingroup dvrt_dev * @brief get device ids @@ -135,29 +142,66 @@ RTS_API rtError_t rtGetDeviceIDs(uint32_t *devices, uint32_t len); INFO_TYPE_IP, INFO_TYPE_ENDIAN, } DEV_INFO_TYPE; - * @param [out] value the device info + * @param [out] val the device info * @return RT_ERROR_NONE for ok * @return RT_ERROR_DRV_ERR for error */ -RTS_API rtError_t rtGetDeviceInfo(uint32_t deviceId, int32_t moduleType, int32_t infoType, int64_t *value); +RTS_API rtError_t rtGetDeviceInfo(uint32_t deviceId, int32_t moduleType, int32_t infoType, int64_t *val); /** * @ingroup dvrt_dev * @brief set target device for current thread - * @param [int] device the device id + * @param [int] devId the device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetDevice(int32_t devId); + +/** + * @ingroup dvrt_dev + * @brief set target device for current thread + * @param [int] devId the device id + * @param [int] deviceMode the device mode + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetDeviceV2(int32_t devId, rtDeviceMode deviceMode); + +/** + * @ingroup dvrt_dev + * @brief get deviceMode + * @param [out] deviceMode the device mode + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDeviceMode(rtDeviceMode *deviceMode); + +/** + * @ingroup dvrt_dev + * @brief set target die for current thread + * @param [int] die the die id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetDie(int32_t die); + +/** + * @ingroup dvrt_dev + * @brief get target die of current thread + * @param [in|out] die the die id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetDevice(int32_t device); +RTS_API rtError_t rtGetDie(int32_t *die); /** * @ingroup dvrt_dev * @brief set target device for current thread - * @param [int] device the device id + * @param [int] devId the device id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetDeviceEx(int32_t device); +RTS_API rtError_t rtSetDeviceEx(int32_t devId); /** * @ingroup dvrt_dev @@ -202,13 +246,13 @@ RTS_API rtError_t rtDisableP2P(uint32_t devIdDes, uint32_t phyIdSrc); /** * @ingroup dvrt_dev * @brief get cability of P2P omemry copy betwen device and peeredevic. 
- * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] peerDevice the physical device id * @param [outv] *canAccessPeer 1:enable 0:disable * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t device, uint32_t peerDevice); +RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t devId, uint32_t peerDevice); /** * @ingroup dvrt_dev @@ -232,11 +276,11 @@ RTS_API rtError_t rtDeviceGetBareTgid(uint32_t *pid); /** * @ingroup dvrt_dev * @brief get target device of current thread - * @param [in|out] device the device id + * @param [in|out] devId the device id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetDevice(int32_t *device); +RTS_API rtError_t rtGetDevice(int32_t *devId); /** * @ingroup dvrt_dev @@ -244,7 +288,7 @@ RTS_API rtError_t rtGetDevice(int32_t *device); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceReset(int32_t device); +RTS_API rtError_t rtDeviceReset(int32_t devId); /** * @ingroup dvrt_dev @@ -252,19 +296,19 @@ RTS_API rtError_t rtDeviceReset(int32_t device); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceResetEx(int32_t device); +RTS_API rtError_t rtDeviceResetEx(int32_t devId); /** * @ingroup dvrt_dev * @brief get total device infomation. - * @param [in] device the device id + * @param [in] devId the device id * @param [in] type limit type RT_LIMIT_TYPE_LOW_POWER_TIMEOUT=0 - * @param [in] value limit value + * @param [in] val limit value * @param [out] info the device info * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceSetLimit(int32_t device, rtLimitType_t type, uint32_t value); +RTS_API rtError_t rtDeviceSetLimit(int32_t devId, rtLimitType_t type, uint32_t val); /** * @ingroup dvrt_dev @@ -286,15 +330,6 @@ RTS_API rtError_t rtDeviceGetStreamPriorityRange(int32_t *leastPriority, int32_t /** * @ingroup dvrt_dev - * @brief Set exception handling callback function - * @param [in] callback rtExceptiontype - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ -RTS_API rtError_t rtSetExceptCallback(rtErrorCallback callback); - -/** - * @ingroup dvrt_dev * @brief Setting Scheduling Type of Graph * @param [in] tsId the ts id * @return RT_ERROR_NONE for ok @@ -309,7 +344,7 @@ RTS_API rtError_t rtSetTSDevice(uint32_t tsId); * @return RT_ERROR_NONE for ok * @return RT_ERROR_DRV_ERR for can not get run mode */ -RTS_API rtError_t rtGetRunMode(rtRunMode *mode); +RTS_API rtError_t rtGetRunMode(rtRunMode *runMode); /** * @ingroup dvrt_dev @@ -325,14 +360,14 @@ RTS_API rtError_t rtGetAicpuDeploy(rtAicpuDeployType_t *deployType); * @brief set chipType * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtSetSocVersion(const char *version); +RTS_API rtError_t rtSetSocVersion(const char_t *ver); /** * @ingroup dvrt_dev * @brief get chipType * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetSocVersion(char *version, const uint32_t maxLen); +RTS_API rtError_t rtGetSocVersion(char_t *ver, const uint32_t maxLen); /** * @ingroup dvrt_dev @@ -340,10 +375,10 @@ RTS_API rtError_t rtGetSocVersion(char *version, const uint32_t maxLen); * @param [in] devId the logical device id * @param [in] otherDevId the other logical device id * @param [in] infoType 
info type - * @param [in|out] value pair info + * @param [in|out] val pair info * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int32_t infoType, int64_t *value); +RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int32_t infoType, int64_t *val); /** * @ingroup dvrt_dev @@ -358,19 +393,19 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, MEMCPY_INFO _RSV, } rtMemcpyInfo_t; - * @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT + * @param [out] val the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); +RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *val); /** * @ingroup dvrt_dev * @brief set target device for current thread - * @param [int] device the device id + * @param [int] devId the device id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); +RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t devId); /** * @ingroup dvrt_dev @@ -378,10 +413,54 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); +RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t devId); + +/** + * @ingroup dvrt_dev + * @brief get device message + * @param [in] rtGetDevMsgType_t getMsgType:msg type + * @param [in] GetMsgCallback callback:acl callback function + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback callback); + +/** + * @ingroup dvrt_dev + * @brief get ts mem type + * @param [in] rtMemRequestFeature_t mem request feature type + * @param [in] mem request size + * @return RT_MEMORY_TS, RT_MEMORY_HBM, RT_MEMORY_TS | RT_MEMORY_POLICY_HUGE_PAGE_ONLY + */ +RTS_API uint32_t rtGetTsMemType(rtMemRequestFeature_t featureType, uint32_t memSize); + +/** + * @ingroup + * @brief set saturation mode for current device. + * @param [in] saturation mode. + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtSetDeviceSatMode(rtFloatOverflowMode_t floatOverflowMode); + +/** + * @ingroup + * @brief get saturation mode for current device. + * @param [out] saturation mode. + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetDeviceSatMode(rtFloatOverflowMode_t *floatOverflowMode); + +/** + * @ingroup + * @brief get saturation mode for target stream. + * @param [in] target stm + * @param [out] saturation mode. 
+ * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetDeviceSatModeForStream(rtStream_t stm, rtFloatOverflowMode_t *floatOverflowMode); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_DEVICE_H__ +#endif // CCE_RUNTIME_DEVICE_H diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h deleted file mode 100644 index 6e451695..00000000 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -#ifndef __CCE_RUNTIME_DVFSPROFILE_H__ -#define __CCE_RUNTIME_DVFSPROFILE_H__ - -#include "base.h" - -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) -extern "C" { -#endif - -typedef enum dvfsProfileMode { - DVFS_PROFILE_PERFORMANCE_PRIORITY, - DVFS_PROFILE_BALANCE_PRIORITY, - DVFS_PROFILE_POWER_PRIORITY, - DVFS_PROFILE_PRIORITY_MAX -} DvfsProfileMode; - -/** - * @ingroup dvrt_dvfsprofile - * @brief Set the performance mode of the device - * @param [in] mode dvfsProfileMode - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ -RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode mode); - -/** - * @ingroup dvrt_dvfsprofile - * @brief Set the performance mode of the device - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for invalid value - */ -RTS_API rtError_t rtUnsetDvfsProfile(); - -/** - * @ingroup dvrt_dvfsprofile - * @brief Get the current performance mode of the device - * @param [in|out] pmode dvfsProfileMode type pointer - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ -RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); - -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) -} -#endif - -#endif // __CCE_RUNTIME_PROFILE_H__ diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 57948c47..6bd47992 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -1,25 +1,15 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
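dev.h above gains a callback-based message query (and loses rtSetExceptCallback, while dvfsprofile.h is removed outright). A sketch of rtGetDevMsg, assuming char_t is the runtime's typedef for char and that device 0 exists:

    #include <stdio.h>
    #include "runtime/dev.h"

    /* Sketch: print device error messages through the new callback-based query. */
    static void PrintDevMsg(const char_t *msg, uint32_t len)
    {
        printf("%.*s\n", (int)len, msg);
    }

    static rtError_t DumpDeviceErrors(void)
    {
        rtError_t ret = rtSetDevice(0);  /* device 0 is illustrative */
        if (ret != RT_ERROR_NONE) {
            return ret;
        }
        ret = rtGetDevMsg(RT_GET_DEV_ERROR_MSG, PrintDevMsg);
        (void)rtDeviceReset(0);
        return ret;
    }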
+ * Description: event.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_EVENT_H__ -#define __CCE_RUNTIME_EVENT_H__ +#ifndef CCE_RUNTIME_EVENT_H +#define CCE_RUNTIME_EVENT_H #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -29,12 +19,17 @@ typedef enum rtEventWaitStatus { EVENT_STATUS_MAX = 2, } rtEventWaitStatus_t; +typedef enum rtEventStatus { + RT_EVENT_INIT = 0, + RT_EVENT_RECORDED = 1, +} rtEventStatus_t; + /** * @ingroup event_flags * @brief event op bit flags */ -#define RT_EVENT_DEFAULT (0x0E) -#define RT_EVENT_WITH_FLAG (0x0B) +#define RT_EVENT_DEFAULT (0x0EU) +#define RT_EVENT_WITH_FLAG (0x0BU) #define RT_EVENT_DDSYNC_NS 0x01U #define RT_EVENT_STREAM_MARK 0x02U @@ -48,7 +43,7 @@ typedef enum rtEventWaitStatus { * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventCreate(rtEvent_t *event); +RTS_API rtError_t rtEventCreate(rtEvent_t *evt); /** * @ingroup dvrt_event @@ -57,103 +52,113 @@ RTS_API rtError_t rtEventCreate(rtEvent_t *event); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag); +RTS_API rtError_t rtEventCreateWithFlag(rtEvent_t *evt, uint32_t flag); /** * @ingroup dvrt_event * @brief destroy event instance - * @param [in] event event to destroy + * @param [in] evt event to destroy * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventDestroy(rtEvent_t event); +RTS_API rtError_t rtEventDestroy(rtEvent_t evt); /** * @ingroup dvrt_event * @brief get event id - * @param [in] event_ event to be get + * @param [in] evt event to be get * @param [in|out] event_id event_id id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetEventID(rtEvent_t event, uint32_t *eventId); +RTS_API rtError_t rtGetEventID(rtEvent_t evt, uint32_t *evtId); /** * @ingroup dvrt_event * @brief event record * @param [int] event event to record - * @param [int] stream stream handle + * @param [int] stm stream handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventRecord(rtEvent_t event, rtStream_t stream); +RTS_API rtError_t rtEventRecord(rtEvent_t evt, rtStream_t stm); /** * @ingroup dvrt_event * @brief event reset * @param [int] event event to reset - * @param [int] stream stream handle + * @param [int] stm stream handle * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtEventReset(rtEvent_t event, rtStream_t stream); +RTS_API rtError_t rtEventReset(rtEvent_t evt, rtStream_t stm); /** * @ingroup dvrt_event * @brief wait event to be complete - * @param [in] event event to wait + * @param [in] evt event to wait * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventSynchronize(rtEvent_t event); +RTS_API rtError_t rtEventSynchronize(rtEvent_t evt); /** * @ingroup dvrt_event * @brief Queries an event's status - * @param [in] event event to query + * @param [in] evt event to query * @return RT_ERROR_NONE for complete * @return RT_ERROR_EVENT_NOT_COMPLETE for not complete */ -RTS_API rtError_t rtEventQuery(rtEvent_t event); +RTS_API rtError_t rtEventQuery(rtEvent_t evt); /** * @ingroup dvrt_event * @brief Queries an event's wait status - * @param [in] event event to query + * @param [in] evt event to query * @param [in out] EVENT_WAIT_STATUS status 
* @return EVENT_STATUS_COMPLETE for complete * @return EVENT_STATUS_NOT_READY for not complete */ -RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t event, rtEventWaitStatus_t *status); +RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t evt, rtEventWaitStatus_t *status); + +/** + * @ingroup dvrt_event + * @brief Queries an event's status + * @param [in] evt event to query + * @param [in out] rtEventStatus_t status + * @return RT_EVENT_RECORDED for recorded + * @return RT_EVENT_INIT for not recorded + */ +RTS_API rtError_t rtEventQueryStatus(rtEvent_t evt, rtEventStatus_t *status); /** * @ingroup dvrt_event * @brief computes the elapsed time between events. - * @param [in] time time between start and end in ms - * @param [in] start starting event - * @param [in] end ending event + * @param [in] timeInterval time between start and end in ms + * @param [in] startEvent starting event + * @param [in] endEvent ending event * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtEventElapsedTime(float *time, rtEvent_t start, rtEvent_t end); +RTS_API rtError_t rtEventElapsedTime(float32_t *timeInterval, rtEvent_t startEvent, rtEvent_t endEvent); /** * @ingroup dvrt_event * @brief get the elapsed time from a event after event recorded. - * @param [in] time time in ms - * @param [in] event event handle + * @param [in] timeStamp time in ms + * @param [in] evt event handle * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event); +RTS_API rtError_t rtEventGetTimeStamp(uint64_t *timeStamp, rtEvent_t evt); /** * @ingroup dvrt_event * @brief name an event - * @param [in] event event to be named + * @param [in] evt event to be named * @param [in] name identification name * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input of event, name * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtNameEvent(rtEvent_t event, const char *name); +RTS_API rtError_t rtNameEvent(rtEvent_t evt, const char_t *name); /** * @ingroup dvrt_event @@ -184,7 +189,7 @@ RTS_API rtError_t rtNotifyDestroy(rtNotify_t notify); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyRecord(rtNotify_t notify, rtStream_t stream); +RTS_API rtError_t rtNotifyRecord(rtNotify_t notify, rtStream_t stm); /** * @ingroup dvrt_event @@ -195,19 +200,19 @@ RTS_API rtError_t rtNotifyRecord(rtNotify_t notify, rtStream_t stream); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); +RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stm); /** * @ingroup dvrt_event * @brief Wait for a notify with time out - * @param [in] notify_ notify to be wait - * @param [in] stream_ input stream + * @param [in] notify notify to be wait + * @param [in] stm input stream * @param [in] timeOut input timeOut * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_, uint32_t timeOut); +RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify, rtStream_t stm, uint32_t timeOut); /** * @ingroup dvrt_event @@ -217,7 +222,7 @@ RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_ * @return RT_ERROR_NONE for ok * 
@return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameNotify(rtNotify_t notify, const char *name); +RTS_API rtError_t rtNameNotify(rtNotify_t notify, const char_t *name); /** * @ingroup dvrt_event @@ -237,7 +242,7 @@ RTS_API rtError_t rtGetNotifyID(rtNotify_t notify, uint32_t *notifyId); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input of */ -RTS_API rtError_t rtIpcSetNotifyName(rtNotify_t notify, char *name, uint32_t len); +RTS_API rtError_t rtIpcSetNotifyName(rtNotify_t notify, char_t *name, uint32_t len); /** * @ingroup dvrt_event @@ -247,7 +252,7 @@ RTS_API rtError_t rtIpcSetNotifyName(rtNotify_t notify, char *name, uint32_t len * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtIpcOpenNotify(rtNotify_t *notify, const char *name); +RTS_API rtError_t rtIpcOpenNotify(rtNotify_t *notify, const char_t *name); /** * @ingroup dvrt_event @@ -270,10 +275,10 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); +RTS_API rtError_t rtSetIpcNotifyPid(const char_t *name, int32_t pid[], int32_t num); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_EVENT_H__ +#endif // CCE_RUNTIME_EVENT_H diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 9b0221c7..85c2d832 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -1,26 +1,16 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
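For the renamed event.h APIs, a timing sketch using two recorded events; float32_t is assumed to be the runtime's typedef for float:

    #include "runtime/event.h"

    /* Sketch: time a stretch of stream work with a start/end event pair. */
    static rtError_t TimeStreamWork(rtStream_t stm, float32_t *msOut)
    {
        rtEvent_t startEvent = NULL;
        rtEvent_t endEvent = NULL;
        rtError_t ret = rtEventCreate(&startEvent);
        if (ret != RT_ERROR_NONE) {
            return ret;
        }
        ret = rtEventCreate(&endEvent);
        if (ret != RT_ERROR_NONE) {
            (void)rtEventDestroy(startEvent);
            return ret;
        }
        (void)rtEventRecord(startEvent, stm);
        /* ... submit work on stm here ... */
        (void)rtEventRecord(endEvent, stm);
        ret = rtEventSynchronize(endEvent);  /* wait for the second record to complete */
        if (ret == RT_ERROR_NONE) {
            ret = rtEventElapsedTime(msOut, startEvent, endEvent);
        }
        (void)rtEventDestroy(startEvent);
        (void)rtEventDestroy(endEvent);
        return ret;
    }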
+ * Description: kernel.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_KERNEL_H__ -#define __CCE_RUNTIME_KERNEL_H__ +#ifndef CCE_RUNTIME_KERNEL_H +#define CCE_RUNTIME_KERNEL_H #include "base.h" #include "stream.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -116,26 +106,71 @@ typedef struct rtKernelInfo { * @brief op name */ typedef struct rtKernelLaunchNames { - const char *soName; // defined for so name - const char *kernelName; // defined for kernel type name - const char *opName; // defined for operator name + const char_t *soName; // defined for so name + const char_t *kernelName; // defined for kernel type name + const char_t *opName; // defined for operator name } rtKernelLaunchNames_t; /** + * @ingroup rt_kernel + * @brief args struct + */ +typedef struct tagRtArgsWithTiling { + void *args; // args host mem addr + uint32_t argsSize; // input + output + tiling addr size + tiling data size + uint32_t argsSizeWithoutTiling; // input + output + tiling addr size + uint16_t tilingAddrOffset; // tiling addr offset + uint16_t tilingDataOffset; // tiling data offset + uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list + uint16_t hostInputDataOffset; // host_mem input data offset + uint8_t hasHostMemInput; // has host_memory input data in args or not: 0 means no host_memory input data, + // others means has host_memory input data. + uint8_t isNoNeedH2DCopy; // is no need host to device copy: 0 means need H2D copy, + // others means doesn't need H2D copy. + uint8_t reserved[6]; +} rtArgsWithTiling_t; + +/** + * @ingroup rt_kernel + * @brief host memory input struct + */ +typedef struct rtHostInputInfo { + uint16_t addrOffset; + uint16_t dataOffset; +} rtHostInputInfo_t; + +/** + * @ingroup rt_kernel + * @brief args struct + */ +typedef struct tagRtArgsEx { + void *args; // args host mem addr + rtHostInputInfo_t *hostInputInfoPtr; // nullptr means no host mem input + uint32_t argsSize; // input + output + tiling addr size + tiling data size + host mem + uint16_t tilingAddrOffset; // tiling addr offset + uint16_t tilingDataOffset; // tiling data offset + uint16_t hostInputInfoNum; // hostInputInfo num + uint8_t hasTiling; // if has tiling: 0 means no tiling + uint8_t isNoNeedH2DCopy; // is no need host to device copy: 0 means need H2D copy, + // others means doesn't need H2D copy. 
+ uint8_t reserved[4]; +} rtArgsEx_t; + +/** * @ingroup rt_KernelConfigDump * @brief device dump type */ typedef enum tagRtDumpKind { RT_DATA_DUMP_KIND_INVALID = -1, RT_DATA_DUMP_KIND_DUMP = 0, - RT_DATA_DUMP_KIND_RESERVED + RT_DATA_DUMP_KIND_RESERVED = 1, } rtDumpKind_t; /** * @ingroup rt_kernel * @brief report callback */ -typedef rtError_t (*rtKernelReportCallback)(rtStream_t stream, rtKernelInfo_t kernelInfo); +typedef rtError_t (*rtKernelReportCallback)(rtStream_t stm, rtKernelInfo_t kernelInfo); /** * @ingroup rt_kernel @@ -147,120 +182,130 @@ typedef void (*rtCallback_t)(void *fnData); * @ingroup rt_kernel * @brief magic number of plain binary for aicore */ -#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50 +#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50U /** * @ingroup rt_kernel * @brief magic number of plain binary for aicpu */ -#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51 +#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51U /** * @ingroup rt_kernel * @brief magic number of plain binary for aivector */ -#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52 +#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicore */ -#define RT_DEV_BINARY_MAGIC_ELF 0x43554245 +#define RT_DEV_BINARY_MAGIC_ELF 0x43554245U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicpu */ -#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243 +#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243U /** * @ingroup rt_kernel * @brief magic number of elf binary for aivector */ -#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246 +#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicube */ -#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343 +#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343U /** * @ingroup rt_kernel_flags * @brief kernel op bit flags */ -#define RT_KERNEL_DEFAULT (0x00) -#define RT_KERNEL_CONVERT (0x01) -#define RT_KERNEL_DUMPFLAG (0x02) -#define RT_FUSION_KERNEL_DUMPFLAG (0x04) -#define RT_KERNEL_CUSTOM_AICPU (0x08) +#define RT_KERNEL_DEFAULT (0x00U) +#define RT_KERNEL_CONVERT (0x01U) +#define RT_KERNEL_DUMPFLAG (0x02U) +#define RT_FUSION_KERNEL_DUMPFLAG (0x04U) +#define RT_KERNEL_CUSTOM_AICPU (0x08U) +#define RT_KERNEL_FFTSPLUS_DYNAMIC_SHAPE_DUMPFLAG (0x10U) +#define RT_KERNEL_FFTSPLUS_STATIC_SHAPE_DUMPFLAG (0x20U) // STARS topic scheduler sqe : topic_type -#define RT_KERNEL_DEVICE_FIRST (0x10) -#define RT_KERNEL_HOST_ONLY (0x20) -#define RT_KERNEL_HOST_FIRST (0x40) +#define RT_KERNEL_DEVICE_FIRST (0x10U) +#define RT_KERNEL_HOST_ONLY (0x20U) +#define RT_KERNEL_HOST_FIRST (0x40U) +#define RT_KERNEL_BIUPERF_FLAG (0x80U) /** * @ingroup rt_kernel * @brief kernel mode **/ -#define RT_DEFAULT_KERNEL_MODE (0x00) -#define RT_NORMAL_KERNEL_MODE (0x01) -#define RT_ALL_KERNEL_MODE (0x02) +#define RT_DEFAULT_KERNEL_MODE (0x00U) +#define RT_NORMAL_KERNEL_MODE (0x01U) +#define RT_ALL_KERNEL_MODE (0x02U) + +/** + * @ingroup rt_kernel + * @brief SHAPE kernel type +**/ +#define RT_STATIC_SHAPE_KERNEL (0x00U) +#define RT_DYNAMIC_SHAPE_KERNEL (0x01U) /** * @ingroup rt_kernel * @brief kernel L1 Fusion Dump bit flags */ -#define RT_DDR_ADDR (0x0) +#define RT_DDR_ADDR (0x0U) /** * @ingroup rt_kernel * @brief register device binary * @param [in] bin device binary description - * @param [out] handle device binary handle + * @param [out] hdl device binary handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDevBinaryRegister(const 
rtDevBinary_t *bin, void **handle); +RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **hdl); /** * @ingroup rt_kernel * @brief register device binary with all kernel * @param [in] bin device binary description - * @param [out] handle device binary handle + * @param [out] hdl device binary handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); +RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **hdl); /** * @ingroup rt_kernel * @brief register fast memory device binary - * @param [in] handle device binary handle + * @param [in] hdl device binary handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtBinaryRegisterToFastMemory(void *handle); +RTS_API rtError_t rtBinaryRegisterToFastMemory(void *hdl); /** * @ingroup rt_kernel * @brief unregister device binary - * @param [in] handle device binary handle + * @param [in] hdl device binary handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDevBinaryUnRegister(void *handle); +RTS_API rtError_t rtDevBinaryUnRegister(void *hdl); /** * @ingroup rt_kernel * @brief register device binary metadata - * @param [in] handle device binary description + * @param [in] hdl device binary handle * @param [in] metadata device binary metadata * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMetadataRegister(void *handle, const char *metadata); +RTS_API rtError_t rtMetadataRegister(void *hdl, const char_t *metadata); /** * @ingroup rt_kernel @@ -278,13 +323,13 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); * @param [in] binHandle device binary handle * @param [in] stubFunc stub function * @param [in] stubName stub function name - * @param [in] devFunc device function description. symbol name or address - * offset, depending binary type. + * @param [in] kernelInfoExt kernel info extension. device function description or tiling key, + * depending on static or dynamic shape. 
* @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char *stubName, const void *devFunc, - uint32_t funcMode); +RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, + const void *kernelInfoExt, uint32_t funcMode); /** * @ingroup rt_kernel @@ -294,7 +339,7 @@ RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, cons * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetFunctionByName(const char *stubName, void **stubFunc); +RTS_API rtError_t rtGetFunctionByName(const char_t *stubName, void **stubFunc); /** * @ingroup rt_kernel @@ -312,7 +357,7 @@ RTS_API rtError_t rtGetAddrByFun(const void *stubFunc, void **addr); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtQueryFunctionRegistered(const char *stubName); +RTS_API rtError_t rtQueryFunctionRegistered(const char_t *stubName); /** * @ingroup rt_kernel @@ -324,7 +369,22 @@ RTS_API rtError_t rtQueryFunctionRegistered(const char *stubName); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, uint32_t blockDim, void **dumpBaseAddr, - rtStream_t stream); + rtStream_t stm); + +/** +* @ingroup rt_kernel +* @brief get kernel address and prefetchCnt +* @param [in] hdl program for dynamic shape +* @param [in] tilingKey tilingKey for dynamic shape +* @param [in] stubFunc stubFunc for static shape +* @param [in] flag flag for distinguishing between dynamic shape and static shape +* @param [out] addr address of kernel function +* @param [out] prefetchCnt prefetchCnt of kernel function +* @return RT_ERROR_NONE for ok +* @return RT_ERROR_INVALID_VALUE for error input +*/ +RTS_API rtError_t rtKernelGetAddrAndPrefCnt(void *hdl, const uint64_t tilingKey, const void * const stubFunc, + const uint32_t flag, void **addr, uint32_t *prefetchCnt); /** * @ingroup rt_kernel @@ -334,45 +394,44 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u * @param [in] args argments address for kernel function * @param [in] argsSize argements size * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, - rtSmDesc_t *smDesc, rtStream_t stream); + rtSmDesc_t *smDesc, rtStream_t stm); /** * @ingroup rt_kernel * @brief launch kernel with handle to device - * @param [in] handle program - * @param [in] devFunc device function description. 
- * @param [in] blockDim block dimentions - * @param [in] args argments address for kernel function - * @param [in] argsSize argements size - * @param [in] smDesc shared memory description - * @param [in] stream associated stream - * @param [in] kernelInfo kernel info + * @param [in] hdl program + * @param [in] tilingKey tilingKey + * @param [in] blockDim block dimentions + * @param [in] argsInfo argments address for kernel function + * @param [in] smDesc shared memory description + * @param [in] stm associated stream + * @param [in] kernelInfo kernel info * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, - rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); +RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const uint64_t tilingKey, uint32_t blockDim, + rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, + const void *kernelInfo); /** - * @ingroup rt_kernel + * @ingroup rtKernelLaunchWithFlag * @brief launch kernel to device * @param [in] stubFunc stub function * @param [in] blockDim block dimentions - * @param [in] args argments address for kernel function - * @param [in] argsSize argements size - * @param [in] smDesc shared memory description - * @param [in] stream associated stream - * @param [in] flag dump flag + * @param [in] argsInfo argments address for kernel function + * @param [in] smDesc shared memory description + * @param [in] stm associated stream + * @param [in] flags dump flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, - rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); +RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, rtArgsEx_t *argsInfo, + rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); /** * @ingroup rt_kernel(abandoned) @@ -380,11 +439,11 @@ RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim * @param [in] args argments address for kernel function * @param [in] argsSize argements size * @param [in] flags launch flags - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags, rtStream_t stream); +RTS_API rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags, rtStream_t stm); /** * @ingroup rt_kernel(in use) @@ -393,11 +452,11 @@ RTS_API rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags * @param [in] args argments address for kernel function * @param [in] argsSize argements size * @param [in] flags launch flags - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argsSize, uint32_t flags, +RTS_API rtError_t rtKernelLaunchFwk(const char_t *opName, void *args, uint32_t argsSize, uint32_t flags, rtStream_t rtStream); /** @@ -409,12 +468,12 @@ RTS_API rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t arg * @param [in] args argments address for kernel function * @param [in] argsSize argments size * @param [in] smDesc shared memory description - 
* @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtCpuKernelLaunch(const void *soName, const void *kernelName, uint32_t blockDim, const void *args, - uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream); + uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm); /** * @ingroup rt_kernel(in use) @@ -424,58 +483,57 @@ RTS_API rtError_t rtCpuKernelLaunch(const void *soName, const void *kernelName, * @param [in] args argments address for kernel function * @param [in] argsSize argments size * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, - uint32_t blockDim, const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream); + uint32_t blockDim, const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm); /** - * @ingroup rt_kernel(abandoned) + * @ingroup rtCpuKernelLaunchWithFlag(abandoned) * @brief launch cpu kernel to device with dump identifier * @param [in] soName so name * @param [in] kernelName kernel name * @param [in] blockDim block dimentions - * @param [in] args argments address for kernel function - * @param [in] argsSize argments size + * @param [in] argsInfo argments address for kernel function * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @param [in] flag dump flag or others function flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kernelName, uint32_t blockDim, - const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, + const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); /** - * @ingroup rt_kernel(in use) + * @ingroup rtAicpuKernelLaunchWithFlag(in use) * @brief launch cpu kernel to device with dump identifier * @param [in] launchNames names for kernel launch * @param [in] blockDim block dimentions * @param [in] args argments address for kernel function - * @param [in] argsSize argments size * @param [in] smDesc shared memory description - * @param [in] stream associated stream - * @param [in] flag dump flag or others function flag + * @param [in] stm associated stream + * @param [in] flags dump flag or others function flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, - const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); + const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, + uint32_t flags); /** * @ingroup rt_kernel * @brief L1 fusion dump addr transfered to device - * @param [in] model handle info + * @param [in] mdl handle info * @param [in] addr ddr address of L1 Fusion Dump * @param [in] dumpSize memory size * @param [in] flag memory flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); +RTS_API rtError_t rtDumpAddrSet(rtModel_t mdl, void *addr, uint32_t dumpSize, uint32_t flag); /** * @ingroup rt_kernel @@ -487,6 +545,28 
 */
RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length);

+/**
+ * @ingroup rt_kernel
+ * @brief launch npu get float status task
+ * @param [in] outputAddr pointer to op output addr
+ * @param [in] outputSize op output size
+ * @param [in] checkMode check mode
+ * @param [in] stm associated stream
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtNpuGetFloatStatus(void *outputAddr, uint64_t outputSize, uint32_t checkMode, rtStream_t stm);
+
+/**
+ * @ingroup rt_kernel
+ * @brief launch npu clear float status task
+ * @param [in] checkMode check mode
+ * @param [in] stm associated stream
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtNpuClearFloatStatus(uint32_t checkMode, rtStream_t stm);
+
 #ifndef __CLANG_CCE_RUNTIME_H__
 #define __CLANG_CCE_RUNTIME_H__
 /**
@@ -494,14 +574,14 @@ RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length);
 * @brief configure call argument for next rtLaunch in current thread
 * @param [in] numBlocks block dimensions
 * @param [in] smDesc shared memory description
- * @param [in] stream associated stream
+ * @param [in] stm associated stream
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 */
#ifdef __cplusplus
-RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stream = nullptr);
+RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stm = nullptr);
#else
-RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stream);
+RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stm);
#endif
#endif // __CLANG_CCE_RUNTIME_H__

@@ -509,13 +589,13 @@ RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStre
/**
 * @ingroup rt_kernel
 * @brief setup argument for next rtLaunch in current thread
- * @param [in] arg argment address for kernel function
+ * @param [in] args argument address for kernel function
 * @param [in] size argument size
 * @param [in] offset argument table offset
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 */
-RTS_API rtError_t rtSetupArgument(const void *arg, uint32_t size, uint32_t offset);
+RTS_API rtError_t rtSetupArgument(const void *args, uint32_t size, uint32_t offset);

/**
 * @ingroup rt_kernel
@@ -534,29 +614,29 @@ RTS_API rtError_t rtLaunch(const void *stubFunc);
 * @param [in] ptr host memory
 * @param [in] size host memory size
 * @param [in] flag reserved. set to 0
- * @param [out] arg returned arg. used for next kernel's arg.
+ * @param [out] args returned arg. used for next kernel's arg.
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 */
-RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg);
+RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **args);

/**
 * @ingroup rt_kernel
 * @brief start fusion kernels.
- * @param [in] stream stream for fusion kernels
+ * @param [in] stm stream for fusion kernels
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 */
-RTS_API rtError_t rtKernelFusionStart(rtStream_t stream);
+RTS_API rtError_t rtKernelFusionStart(rtStream_t stm);

/**
 * @ingroup rt_kernel
 * @brief end fusion kernels.
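 * @note usage sketch (not part of this patch; the stream is assumed to have
 *       been created beforehand): the start/end pair brackets the kernels to
 *       be fused on one stream, e.g.
 *         (void)rtKernelFusionStart(stm);
 *         // ... launch the kernels to be fused on stm ...
 *         (void)rtKernelFusionEnd(stm);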
- * @param [in] stream stream for fusion kernels + * @param [in] stm stream for fusion kernels * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelFusionEnd(rtStream_t stream); +RTS_API rtError_t rtKernelFusionEnd(rtStream_t stm); /** * @ingroup rt_kernel @@ -571,22 +651,22 @@ RTS_API rtError_t rtSetKernelReportCallback(rtKernelReportCallback callBack); * @ingroup rt_kernel * @brief subscribe stream callback report. * @param [in] threadId thread id for stream - * @param [in] stream stream for subscribe + * @param [in] stm stream for subscribe * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSubscribeReport(uint64_t threadId, rtStream_t stream); +RTS_API rtError_t rtSubscribeReport(uint64_t threadId, rtStream_t stm); /** * @ingroup rt_kernel * @brief add callback launch task in stream. * @param [in] callBackFunc app callback function * @param [in] fnData user data - * @param [in] stream subscribed stream + * @param [in] stm subscribed stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtCallbackLaunch(rtCallback_t callBackFunc, void *fnData, rtStream_t stream, bool isBlock); +RTS_API rtError_t rtCallbackLaunch(rtCallback_t callBackFunc, void *fnData, rtStream_t stm, bool isBlock); /** * @ingroup rt_kernel @@ -601,11 +681,11 @@ RTS_API rtError_t rtProcessReport(int32_t timeout); * @ingroup rt_kernel * @brief unsubscribe callback report. * @param [in] threadId thread id for stream - * @param [in] stream stream for subscribe + * @param [in] stm stream for subscribe * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stream); +RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stm); /** * @ingroup profiling_base @@ -613,7 +693,7 @@ RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stream); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStartOnlineProf(rtStream_t stream, uint32_t sampleNum); +RTS_API rtError_t rtStartOnlineProf(rtStream_t stm, uint32_t sampleNum); /** * @ingroup profiling_base @@ -621,7 +701,7 @@ RTS_API rtError_t rtStartOnlineProf(rtStream_t stream, uint32_t sampleNum); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); +RTS_API rtError_t rtStopOnlineProf(rtStream_t stm); /** * @ingroup profiling_base @@ -629,7 +709,7 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); +RTS_API rtError_t rtGetOnlineProfData(rtStream_t stm, rtProfDataInfo_t *pProfData, uint32_t profDataNum); /** * @ingroup profiling_base @@ -647,9 +727,9 @@ RTS_API rtError_t rtStartMDCProfiler(void **addr, uint32_t length); */ RTS_API rtError_t rtStopMDCProfiler(void *addr); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_KERNEL_H__ +#endif // CCE_RUNTIME_KERNEL_H diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index bace4bc6..368c6201 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -1,30 +1,18 @@ -/** - * 
Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: mem.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_MEM_H__ -#define __CCE_RUNTIME_MEM_H__ +#ifndef CCE_RUNTIME_MEM_H +#define CCE_RUNTIME_MEM_H -/*lint -e7*/ #include -/*lint +e7*/ #include "base.h" #include "config.h" #include "stream.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -32,43 +20,53 @@ extern "C" { * @ingroup dvrt_mem * @brief memory type */ -#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device -#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device -#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device -#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device -#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device -#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device -#define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device -#define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache -#define RT_MEMORY_TS_4G ((uint32_t)0x40) -#define RT_MEMORY_TS ((uint32_t)0x80) -#define RT_MEMORY_RESERVED ((uint32_t)0x100) +#define RT_MEMORY_DEFAULT (0x0U) // default memory on device +#define RT_MEMORY_HBM (0x2U) // HBM memory on device +#define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device +#define RT_MEMORY_DDR (0x4U) // DDR memory on device +#define RT_MEMORY_SPM (0x8U) // shared physical memory on device +#define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device +#define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device +#define RT_MEMORY_DDR_NC (0x20U) // DDR memory of non-cache +#define RT_MEMORY_TS (0x40U) // Used for Ts memory +#define RT_MEMORY_TS_4G (0x40U) // Used for Ts memory(only 1951) +#define RT_MEMORY_HOST (0x81U) // Memory on host +#define RT_MEMORY_RESERVED (0x100U) -#define RT_MEMORY_L1 ((uint32_t)0x1<<16) -#define RT_MEMORY_L2 ((uint32_t)0x1<<17) +#define RT_MEMORY_L1 (0x1U << 16U) +#define RT_MEMORY_L2 (0x1U << 17U) /** * @ingroup dvrt_mem * @brief memory info type */ -#define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1) -#define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2) -#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3) -#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4) +#define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U) +#define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U) +#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U) +#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U) /** * @ingroup dvrt_mem * @brief memory Policy */ -#define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // Malloc mem prior hage page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prior hage page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only use hage page -#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // 
Malloc mem only use default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prior hage page, then default page, use for p2p -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only use hage page, use for p2p -#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only use default page, use for p2p +#define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x400U) // Malloc mem prior huge page, then default page, 0x1U << 10U +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x800U) // Malloc mem only use huge page, 0x1U << 11U +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1000U) // Malloc mem only use default page, 0x1U << 12U +// Malloc mem prior huge page, then default page, for p2p, 0x1U << 13U +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x2000U) +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Malloc mem only use huge page, use for p2p, 0x1U << 14U +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Malloc mem only use default page, use for p2p, 0x1U << 15U -#define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mem type bit in <0, 9> +/** + * @ingroup dvrt_mem + * @brief memory attribute + */ +#define RT_MEMORY_ATTRIBUTE_DEFAULT (0x0U) +// memory read only attribute, now only dvpp memory support. +#define RT_MEMORY_ATTRIBUTE_READONLY (0x100000U) // Malloc readonly, 1<<20. + +#define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9> /** * @ingroup dvrt_mem @@ -80,10 +78,10 @@ typedef uint32_t rtMemType_t; * @ingroup dvrt_mem * @brief memory advise type */ -#define RT_MEMORY_ADVISE_EXE (0x02) -#define RT_MEMORY_ADVISE_THP (0x04) -#define RT_MEMORY_ADVISE_PLE (0x08) -#define RT_MEMORY_ADVISE_PIN (0x16) +#define RT_MEMORY_ADVISE_EXE (0x02U) +#define RT_MEMORY_ADVISE_THP (0x04U) +#define RT_MEMORY_ADVISE_PLE (0x08U) +#define RT_MEMORY_ADVISE_PIN (0x16U) /** * @ingroup dvrt_mem @@ -119,7 +117,7 @@ typedef enum tagRtRecudeKind { RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11, RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12, RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13, - RT_RECUDE_KIND_END + RT_RECUDE_KIND_END = 14, } rtRecudeKind_t; typedef enum tagRtDataType { @@ -132,9 +130,9 @@ typedef enum tagRtDataType { RT_DATA_TYPE_BFP16 = 6, // bfp16 RT_DATA_TYPE_BFP32 = 7, // bfp32 RT_DATA_TYPE_UINT8 = 8, // uint8 - RT_DATA_TYPE_UINT16= 9, // uint16 - RT_DATA_TYPE_UINT32= 10,// uint32 - RT_DATA_TYPE_END + RT_DATA_TYPE_UINT16 = 9, // uint16 + RT_DATA_TYPE_UINT32 = 10, // uint32 + RT_DATA_TYPE_END = 11, } rtDataType_t; /** @@ -190,22 +188,22 @@ typedef struct tagRtPointerAttributes { } rtPointerAttributes_t; -typedef struct rtMallocHostSharedMemoryIn { - const char *name; +typedef struct { + const char_t *name; const uint64_t size; uint32_t flag; } rtMallocHostSharedMemoryIn; -typedef struct rtMallocHostSharedMemoryOut { - int fd; +typedef struct { + int32_t fd; void *ptr; void *devPtr; } rtMallocHostSharedMemoryOut; -typedef struct rtFreeHostSharedMemoryIn { - const char *name; +typedef struct { + const char_t *name; const uint64_t size; - int fd; + int32_t fd; void *ptr; void *devPtr; } rtFreeHostSharedMemoryIn; @@ -243,6 +241,18 @@ RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size); /** * @ingroup dvrt_mem + * @brief alloc device memory for dvpp, support set flag + * @param [in|out] devPtr memory pointer + * @param [in] size memory size + * @param [in] flag mem flag, can use mem attribute set read only. 
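+ * @note usage sketch (the size and the use of the read-only attribute as the
+ *       flag value are assumptions for illustration):
+ *         void *buf = NULL;
+ *         (void)rtDvppMallocWithFlag(&buf, 1024U, RT_MEMORY_ATTRIBUTE_READONLY);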
+ * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return others is error + */ +RTS_API rtError_t rtDvppMallocWithFlag(void **devPtr, uint64_t size, uint32_t flag); + +/** + * @ingroup dvrt_mem * @brief free device memory for dvpp * @param [in|out] devPtr memory pointer * @return RT_ERROR_NONE for ok @@ -310,6 +320,7 @@ RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtMemFreeManaged(void *ptr); + /** * @ingroup dvrt_mem * @brief alloc cached device memory @@ -344,12 +355,26 @@ RTS_API rtError_t rtInvalidCache(void *base, size_t len); * @param [in] dst destination address pointer * @param [in] Max length of destination address memory * @param [in] src source address pointer - * @param [in] count the number of byte to copy + * @param [in] cnt the number of byte to copy * @param [in] kind memcpy type * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind); +RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind); + +/** + * @ingroup dvrt_mem + * @brief host task memcpy + * @param [in] dst destination address pointer + * @param [in] destMax length of destination address memory + * @param [in] src source address pointer + * @param [in] cnt the number of byte to copy + * @param [in] kind memcpy type + * @param [in] stm task stream + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src, + const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm); /** * @ingroup dvrt_mem @@ -357,14 +382,42 @@ RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_ * @param [in] dst destination address pointer * @param [in] Max length of destination address memory * @param [in] src source address pointer - * @param [in] count the number of byte to copy + * @param [in] cnt the number of byte to copy + * @param [in] kind memcpy type + * @param [in] stm asynchronized task stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind, + rtStream_t stm); + +typedef struct rtMemcpyAddrInfo { + uint32_t resv0; + uint32_t resv1; + uint32_t resv2; + uint32_t len; + uint64_t src; + uint64_t dst; +} rtMemcpyAddrInfo; + +RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint64_t count, + rtMemcpyKind_t kind, rtStream_t stream); + +/** + * @ingroup dvrt_mem + * @brief asynchronized reduce memcpy + * @param [in] dst destination address pointer + * @param [in] Max length of destination address memory + * @param [in] src source address pointer + * @param [in] cnt the number of byte to copy * @param [in] kind memcpy type - * @param [in] stream asynchronized task stream + * @param [in] type data type + * @param [in] stm asynchronized task stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind, - rtStream_t stream); +RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind, + rtDataType_t type, rtStream_t stm); /** * 
@ingroup dvrt_mem
@@ -375,12 +428,46 @@ RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, ui
 * @param [in] count the number of bytes to copy
 * @param [in] kind memcpy type
 * @param [in] type data type
- * @param [in] stream asynchronized task stream
+ * @param [in] stm asynchronized task stream
+ * @param [in] overflowAddr addr of overflow flag
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 */
-RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
-                                rtDataType_t type, rtStream_t stream);
+RTS_API rtError_t rtReduceAsyncV2(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
+                                  rtDataType_t type, rtStream_t stm, void *overflowAddr);
+
+/**
+ * @ingroup dvrt_mem
+ * @brief synchronized memcpy2D
+ * @param [in] dst destination address pointer
+ * @param [in] dstPitch pitch of destination memory
+ * @param [in] src source address pointer
+ * @param [in] srcPitch pitch of source memory
+ * @param [in] width width of matrix transfer
+ * @param [in] height height of matrix transfer
+ * @param [in] kind memcpy type
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
+                             uint64_t height, rtMemcpyKind_t kind);
+
+/**
+ * @ingroup dvrt_mem
+ * @brief asynchronized memcpy2D
+ * @param [in] dst destination address pointer
+ * @param [in] dstPitch pitch of destination memory
+ * @param [in] src source address pointer
+ * @param [in] srcPitch pitch of source memory
+ * @param [in] width width of matrix transfer
+ * @param [in] height height of matrix transfer
+ * @param [in] kind memcpy type
+ * @param [in] stm asynchronized task stream
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
+                                  uint64_t height, rtMemcpyKind_t kind, rtStream_t stm);

/**
 * @ingroup dvrt_mem
@@ -403,59 +490,69 @@ RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize)
/**
 * @ingroup dvrt_mem
+ * @brief Specifies how memory is used
+ * @param [in] devPtr memory pointer
+ * @param [in] count memory count
+ * @param [in] advise reserved, set to 1
+ * @return RT_ERROR_NONE for ok
+ * @return others for error
+ */
+RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise);
+/**
+ * @ingroup dvrt_mem
 * @brief set memory with uint32_t value
 * @param [in] devPtr
 * @param [in] Max length of destination address memory
- * @param [in] value
- * @param [in] count byte num
+ * @param [in] val
+ * @param [in] cnt byte num
 * @return RT_ERROR_NONE for ok, errno for failed
 * @return RT_ERROR_INVALID_VALUE for error input
 */
-RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
+RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t val, uint64_t cnt);

/**
 * @ingroup dvrt_mem
 * @brief set memory with uint32_t value async
 * @param [in] devPtr
 * @param [in] Max length of destination address memory
- * @param [in] value
- * @param [in] count byte num
- * @param [in] stream
+ * @param [in] val
+ * @param [in] cnt byte num
+ * @param [in] stm
 * @return RT_ERROR_NONE for ok, errno for failed
 * @return RT_ERROR_INVALID_VALUE for error input
 */
-RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t
destMax, uint32_t value, uint64_t count, rtStream_t stream); +RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t val, uint64_t cnt, rtStream_t stm); /** * @ingroup dvrt_mem * @brief get current device memory total and free - * @param [out] free - * @param [out] total + * @param [out] freeSize + * @param [out] totalSize * @return RT_ERROR_NONE for ok, errno for failed * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total); +RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize); /** * @ingroup dvrt_mem * @brief get current device memory total and free * @param [in] memInfoType - * @param [out] free - * @param [out] total + * @param [out] freeSize + * @param [out] totalSize * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total); +RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize); /** * @ingroup dvrt_mem * @brief set memory with uint32_t value * @param [in] devPtr * @param [in] len - * @param [in] device + * @param [in] devId * @return RT_ERROR_NONE for ok, errno for failed * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device); +RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t devId); /** * @ingroup dvrt_mem @@ -477,7 +574,7 @@ RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, cons * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len); +RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char_t *name, uint32_t len); /** * @ingroup dvrt_mem @@ -487,7 +584,7 @@ RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char * * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtIpcDestroyMemoryName(const char *name); +RTS_API rtError_t rtIpcDestroyMemoryName(const char_t *name); /** * @ingroup dvrt_mem @@ -498,7 +595,7 @@ RTS_API rtError_t rtIpcDestroyMemoryName(const char *name); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name); +RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char_t *name); /** * @ingroup dvrt_mem @@ -514,14 +611,14 @@ RTS_API rtError_t rtIpcCloseMemory(const void *ptr); /** * @ingroup dvrt_mem * @brief HCCL Async memory cpy - * @param [in] index sq index + * @param [in] sqIndex sq index * @param [in] wqeIndex moudle index - * @param [in] stream asynchronized task stream + * @param [in] stm asynchronized task stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t stream); +RTS_API rtError_t rtRDMASend(uint32_t sqIndex, uint32_t wqeIndex, rtStream_t stm); /** * @ingroup dvrt_mem @@ -533,22 +630,22 @@ RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t strea * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); +RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t 
pid[], int32_t num);

/**
 * @ingroup dvrt_mem
 * @brief HCCL Async memory cpy
 * @param [in] dbindex single device 0
 * @param [in] dbinfo doorbell info
- * @param [in] stream asynchronized task stream
+ * @param [in] stm asynchronized task stream
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 * @return RT_ERROR_DRV_ERR for driver error
 */
-RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
+RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stm);

-#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
+#if defined(__cplusplus)
 }
 #endif

-#endif // __CCE_RUNTIME_MEM_H__
+#endif // CCE_RUNTIME_MEM_H
diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h
index aa394eea..de1b768b 100644
--- a/third_party/fwkacllib/inc/runtime/rt.h
+++ b/third_party/fwkacllib/inc/runtime/rt.h
@@ -1,32 +1,25 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+ * Description: rt.h
+ * Create: 2020-01-01
+ */
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
-
- * http://www.apache.org/licenses/LICENSE-2.0
-
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-#ifndef __CCE_RUNTIME_RT_H__
-#define __CCE_RUNTIME_RT_H__
+#ifndef CCE_RUNTIME_RT_H
+#define CCE_RUNTIME_RT_H

 #include "base.h"
 #include "config.h"
 #include "context.h"
 #include "dev.h"
-#include "dvfsprofile.h"
 #include "event.h"
 #include "kernel.h"
 #include "mem.h"
 #include "rt_model.h"
 #include "stream.h"
+#include "rt_stars.h"
 #include "rt_ffts.h"
+#include "rt_ffts_plus.h"
+#include "rt_dfx.h"
+#include "rt_mem_queue.h"

-#endif // __CCE_RUNTIME_RT_H__
+#endif // CCE_RUNTIME_RT_H
diff --git a/third_party/fwkacllib/inc/runtime/rt_dfx.h b/third_party/fwkacllib/inc/runtime/rt_dfx.h
new file mode 100644
index 00000000..71215f80
--- /dev/null
+++ b/third_party/fwkacllib/inc/runtime/rt_dfx.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ * Description: dfx interface
+ */
+
+#ifndef CCE_RUNTIME_RT_DFX_H
+#define CCE_RUNTIME_RT_DFX_H
+
+#include "base.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// max task tag buffer is 1024(include '\0')
+#define TASK_TAG_MAX_LEN    1024U
+
+/**
+ * @brief set task tag.
+ * once set, it is used by only one task and is thread local.
+ * attention:
+ * 1. it is currently used to dump the current task in an active stream.
+ * 2. it must be called before task submission and becomes invalid once the task is submitted.
+ * @param [in] taskTag task tag, usually a node name or task name.
+ *             must end with '\0' and max len is TASK_TAG_MAX_LEN.
+ * @return RT_ERROR_NONE for ok
+ * @return other failed
+ */
+RTS_API rtError_t rtSetTaskTag(const char_t *taskTag);
+
+/**
+ * @brief set aicpu device attribute.
+ * it is used for the aicpu device to be aware of environment config
+ * @param [in] key attribute key.
+ * @param [in] val attribute value.
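+ * @note usage sketch (the key/value strings are illustrative assumptions,
+ *       not a documented attribute set):
+ *         (void)rtSetAicpuAttr("RunMode", "offline");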
+ * @return RT_ERROR_NONE for ok + * @return other failed + */ +RTS_API rtError_t rtSetAicpuAttr(const char_t *key, const char_t *val); + +#if defined(__cplusplus) +} +#endif +#endif // CCE_RUNTIME_RT_DFX_H diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h old mode 100755 new mode 100644 index 720da7cd..5c0ab971 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -1,14 +1,14 @@ /* - * Copyright (c) Huawei Technologies Co. , Ltd. 2021. All rights reserved. + * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. * Description: ffts interface */ -#ifndef __CCE_RUNTIME_FFTS_H -#define __CCE_RUNTIME_FFTS_H +#ifndef CCE_RUNTIME_RT_FFTS_H +#define CCE_RUNTIME_RT_FFTS_H #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -19,8 +19,8 @@ extern "C" { #define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U typedef enum tagFftsType { - RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define - RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define + RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define + RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define } rtFftsType_t; typedef enum tagFftsSubTaskType { @@ -33,11 +33,11 @@ typedef enum tagFftsSubTaskType { RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, RT_FFTS_SUB_TASK_TYPE_SDMA = 8, - RT_FFTS_SUB_TASK_TYPE_RESERVED, + RT_FFTS_SUB_TASK_TYPE_RESERVED = 9, } rtFftsSubTaskType_t; typedef struct tagManualThreadDmuInfo { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint16_t numOuter; uint16_t numInner; uint32_t strideOuter; @@ -50,58 +50,60 @@ typedef struct tagManualThreadDependency { } rtManualThreadDependency_t; typedef struct tagManualThreadAicAivInfo { - uint64_t taskParamAddr; // device mem + uint64_t taskParamAddr; // device mem uint16_t taskParamOffset; // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 - // when satMode=0 and FP16 computation with none INF inputs overflows/underflows - // results will be saturated to +/- MAX of FP16 + // when satMode=0 and FP16 computation with none INF inputs overflows/underflows, + // results will be saturated to +/-MAX of FP16 uint8_t satMode; - uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved - uint8_t iCachePrefetchCnt; // units is 2K - uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 - uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 - uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts - // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index - uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 + uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 + uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts + // num: thread0_prefetch_dmu_descriptor_index – prefetch_once_dmu_descriptor_index + uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; - const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; + const 
char_t *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; - rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim - 1] + rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1] rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; } rtManualThreadAicAivInfo_t; typedef struct tagAutoThreadPrefetch { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint32_t dataAddrOffset; uint32_t nonTailDataLen; uint32_t tailDataLen; } rtAutoThreadPrefetch_t; typedef struct tagAutoThreadAicAivInfo { - uint64_t taskParamAddr; // device mem + uint64_t taskParamAddr; // device mem uint16_t taskParamOffset; - // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 - // when satMode=0 and FP16 computation with none INF inputs overflows/underflows - // results will be saturated to +/- MAX of FP16 + /* + * when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 + * when satMode=0 and FP16 computation with none INF inputs overflows/underflows, results will be saturated to + * +/-MAX of FP16 + */ uint8_t satMode; - uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved - uint8_t iCachePrefetchCnt; // units is 2K - uint8_t prefetchEnableBitmap; // 8 bit bitmap - uint8_t prefetchOnceBitmap; // 8 bit bitmap + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap + uint8_t prefetchOnceBitmap; // 8 bit bitmap uint16_t tailBlkDim; uint16_t nonTailBlkDim; - const char *nonTailTaskFuncStub; - const char *tailTaskFuncStub; + const char_t *nonTailTaskFuncStub; + const char_t *tailTaskFuncStub; - // for prefetch, valid num is prefetchEnableBitmap bit count - // if prefetchEnableBitmap = '00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid + // for prefetch, valid num is prefetchEnableBitmap bit count. 
+ // if prefetchEnableBitmap='00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; } rtAutoThreadAicAivInfo_t; typedef struct tagAutoThreadCacheInfo { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint32_t dataAddrOffset; uint32_t nonTailDataLen; uint32_t tailDataLen; @@ -109,7 +111,7 @@ typedef struct tagAutoThreadCacheInfo { } rtAutoThreadCacheInfo_t; typedef struct tagManualThreadCacheInfo { - rtManualThreadDmuInfo_t *dmuList; // 0-64k + rtManualThreadDmuInfo_t *dmuList; // 0-64k uint16_t dmuNum; uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM]; @@ -152,11 +154,11 @@ typedef struct tagFftsSubTaskInfo { } rtFftsSubTaskInfo_t; typedef struct tagFftsDescInfo { - uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder - uint8_t di; // discard invalidate - uint8_t dw; // discard write back - uint8_t df; // discard flush - uint8_t dataSplitUnit; // split source or ticket cache by 2~dataSplitUnit MB + uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder + uint8_t di; // discard invalidate + uint8_t dw; // discard write back + uint8_t df; // discard flush + uint8_t dataSplitUnit; // split source or ticket cache by 2^dataSplitUnit MB uint8_t prefetchOstNum; uint8_t cacheMaintainOstNum; uint8_t aicPrefetchUpper; @@ -166,20 +168,23 @@ typedef struct tagFftsDescInfo { } rtFftsDescInfo_t; typedef struct tagFftsTaskInfo { - rtFftsType_t fftsType; + rtFftsType_t fftsType; uint16_t subTaskNum; uint16_t tickCacheNum; rtFftsDescInfo_t fftsDesc; // sub task desc, real num is subTaskNum rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM]; - // ticket cache, real number is ticketCacheNum + // ticket cache, real number is tickCacheNum. rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; } rtFftsTaskInfo_t; -RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); +RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stm); +RTS_API rtError_t rtGetC2cCtrlAddr(uint64_t *addr, uint32_t *len); + +RTS_API rtError_t rtFftsTaskLaunchWithFlag(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stm, uint32_t flag); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif //__CCE_RUNTIME_FFTS_H +#endif // CCE_RUNTIME_RT_FFTS_H diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h new file mode 100644 index 00000000..b136f880 --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
+ * Description: ffts plus interface + */ + +#ifndef CCE_RUNTIME_RT_FFTS_PLUS_H +#define CCE_RUNTIME_RT_FFTS_PLUS_H + +#include "base.h" +#include "rt_ffts_plus_define.h" +#include "rt_stars_define.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +#pragma pack(push) +#pragma pack (1) + +typedef struct tagFftsPlusDumpInfo { + const void *loadDumpInfo; + const void *unloadDumpInfo; + uint32_t loadDumpInfolen; + uint32_t unloadDumpInfolen; +} rtFftsPlusDumpInfo_t; + + +typedef struct tagFftsPlusTaskInfo { + const rtFftsPlusSqe_t *fftsPlusSqe; + const void *descBuf; // include total context + size_t descBufLen; // the length of descBuf + rtFftsPlusDumpInfo_t fftsPlusDumpInfo; // used only in the dynamic shape +} rtFftsPlusTaskInfo_t; + +#pragma pack(pop) + +RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *kernelInfoExt, void **addr, + uint32_t *prefetchCnt); + +RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stm); + +RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stm, + uint32_t flag); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif +#endif // CCE_RUNTIME_RT_FFTS_PLUS_H diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h new file mode 100644 index 00000000..9b043ee5 --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h @@ -0,0 +1,747 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: the definition of ffts plus + */ + +#ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H +#define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +#pragma pack(push) +#pragma pack (1) + +// hardware context type +typedef enum tagFftsPlusHwType { + RT_HW_CTX_TYPE_AIC = 0, + RT_HW_CTX_TYPE_AIV = 1, + RT_HW_CTX_TYPE_NOTIFY_WAIT = 3, + RT_HW_CTX_TYPE_NOTIFY_RECORD = 4, + RT_HW_CTX_TYPE_WRITE_VALUE = 5, + RT_HW_CTX_TYPE_MIX_AIC = 6, + RT_HW_CTX_TYPE_MIX_AIV = 7, + RT_HW_CTX_TYPE_SDMA = 8, + RT_HW_CTX_TYPE_FLUSH_DATA = 9, + RT_HW_CTX_TYPE_INVALIDATE_DATA = 10, + RT_HW_CTX_TYPE_WRITEBACK_DATA = 11, + RT_HW_CTX_TYPE_AICPU = 12, + RT_HW_CTX_TYPE_LOAD = 13, + RT_HW_CTX_TYPE_MAX = 14, +} rtFftsPlusHwType_t; + +// hardware context type +typedef enum tagFftsPlusSoftType { + RT_SOFT_CTX_TYPE_COND_SWITCH = 1, + RT_SOFT_CTX_TYPE_CASE_SWITCH = 2, + RT_SOFT_CTX_TYPE_AT_START = 3, + RT_SOFT_CTX_TYPE_AT_END = 4, + RT_SOFT_CTX_TYPE_LABEL = 5, + RT_SOFT_CTX_PERSISTENT_CACHE = 6, + RT_SOFT_CTX_TYPE_MAX = 7, +} rtFftsPlusSoftType_t; + +typedef enum tagFftsPlusContextType { + RT_CTX_TYPE_AICORE = 0x0000, + RT_CTX_TYPE_AIV = 0x0001, + RT_CTX_TYPE_NOTIFY_WAIT = 0x0003, + RT_CTX_TYPE_NOTIFY_RECORD = 0x0004, + RT_CTX_TYPE_WRITE_VALUE = 0x0005, + RT_CTX_TYPE_MIX_AIC = 0x0006, + RT_CTX_TYPE_MIX_AIV = 0x0007, + RT_CTX_TYPE_SDMA = 0x0008, + RT_CTX_TYPE_FLUSH_DATA = 0x0009, + RT_CTX_TYPE_INVALIDATE_DATA = 0x000A, + RT_CTX_TYPE_WRITEBACK_DATA = 0x000B, + RT_CTX_TYPE_AICPU = 0x000C, + RT_CTX_TYPE_COND_SWITCH = 0x010D, + RT_CTX_TYPE_CASE_SWITCH = 0x020D, + RT_CTX_TYPE_AT_START = 0x0300, + RT_CTX_TYPE_AT_END = 0x0400, + RT_CTX_TYPE_LABEL = 0x0500, + RT_CTX_TYPE_PERSISTENT_CACHE = 0x0600, +}rtFftsPlusContextType_t; + +// condition type +typedef enum tagFftsPlusCondType { + RT_COND_TYPE_EQUAL = 0, + RT_COND_TYPE_NOTEQUAL = 1, + RT_COND_TYPE_GREATER = 2, + 
RT_COND_TYPE_GREATER_OR_EQUAL = 3, + RT_COND_TYPE_LESS = 4, + RT_COND_TYPE_LESS_OR_EQUAL = 5, + RT_COND_TYPE_MAX = 6, +} rtFftsPlusCondType_t; + +// the definition of ffts plus context + +#define RT_CTX_SUCCESSOR_NUM 26 + +// ffts plus common context +typedef struct tagFftsPlusComCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t rsv1 : 7; + uint8_t aten : 1; + // 4-7 + uint8_t rsv2; + uint8_t rsv3; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t rsv4; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-71 + uint32_t rsv5[2]; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-127 + uint32_t res6[13]; +} rtFftsPlusComCtx_t; + +// aic/aiv context +typedef struct tagFftsPlusAicAivCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t resv : 6; + uint8_t dumpSwitch : 1; + uint8_t aten : 1; + // 4-7 + uint8_t prefetchConfig; + uint8_t resv1; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t resv2; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t resv3 : 1; + uint16_t schem : 2; + uint16_t icachePrefetchCnt : 5; + uint16_t resv4 : 7; + uint16_t atm : 1; + uint16_t prefetchEnableBitmap : 4; + uint16_t res6 : 4; + uint16_t prefetchOnceBitmap : 4; + uint16_t res7 : 4; + // 68-71 + uint16_t pmg : 2; + uint16_t ns : 1; + uint16_t partId : 8; + uint16_t res8 : 1; + uint16_t qos : 4; + uint16_t res9; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint16_t nonTailBlockdim; + uint16_t tailBlockdim; + // 80-83 + uint32_t taskParamPtrBaseL; + // 84-87 + uint16_t taskParamPtrBaseH; + uint16_t taskParamPtrOffset; + // 88-95 + uint32_t res10; + uint32_t res11; + // 96-103 + uint32_t nonTailTaskStartPcL; + uint16_t nonTailTaskStartPcH; + uint16_t res12; + // 104-111 + uint32_t tailTaskStartPcL; + uint16_t tailTaskStartPcH; + uint16_t res13; + // 112-119 + uint32_t res14; + uint32_t res15; + // 120-127 + uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph) +} rtFftsPlusAicAivCtx_t; + +// mix aic/aiv context +typedef struct tagFftsPlusMixAicAivCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t reserved1 : 6; + uint8_t dumpSwitch : 1; + uint8_t aten : 1; + // 4-7 + uint8_t prefetchConfig; + uint8_t reserved2; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t reserved3; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t reserved4 : 1; + uint16_t schem : 2; + uint16_t aicIcachePrefetchCnt : 5; + uint16_t aivIcachePrefetchCnt : 5; + uint16_t reserved5 : 2; + uint16_t atm : 1; + uint16_t prefetchEnableBitmap : 4; + uint16_t reserved6 : 4; + uint16_t prefetchOnceBitmap : 4; + uint16_t reserved7 : 4; + // 68-71 + uint16_t pmg : 2; + uint16_t ns : 1; + uint16_t partId : 8; + uint16_t reserved8 : 1; + uint16_t qos : 4; + uint8_t nonTailBlockRatioN; + uint8_t tailBlockRatioN; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint16_t nonTailBlockdim; + uint16_t tailBlockdim; + // 80-87 + uint32_t aicTaskParamPtrL; + uint16_t aicTaskParamPtrH; + uint16_t aicTaskParamPtrOffset; + // 88-95 + uint32_t aivTaskParamPtrL; + uint16_t aivTaskParamPtrH; + uint16_t aivTaskParamPtrOffset; + // 96-103 + uint32_t nonTailAicTaskStartPcL; + uint16_t nonTailAicTaskStartPcH; + uint16_t tailAicTaskStartPcH; + // 104-111 + uint32_t tailAicTaskStartPcL; + uint32_t nonTailAivTaskStartPcL; + // 112-119 + uint16_t nonTailAivTaskStartPcH; + uint16_t tailAivTaskStartPcH; 
+ uint32_t tailAivTaskStartPcL; + // 120-127 + uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph) +} rtFftsPlusMixAicAivCtx_t; + +// sdma context +typedef struct tagFftsPlusSdmaCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t res1 : 6; + uint8_t dumpSwitch : 1; + uint8_t aten : 1; + // 4-7 + uint8_t res2; + uint8_t res3; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t res4; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint8_t res5; + uint8_t res6 : 7; + uint8_t atm : 1; + uint16_t res7; + // 68-71 + uint16_t pmg : 2; + uint16_t ns : 1; + uint16_t partId : 8; + uint16_t res8 : 1; + uint16_t qos : 4; + uint16_t res9; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE) + // 80-83 + uint16_t sourceStreamId; + uint16_t sourceSubstreamId; + // 84-87 + uint16_t destinationStreamId; + uint16_t destinationSubstreamId; + // 88-127 + uint32_t sourceAddressBaseL; + uint32_t sourceAddressBaseH; + uint32_t sourceAddressOffset; + uint32_t destinationAddressBaseL; + uint32_t destinationAddressBaseH; + uint32_t destinationAddressOffset; + uint32_t nonTailDataLength; + uint32_t tailDataLength; + uint32_t res10[2]; +} rtFftsPlusSdmaCtx_t; + +// ffts plus notify record/wait context +typedef struct tagFftsPlusNotifyCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t res : 7; + uint8_t aten : 1; + // 4-7 + uint8_t res1; + uint8_t res2; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t res3; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t res4 : 14; + uint16_t satm : 1; + uint16_t atm : 1; + uint16_t res6; + // 68-71 + uint32_t res7; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint16_t notifyIdBase; + uint8_t autoWindow; + uint8_t res8; + // 80-127 + uint32_t res9[4]; + uint16_t notifyId[16]; +} rtFftsPlusNotifyCtx_t; + +// write Value context +typedef struct tagFftsPlusWriteValueCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t resv1 : 7; + uint8_t aten : 1; + // 4-7 + uint8_t resv2; + uint8_t resv3; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t resv4; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t resv5 : 15; + uint16_t atm : 1; + uint16_t resv6; + // 68-71 + uint32_t resv7; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint8_t awSize : 3; + uint8_t awSnoop : 1; + uint8_t resv8 : 4; + uint8_t awCache : 4; + uint8_t awProt : 3; + uint8_t awVa : 1; + + uint8_t arSize : 3; + uint8_t arSnoop : 1; + uint8_t resv9 : 4; + uint8_t arCache : 4; + uint8_t arProt : 3; + uint8_t arVa : 1; + // 80-83 + uint32_t writeAddressBaseL; + // 84-87 + uint32_t writeAddressBaseH : 17; + uint32_t res10 : 15; + // 88-91 + uint32_t writeAddressOffset; + // 92-95 + uint32_t res11; + // 96-111 + uint32_t writeValue[4]; // write_value_00 -> write_value_03 + // 112-127 + uint32_t res12[4]; +} rtFftsPlusWriteValueCtx_t; + +// ai cpu context +typedef struct tagFftsPlusAiCpuCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t res1 : 6; + uint8_t dumpSwitch : 1; + uint8_t aten : 1; + // 4-7 + uint8_t res2; + uint8_t res3; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t res4; + // 12-63 + uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t res5 : 15; + uint16_t atm : 1; + 
uint16_t res6; + // 68-71 + uint16_t sqeIndex; + uint8_t kernelType : 7; + uint8_t bm : 1; + uint8_t topicType : 4; + uint8_t qos : 3; + uint8_t res7 : 1; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint16_t nonTailBlockdim; + uint16_t tailBlockdim; + // 80-115 + uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h) + // 116--119 + uint32_t res8; + // 120-123 + uint32_t subtopicId : 12; + uint32_t topicId : 6; + uint32_t groupId : 6; + uint32_t usrDataLength : 8; + // 124-127 + uint32_t taskParamOffset; +} rtFftsPlusAiCpuCtx_t; + +// data context +typedef struct tagFftsPlusDataCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t res1 : 7; + uint8_t aten : 1; + // 4-7 + uint8_t res2; + uint8_t res3; + uint8_t cntInit; // cons_cnt_init / prod_cnt_init + uint8_t cnt; // cons_cnt / prod_cnt + // 8-11 + uint32_t res4; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t res5 : 15; + uint16_t atm : 1; + uint16_t res6; + // 68-71 + uint16_t pmg : 2; + uint16_t ns : 1; + uint16_t partId : 8; + uint16_t res7 : 1; + uint16_t qos : 4; + uint16_t res8; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint16_t origConsumerCounter; + uint16_t runConsumerCounter; + // 80-83 + uint32_t addressBaseL; + // 84-87 + uint32_t addressBaseH; + // 88-91 + uint32_t addressOffset; + // 92-95 + uint32_t res9; + // 96-99 + uint16_t nonTailNumOutter; + uint16_t nonTailNumInner; + // 100-103 + uint32_t nonTailLengthInner; + // 104-107 + uint32_t nonTailStrideOutter; + // 108-111 + uint32_t nonTailStrideInner; + // 112-115 + uint16_t tailNumOutter; + uint16_t tailNumInner; + // 116-119 + uint32_t tailLengthInner; + // 120-123 + uint32_t tailStrideOutter; + // 124-127 + uint32_t tailStrideInner; +} rtFftsPlusDataCtx_t; + +// at start context +typedef struct tagFftsPlusAtStartCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t rs1 : 7; + uint8_t aten : 1; + // 4-7 + uint8_t rs2; + uint8_t rs3; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t rs4; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t rs5; + uint16_t rs6; + // 68-71 + uint16_t rs7; + uint16_t rs8; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint16_t threadIdInit; + uint16_t threadWindowSize; + // 80-127 + uint32_t res9[12]; +} rtFftsPlusAtStartCtx_t; + +// at end context +#define RT_CTX_SUCC_AT_START_SLOT_NUM 12 +#define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12 + +typedef struct tagFftsPlusAtEndCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t atStartSlotNumber; + uint8_t outLabelSlotNumber : 7; + uint8_t aten : 1; + // 4-7 + uint8_t res1; + uint8_t res2; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t res3; + // 12-59 + uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM]; + uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM]; + // 60-63 + uint16_t res4; + uint16_t res5; + // 64-67 + uint16_t res6; + uint16_t res7; + // 68-71 + uint16_t res8; + uint16_t res9; + // 72-75 + uint16_t threadId; + uint16_t res10; + // 76-79 + uint16_t res11; + uint16_t res12; + // 80-127 + uint32_t res13[12]; +} rtFftsPlusAtEndCtx_t; + +// label context +typedef struct tagFftsPlusLabelCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t res1; + // 4-7 + uint8_t res2; + uint8_t res3; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t res4; + // 12-63 + uint16_t 
successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-79 + uint16_t res5[8]; + // 80-127 + uint32_t res6[12]; +} rtFftsPlusLabelCtx_t; + +// case switch context +typedef struct tagFftsPlusCaseSwitchCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t resv0 : 7; + uint8_t aten : 1; + // 4-7 + uint8_t startLabelId; + uint8_t labelListLen; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t resv1; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t resv2 : 15; + uint16_t atm : 1; + uint16_t resv3; + // 68-71 + uint32_t resv4; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint8_t arSize : 3; + uint8_t snoop : 1; + uint8_t resv5 : 4; + uint8_t arCache : 4; + uint8_t arProt : 3; + uint8_t va : 1; + uint16_t resv6; + // 80-83 + uint32_t loadAddress0BaseL; + // 84-87 + uint32_t loadAddress0BaseH : 17; + uint32_t resv7 : 14; + uint32_t ld0En : 1; + // 88-91 + uint32_t loadAddress0Offset; + // 92-95 + uint32_t resv8; + // 96-99 + uint32_t loadAddress1BaseL; + // 100-103 + uint32_t loadAddress1BaseH : 17; + uint32_t resv9 : 14; + uint32_t ld1En : 1; + // 104-107 + uint32_t loadAddress1Offset; + // 108-127 + uint32_t resv10[5]; +} rtFftsPlusCaseSwitchCtx_t; + +// case default context +typedef struct tagFftsPlusCaseDefCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t rs0 : 7; + uint8_t aten : 1; + // 4-7 + uint8_t startLabelId; + uint8_t labelListLen; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t rs1; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint16_t rs2; + uint16_t rs3; + // 68-127 + uint32_t rs4[15]; +} rtFftsPlusCaseDefCtx_t; + +// condition switch context +#define RT_CTX_TRUE_SUCCESSOR_NUM 13 +#define RT_CTX_FALSE_SUCCESSOR_NUM 13 + +typedef struct tagFftsPlusCondSwitchCtx { + // 0-3 bytes + uint16_t contextType; + uint8_t trueSuccessorNum; + uint8_t falseSuccessorNum : 7; + uint8_t aten : 1; + // 4-7 + uint8_t condition; + uint8_t res1; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint32_t res2; + // 12-63 + uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM]; + uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM]; + // 64-67 + uint16_t res3 : 15; + uint16_t atm : 1; + uint16_t res4; + // 68-71 + uint32_t res5; + // 72-75 + uint16_t threadId; + uint16_t threadDim; + // 76-79 + uint8_t arSize : 3; + uint8_t snoop : 1; + uint8_t res6 : 4; + uint8_t arCache : 4; + uint8_t arProt : 3; + uint8_t va : 1; + uint16_t res7; + // 80-83 + uint32_t loadAddress0BaseL; + // 84-87 + uint32_t loadAddress0BaseH : 17; + uint32_t res8 : 14; + uint32_t ld0En : 1; + // 88-91 + uint32_t loadAddress0Offset; + // 92-95 + uint32_t res9; + // 96-99 + uint32_t loadAddress1BaseL; + // 100-103 + uint32_t loadAddress1BaseH : 17; + uint32_t res10 : 14; + uint32_t ld1En : 1; + // 104-107 + uint32_t loadAddress1Offset; + // 108-127 + uint32_t res11[3]; + uint32_t cmpValue1; + uint32_t cmpValue2; +} rtFftsPlusCondSwitchCtx_t; + +// ffts plus persistent cache context +typedef struct tagFftsPlusPersistentCacheCtx { + // 0- 3bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t res1 : 7; + uint8_t aten : 1; + // 4-7 + uint8_t res2[2]; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint8_t res3[4]; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint8_t persistentEnable : 1; + uint8_t res4 : 7; + uint8_t res5; + uint16_t persistentSize; + // 68-71 + uint32_t persistentId; + // 72-127 + uint32_t res6[14]; +} 
rtFftsPlusPersistentCacheCtx_t; + +#pragma pack(pop) + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif +#endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h new file mode 100644 index 00000000..2ed9fd08 --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h @@ -0,0 +1,612 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: mbuf and queue interface + */ + +#ifndef CCE_RUNTIME_RT_MEM_QUEUE_H +#define CCE_RUNTIME_RT_MEM_QUEUE_H + +#include "base.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define RT_MQ_MAX_NAME_LEN 128 // same as driver's +#define RT_MQ_DEPTH_MIN 2U +#define RT_MQ_MODE_PUSH 1 +#define RT_MQ_MODE_PULL 2 +#define RT_MQ_MODE_DEFAULT RT_MQ_MODE_PUSH +#define RT_EVENT_SUMMARY_RSV 4 +#define RT_EVENT_MAX_MSG_LEN 128 + +typedef struct tagMemQueueInfo { + int32_t id; + int32_t size; + uint32_t depth; + int32_t status; +} rtMemQueueInfo_t; + +typedef struct tagMemQueueAttr { + char_t name[RT_MQ_MAX_NAME_LEN]; + uint32_t depth; + uint32_t workMode; + uint32_t flowCtrlDropTime; + bool flowCtrlFlag; + bool overWriteFlag; +} rtMemQueueAttr_t; + +typedef struct tagMemQueueShareAttr { + uint32_t manage : 1; + uint32_t read : 1; + uint32_t write : 1; + uint32_t rsv : 29; +} rtMemQueueShareAttr_t; + +typedef struct tagMemQueueBuffInfo { + void *addr; + size_t len; +} rtMemQueueBuffInfo; + +typedef struct tagMemQueueBuff { + void *contextAddr; + size_t contextLen; + rtMemQueueBuffInfo *buffInfo; + uint32_t buffCount; +} rtMemQueueBuff_t; + + +typedef enum tagMemQueueQueryCmd { + RT_MQ_QUERY_QUE_ATTR_OF_CUR_PROC = 0, // input is qid(4bytes), output is rtMemQueueShareAttr_t + RT_MQ_QUERY_QUES_OF_CUR_PROC = 1, + RT_MQ_QUERY_CMD_MAX = 2 +} rtMemQueueQueryCmd_t; + +#define RT_MQ_EVENT_QS_MSG 27 // same as driver's + +#define RT_MQ_SCHED_PRIORITY_LEVEL0 0 // same as driver's +#define RT_MQ_SCHED_PRIORITY_LEVEL1 1 +#define RT_MQ_SCHED_PRIORITY_LEVEL2 2 +#define RT_MQ_SCHED_PRIORITY_LEVEL3 3 +#define RT_MQ_SCHED_PRIORITY_LEVEL4 4 +#define RT_MQ_SCHED_PRIORITY_LEVEL5 5 +#define RT_MQ_SCHED_PRIORITY_LEVEL6 6 +#define RT_MQ_SCHED_PRIORITY_LEVEL7 7 + +/* Events can be released between different systems. This parameter specifies the destination type of events + to be released. The destination type is defined based on the CPU type of the destination system. */ +#define RT_MQ_DST_ENGINE_ACPU_DEVICE 0 // device AICPU, same as driver's +#define RT_MQ_DST_ENGINE_ACPU_HOST 1 // Host AICPU +#define RT_MQ_DST_ENGINE_CCPU_DEVICE 2 // device CtrlCPU +#define RT_MQ_DST_ENGINE_CCPU_HOST 3 // Host CtrlCPU +#define RT_MQ_DST_ENGINE_DCPU_DEVICE 4 // device DataCPU +#define RT_MQ_DST_ENGINE_TS_CPU 5 // device TS CPU +#define RT_MQ_DST_ENGINE_DVPP_CPU 6 // device DVPP CPU + +#define RT_MQ_SCHED_EVENT_QS_MSG 25 // same as driver's EVENT_QS_MSG + +/* When the destination engine is AICPU, select a policy. + ONLY: The command is executed only on the local AICPU. + FIRST: The local AICPU is preferentially executed. If the local AICPU is busy, the remote AICPU can be used. 
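+   An illustrative (hypothetical) setup of the rtEschedEventSummary_t fields
+   declared below, targeting a device AICPU:
+     summary.dstEngine = RT_MQ_DST_ENGINE_ACPU_DEVICE;
+     summary.policy = RT_SCHEDULE_POLICY_FIRST;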
*/ +#define RT_SCHEDULE_POLICY_ONLY 0 // same as driver's schedule_policy +#define RT_SCHEDULE_POLICY_FIRST 1 // same as driver's schedule_policy + + +typedef struct tagEschedEventSummary { + int32_t pid; // dst PID + uint32_t grpId; + int32_t eventId; // only RT_MQ_SCHED_EVENT_QS_MSG is supported + uint32_t subeventId; + uint32_t msgLen; + char_t *msg; + uint32_t dstEngine; // dst system cpu type + int32_t policy; // RT_SCHEDULE_POLICY_ONLY or RT_SCHEDULE_POLICY_FIRST +} rtEschedEventSummary_t; + +typedef struct tagEschedEventReply { + char_t *buf; + uint32_t bufLen; + uint32_t replyLen; // output, ack msg len, same with msgLen in halEschedAckEvent +} rtEschedEventReply_t; + +#define RT_DEV_PROCESS_CP1 0 +#define RT_DEV_PROCESS_CP2 1 +#define RT_DEV_PROCESS_DEV_ONLY 2 +#define RT_DEV_PROCESS_QS 3 +#define RT_DEV_PROCESS_SIGN_LENGTH 49 + +typedef struct tagBindHostpidInfo { + int32_t hostPid; + uint32_t vfid; + uint32_t chipId; + int32_t cpType; // type of custom-process, see RT_DEV_PROCESS_XXX +} rtBindHostpidInfo_t; + +#define RT_MEM_BUFF_MAX_CFG_NUM 64 + +typedef struct { + uint32_t cfgId; // cfg id, start from 0 + uint32_t totalSize; // one zone total size + uint32_t blkSize; // blk size, 2^n (0, 2M] + uint32_t maxBufSize; // max size can alloc from zone + uint32_t pageType; // page type, small page / huge page + int32_t elasticEnable; // elastic enable + int32_t elasticRate; + int32_t elasticRateMax; + int32_t elasticHighLevel; + int32_t elasticLowLevel; +} rtMemZoneCfg_t; + +typedef struct { + rtMemZoneCfg_t cfg[RT_MEM_BUFF_MAX_CFG_NUM]; +}rtMemBuffCfg_t; + +typedef enum rt_queue_work_mode { + RT_QUEUE_MODE_PUSH = 1, + RT_QUEUE_MODE_PULL, +} RT_QUEUE_WORK_MODE; + +typedef void *rtMbufPtr_t; + +typedef enum rtEventIdType { + RT_EVENT_RANDOM_KERNEL, /* Random operator event */ + RT_EVENT_DVPP_MSG, /* operator events committed by DVPP */ + RT_EVENT_FR_MSG, /* operator events committed by feature retrieval */ + RT_EVENT_TS_HWTS_KERNEL, /* operator events committed by ts/hwts */ + RT_EVENT_AICPU_MSG, /* aicpu activates its own stream events */ + RT_EVENT_TS_CTRL_MSG, /* control message events of TS */ + RT_EVENT_QUEUE_ENQUEUE, /* entry event of Queue(consumer) */ + RT_EVENT_QUEUE_FULL_TO_NOT_FULL, /* full to non-full events of Queue(producers) */ + RT_EVENT_QUEUE_EMPTY_TO_NOT_EMPTY, /* empty to non-empty event of Queue(consumer) */ + RT_EVENT_TDT_ENQUEUE, /* data entry event of TDT */ + RT_EVENT_TIMER, /* ros timer */ + RT_EVENT_HCFI_SCHED_MSG, /* scheduling events of HCFI */ + RT_EVENT_HCFI_EXEC_MSG, /* execution events of HCFI */ + RT_EVENT_ROS_MSG_LEVEL0, + RT_EVENT_ROS_MSG_LEVEL1, + RT_EVENT_ROS_MSG_LEVEL2, + RT_EVENT_ACPU_MSG_TYPE0, + RT_EVENT_ACPU_MSG_TYPE1, + RT_EVENT_ACPU_MSG_TYPE2, + RT_EVENT_CCPU_CTRL_MSG, + RT_EVENT_SPLIT_KERNEL, + RT_EVENT_DVPP_MPI_MSG, + RT_EVENT_CDQ_MSG, + /* Add a new event here */ + RT_EVENT_TEST, /* Reserve for test */ + RT_EVENT_MAX_NUM +} rtEventIdType_t; + +typedef enum rtGroupType { + /* Bound to an AICPU, multiple threads can be woken up simultaneously within a group */ + RT_GRP_TYPE_BIND_DP_CPU = 1, + RT_GRP_TYPE_BIND_CP_CPU, /* Bind to the control CPU */ + RT_GRP_TYPE_BIND_DP_CPU_EXCLUSIVE /* Bound to an AICPU, intra-group threads are mutex awakened */ +} rtGroupType_t; + +typedef struct tagInitFlowGwInfo { + const char_t *groupName; + uint64_t schedPolicy; + uint64_t reschedInterval; + char_t rsv[128]; +} rtInitFlowGwInfo_t; + +/** + * @ingroup rt_mem_queue + * @brief init queue schedule + * @param [in] devId the logical device id + * @param [in]
grpName the name of group, can be nullptr + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueInitQS(int32_t devId, const char_t *grpName); + +/** + * @ingroup rt_mem_queue + * @brief init flow gateway + * @param [in] devId the logical device id + * @param [in] initInfo Initialization parameters + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueInitFlowGw(int32_t devId, const rtInitFlowGwInfo_t * const initInfo); + +/** + * @ingroup rt_mem_queue + * @brief create mbuf queue + * @param [in] devId the logical device id + * @param [in] queAttr attribute of queue + * @param [out] qid queue id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueCreate(int32_t devId, const rtMemQueueAttr_t *queAttr, uint32_t *qid); + +/** + * @ingroup rt_mem_queue + * @brief destroy mbuf queue + * @param [in] devId the logical device id + * @param [in] qid queue id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDestroy(int32_t devId, uint32_t qid); + +/** + * @ingroup rt_mem_queue + * @brief init mbuf queue + * @param [in] devId the logical device id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueInit(int32_t devId); + +/** + * @ingroup rt_mem_queue + * @brief enqueue memBuf + * @param [in] devId the logical device id + * @param [in] qid queue id + * @param [in] memBuf enqueue memBuf + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *memBuf); + + +/** + * @ingroup rt_mem_queue + * @brief dequeue memBuf + * @param [in] devId the logical device id + * @param [in] qid queue id + * @param [out] memBuf dequeue memBuf + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **memBuf); + +/** + * @ingroup rt_mem_queue + * @brief peek queue + * @param [in] devId the logical device id + * @param [in] qid queue id + * @param [out] bufLen length of mbuf in queue + * @param [in] timeout peek timeout (ms), -1: wait all the time until peeking success + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueuePeek(int32_t devId, uint32_t qid, size_t *bufLen, int32_t timeout); + +/** + * @ingroup rt_mem_queue + * @brief enqueue buff + * @param [in] devId the logical device id + * @param [in] qid queue id + * @param [in] inBuf enqueue buff + * @param [in] timeout enqueue timeout (ms), -1: wait all the time until enqueue success + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueEnQueueBuff(int32_t devId, uint32_t qid, rtMemQueueBuff_t *inBuf, int32_t timeout); + +/** + * @ingroup rt_mem_queue + * @brief dequeue buff + * @param [in] devId the logical device id + * @param [in] qid queue id + * @param [out] outBuf dequeue buff + * @param [in] timeout dequeue timeout (ms), -1: wait all the time until dequeue success + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDeQueueBuff(int32_t devId, uint32_t qid, rtMemQueueBuff_t *outBuf, int32_t timeout); + +/** + * @ingroup rt_mem_queue + * @brief query current queue info + * @param [in] devId the logical device id + * @param [in] qid queue id + * @param [out] queInfo current queue info + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueQueryInfo(int32_t devId, uint32_t qid, rtMemQueueInfo_t *queInfo); + +/** +* @ingroup rt_mem_queue +* @brief query queue status +* @param [in] devId: the logical device id +* @param [in] cmd: query cmd +* @param [in] inBuff: input buff +* @param [in] inLen: the length of input +* @param [in|out]
outBuff: output buff +* @param [in|out] outLen: the length of output +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueQuery(int32_t devId, rtMemQueueQueryCmd_t cmd, const void *inBuff, uint32_t inLen, + void *outBuff, uint32_t *outLen); + +/** +* @ingroup rt_mem_queue +* @brief grant queue +* @param [in] devId: logic devid +* @param [in] qid: queue id +* @param [in] pid: pid +* @param [in] attr: queue share attr +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueGrant(int32_t devId, uint32_t qid, int32_t pid, rtMemQueueShareAttr_t *attr); + +/** +* @ingroup rt_mem_queue +* @brief attach queue +* @param [in] devId: logic devid +* @param [in] qid: queue id +* @param [in] timeOut: timeOut +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueAttach(int32_t devId, uint32_t qid, int32_t timeOut); + +/** +* @ingroup rt_mem_queue +* @brief Commit the event to a specific process +* @param [in] devId: logic devid +* @param [in] evt: event summary info +* @param [out] ack: event reply info +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtEschedSubmitEventSync(int32_t devId, rtEschedEventSummary_t *evt, + rtEschedEventReply_t *ack); + +/** +* @ingroup rt_mem_queue +* @brief query device process id +* @param [in] info: see struct rtBindHostpidInfo_t +* @param [out] devPid: device process id +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtQueryDevPid(rtBindHostpidInfo_t *info, int32_t *devPid); + +/** +* @ingroup rt_mem_queue +* @brief device buff init +* @param [in] cfg, init cfg +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); + +/** +* @ingroup rt_mem_queue +* @brief alloc buff +* @param [out] memBuf: buff addr allocated +* @param [in] size: The amount of memory space requested +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size); + +/** +* @ingroup rt_mem_queue +* @brief free buff +* @param [in] memBuf: buff addr to be freed +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufFree(rtMbufPtr_t memBuf); + +/** +* @ingroup rt_mem_queue +* @brief set Data len of Mbuf +* @param [in] memBuf: Mbuf addr +* @param [in] len: data len +* @return RT_ERROR_NONE for success, others for fail +*/ +RTS_API rtError_t rtMbufSetDataLen(rtMbufPtr_t memBuf, uint64_t len); + +/** +* @ingroup rt_mem_queue +* @brief get Data addr of Mbuf +* @param [in] memBuf: Mbuf addr +* @param [out] buf: Mbuf data addr +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t memBuf, void **buf); + +/** +* @ingroup rt_mem_queue +* @brief get total Buffer size of Mbuf +* @param [in] memBuf: Mbuf addr +* @param [out] totalSize: total buffer size of Mbuf +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t memBuf, uint64_t *totalSize); + +/** +* @ingroup rt_mem_queue +* @brief Get the address and length of its user_data from the specified Mbuf +* @param [in] memBuf: Mbuf addr +* @param [out] priv: address of its user_data +* @param [out] size: length of its user_data +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t memBuf, void **priv, uint64_t *size); + +// mem group +typedef struct { + uint64_t maxMemSize; // max buf size in grp, in KB.
= 0 means no limit +} rtMemGrpConfig_t; + +typedef struct { + uint32_t admin : 1; // admin permission, can add other proc to grp + uint32_t read : 1; // read only permission + uint32_t write : 1; // read and write permission + uint32_t alloc : 1; // alloc permission (have read and write permission) + uint32_t rsv : 28; +} rtMemGrpShareAttr_t; + +#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp + +typedef struct { + int32_t pid; +} rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + +typedef struct { + int32_t cmd; + union { + rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + }; +} rtMemGrpQueryInput_t; + +#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN + +typedef struct { + char_t groupName[RT_MEM_GRP_NAME_LEN]; // group name + rtMemGrpShareAttr_t attr; // process in group attribute +} rtMemGrpOfProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + +typedef struct { + rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + size_t maxNum; // max number of result + size_t resultNum; // if the number of results exceeds 'maxNum', only 'maxNum' results are filled in buffer +} rtMemGrpQueryOutput_t; + +/** +* @ingroup rt_mem_queue +* @brief create mem group +* @attention null +* @param [in] name, group name +* @param [in] cfg, group cfg +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpCreate(const char_t *name, const rtMemGrpConfig_t *cfg); + +/** +* @ingroup rt_mem_queue +* @brief add process to group +* @param [in] name, group name +* @param [in] pid, process id +* @param [in] attr, process permission in group +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpAddProc(const char_t *name, int32_t pid, const rtMemGrpShareAttr_t *attr); + +/** +* @ingroup rt_mem_queue +* @brief attach process to check permission in group +* @param [in] name, group name +* @param [in] timeout, time out ms +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpAttach(const char_t *name, int32_t timeout); + +/** +* @ingroup rt_mem_queue +* @brief buff group query +* @param [in] input, query input +* @param [in|out] output, query output +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpQuery(const rtMemGrpQueryInput_t *input, rtMemGrpQueryOutput_t *output); + +/** +* @ingroup rt_mem_queue +* @brief get queue id by group name +* @param [in] devId, device id +* @param [in] name, group name +* @param [out] qId, queue id +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemQueueGetQidByName(int32_t devId, const char_t *name, uint32_t *qId); + +/** +* @ingroup rt_mem_queue +* @brief esched attach device +* @param [in] devId, device id +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtEschedAttachDevice(int32_t devId); + +/** +* @ingroup rt_mem_queue +* @brief esched detach device +* @param [in] devId, device id +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtEschedDettachDevice(int32_t devId); + +/** +* @ingroup rt_mem_queue +* @brief esched wait event +* @param [in] devId, device id +* @param [in] grpId, group id +* @param [in] threadId, thread id +* @param [in] timeout +* @param [in] evt +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtEschedWaitEvent(int32_t devId, uint32_t grpId, uint32_t threadId, + int32_t timeout, rtEschedEventSummary_t *evt); + +/** +* @ingroup rt_mem_queue +* @brief esched create group +* @param [in] devId, device id +* @param [in] grpId, group id +*
@param [in] type, group type +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtEschedCreateGrp(int32_t devId, uint32_t grpId, rtGroupType_t type); + +/** +* @ingroup rt_mem_queue +* @brief esched submit event +* @param [in] devId, device id +* @param [in] evt +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtEschedSubmitEvent(int32_t devId, rtEschedEventSummary_t *evt); + +/** +* @ingroup rt_mem_queue +* @brief esched subscribe event +* @param [in] devId, device id +* @param [in] grpId, group id +* @param [in] threadId, thread id +* @param [in] eventBitmap +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtEschedSubscribeEvent(int32_t devId, uint32_t grpId, uint32_t threadId, uint64_t eventBitmap); + +/** +* @ingroup rtEschedAckEvent +* @brief esched ack event +* @param [in] devId, device id +* @param [in] evtId, event type +* @param [in] subEvtId, sub event type +* @param [in] msg, message info +* @param [in] len, message length +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtEschedAckEvent(int32_t devId, rtEventIdType_t evtId, + uint32_t subEvtId, char_t *msg, uint32_t len); + +/** +* @ingroup rtQueueSubF2NFEvent +* @brief full to not full event +* @param [in] devId, device id +* @param [in] qId, queue id +* @param [in] groupId, group id +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtQueueSubF2NFEvent(int32_t devId, uint32_t qId, uint32_t groupId); + +/** +* @ingroup rtQueueSubscribe +* @brief queue subscribe +* @param [in] devId, device id +* @param [in] qId, queue id +* @param [in] groupId, group id +* @param [in] type +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtQueueSubscribe(int32_t devId, uint32_t qId, uint32_t groupId, int32_t type); + +/** +* @ingroup rtBufEventTrigger +* @brief buf event trigger +* @param [in] name, group name +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtBufEventTrigger(const char_t *name); + +#if defined(__cplusplus) +} +#endif +#endif // CCE_RUNTIME_RT_MEM_QUEUE_H diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index a7618b45..c3d4bbd1 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -1,25 +1,15 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
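Before the diff moves on to rt_model.h: the esched calls above combine into a simple consumer loop — attach the device, create (or join) a group, subscribe the interesting events via a bitmap, then block in rtEschedWaitEvent. A hedged sketch; the one-bit-per-event-id bitmap layout, the group/thread ids, and the blocking behaviour of timeout -1 (by analogy with the queue APIs above) are assumptions:

```c
/* Hypothetical worker-side event wait; ids and bitmap layout are assumptions. */
static rtError_t WaitForQsMsg(int32_t devId) {
    const uint32_t grpId = 1U;
    const uint32_t threadId = 0U;

    rtError_t ret = rtEschedAttachDevice(devId);
    if (ret != RT_ERROR_NONE) { return ret; }
    ret = rtEschedCreateGrp(devId, grpId, RT_GRP_TYPE_BIND_DP_CPU);
    if (ret != RT_ERROR_NONE) { return ret; }
    ret = rtEschedSubscribeEvent(devId, grpId, threadId,
                                 1ULL << (uint64_t)RT_MQ_SCHED_EVENT_QS_MSG);
    if (ret != RT_ERROR_NONE) { return ret; }

    rtEschedEventSummary_t evt = {0};
    /* timeout -1 assumed to block until an event arrives */
    return rtEschedWaitEvent(devId, grpId, threadId, -1, &evt);
}
```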
+ * Description: rt_model.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_MODEL_H__ -#define __CCE_RUNTIME_MODEL_H__ +#ifndef CCE_RUNTIME_RT_MODEL_H +#define CCE_RUNTIME_RT_MODEL_H #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -42,7 +32,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_NOTIFY_WAIT, RT_MODEL_TASK_REDUCE_ASYNC, RT_MODEL_TASK_RDMA_SEND, - RT_MODEL_TASK_EVENT_RESET = 18, + RT_MODEL_TASK_EVENT_RESET, RT_MODEL_TASK_MODEL_END_GRAPH, RT_MODEL_TASK_STREAM_SWITCH_N, RT_MODEL_TASK_RDMA_DB_SEND, @@ -53,6 +43,12 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_ALL_KERNEL, RT_MODEL_TASK_PROFILER_TRACE_EX, RT_MODEL_TASK_FFTS_TASK, + RT_MODEL_TASK_FFTS_PLUS_TASK, + RT_MODEL_TASK_DSA_TASK, + RT_MODEL_TASK_CMO, + RT_MODEL_TASK_BARRIER, + RT_MODEL_TASK_NPU_GET_FLOAT_STATUS, + RT_MODEL_TASK_NPU_CLEAR_FLOAT_STATUS, } rtModelTaskType_t; typedef enum tagModelStreamType { @@ -65,16 +61,16 @@ typedef enum tagModelQueueFlag { RT_MODEL_OUTPUT_QUEUE = 1 } rtModelQueueFlag_t; -#define EXECUTOR_NONE ((uint32_t)0x0) -#define EXECUTOR_TS ((uint32_t)0x01) -#define EXECUTOR_AICPU ((uint32_t)0x02) +#define EXECUTOR_NONE (0x0U) +#define EXECUTOR_TS (0x01U) +#define EXECUTOR_AICPU (0x02U) /* * @ingroup rt_model * @brief debug flag for kernel exception dump */ -#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1 << 0) -#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1 << 1) +#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1U << 0U) +#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1U << 1U) /** * @ingroup @@ -124,9 +120,9 @@ typedef struct tagKernelTaskInfo { uint16_t argsCount; uint16_t argsSize; uint16_t reserved; - char *stubFunc; + const char_t *stubFunc; uint8_t *smDesc; - uint8_t *args; + const uint8_t *args; uint16_t *argsOffset; } rtKernelTaskInfo_t; @@ -135,17 +131,17 @@ typedef struct tagAllKernelTaskInfo { uint16_t argsCount; uint16_t argsSize; uint16_t reserved; - void *devfunc; + uint64_t tilingKey; void *handle; uint8_t *smDesc; - uint8_t *args; + const uint8_t *args; uint16_t *argsOffset; } rtAllKernelTaskInfo_t; typedef struct tagKernelTaskInfoEx { uint32_t flags; uint32_t argsSize; - void *args; + const void *args; uint32_t reserved[6]; } rtKernelTaskInfoEx_t; @@ -207,9 +203,9 @@ typedef struct tagProfilerTraceExTaskInfo { } rtProfilerTraceEx_t; typedef struct tagrtMemcpyAsyncTaskInfo { - void *dst; + const void *dst; uint64_t destMax; - void *src; + const void *src; uint64_t count; uint32_t kind; uint32_t reserved; @@ -221,9 +217,9 @@ typedef struct tagrtNotifyTaskInfo { } rtNotifyTaskInfo_t; typedef struct tagrtReduceAsyncTaskInfo { - void *dst; + const void *dst; uint64_t destMax; - void *src; + const void *src; uint64_t count; uint32_t kind; uint32_t type; @@ -267,6 +263,18 @@ typedef struct tagrtStreamLabelGotoTask_t { uint8_t reserved[36]; } rtStreamLabelGotoTask_t; +typedef struct tagrtNpuGetFloatStatusTask_t { + uint64_t outputAddr; + uint64_t outputSize; + uint32_t checkMode; + uint8_t reserved[20]; +} rtNpuGetFloatStatusTask_t; + +typedef struct tagrtNpuClearFloatStatusTask_t { + uint32_t checkMode; + uint8_t reserved[36]; +} rtNpuClearFloatStatusTask_t; + typedef struct tagTaskInfo { uint32_t type; uint32_t streamID; @@ -292,6 +300,8 @@ typedef struct tagTaskInfo { rtStreamSwitchNTaskInfo_t streamSwitchNTask; rtStreamLabelSwitchByIndexTask_t streamLabelSwitchIndexTask; rtStreamLabelGotoTask_t streamLabelGotoTask; + rtNpuGetFloatStatusTask_t npuGetFloatStatusTask; + rtNpuClearFloatStatusTask_t npuClearFloatStatusTask; 
uint32_t reserved[10]; } u; } rtTaskInfo_t; @@ -319,7 +329,7 @@ typedef struct tagLabelDevInfo_t { }u; }rtLabelDevInfo; -typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); +typedef rtError_t (*rtTaskGenCallback)(rtModel_t mdl, rtTaskInfo_t *taskInfo); /** * @ingroup rt_model @@ -333,165 +343,185 @@ RTS_API rtError_t rtSetTaskGenCallback(rtTaskGenCallback callback); /** * @ingroup rt_model * @brief create model instance - * @param [out] model created model + * @param [out] mdl created model * @param [in] flag reserved * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelCreate(rtModel_t *model, uint32_t flag); +RTS_API rtError_t rtModelCreate(rtModel_t *mdl, uint32_t flag); + +/** + * @ingroup rt_model + * @brief set ge model id to aicpu + * @param [in] mdl aicpu model + * @param [in] extId ge model id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +rtError_t rtModelSetExtId(rtModel_t mdl, uint32_t extId); /** * @ingroup rt_model * @brief destroy model instance - * @param [in] model model to destroy + * @param [in] mdl model to destroy * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelDestroy(rtModel_t model); +RTS_API rtError_t rtModelDestroy(rtModel_t mdl); /** * @ingroup rt_model * @brief bind model and stream instance - * @param [in] model binded model - * @param [in] stream binded stream + * @param [in] mdl bound model + * @param [in] stm bound stream + * @param [in] flag reserved * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelBindStream(rtModel_t model, rtStream_t stream, uint32_t flag); +RTS_API rtError_t rtModelBindStream(rtModel_t mdl, rtStream_t stm, uint32_t flag); /** * @ingroup rt_model * @brief unbind model and stream instance - * @param [in] model unbinded model - * @param [in] stream unbinded stream + * @param [in] mdl unbound model + * @param [in] stm unbound stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelUnbindStream(rtModel_t model, rtStream_t stream); +RTS_API rtError_t rtModelUnbindStream(rtModel_t mdl, rtStream_t stm); /** * @ingroup rt_model * @brief tell runtime Model has been Loaded - * @param [in] model model to execute + * @param [in] mdl model to execute * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtModelLoadComplete(rtModel_t model); +RTS_API rtError_t rtModelLoadComplete(rtModel_t mdl); /** * @ingroup rt_model * @brief execute model instance - * @param [in] model model to execute + * @param [in] mdl model to execute * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelExecute(rtModel_t model, rtStream_t stream, uint32_t flag); +RTS_API rtError_t rtModelExecute(rtModel_t mdl, rtStream_t stm, uint32_t flag); /** * @ingroup rt_model * @brief get model the last persist task id - * @param [in] model model to execute - * @param [out] taskid last task id of the model - * @param [out] streamid last steam id of the model + * @param [in] mdl model to execute + * @param [out] taskId last task id of the model + * @param [out] streamId last stream id of the model * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskid, uint32_t *streamid); +RTS_API rtError_t rtModelGetTaskId(rtModel_t mdl, uint32_t
*taskId, uint32_t *streamId); /** * @ingroup rt_model * @brief add a end graph task to stream - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [in] end graph stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEndGraph(rtModel_t model, rtStream_t stream); +RTS_API rtError_t rtEndGraph(rtModel_t mdl, rtStream_t stm); /** * @ingroup rt_model * @brief add a end graph task with flag to stream - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [in] end graph stream * @param [in] flags AICPU datadump * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEndGraphEx(rtModel_t model, rtStream_t stream, uint32_t flags); +RTS_API rtError_t rtEndGraphEx(rtModel_t mdl, rtStream_t stm, uint32_t flags); /** * @ingroup rt_model * @brief add a end graph task to stream - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [in] flags EXECUTOR_TS | EXECUTOR_AICPU * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelExecutorSet(rtModel_t model, uint8_t flags); +RTS_API rtError_t rtModelExecutorSet(rtModel_t mdl, uint8_t flags); /** * @ingroup rt_model * @brief abort model - * @param [in] model model to abort + * @param [in] mdl model to abort * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelAbort(rtModel_t model); +RTS_API rtError_t rtModelAbort(rtModel_t mdl); /** * @ingroup rt_model * @brief end graph task to model default stream - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [in] end graph stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelExit(rtModel_t model, rtStream_t stream); +RTS_API rtError_t rtModelExit(rtModel_t mdl, rtStream_t stm); /** * @ingroup rt_model * @brief bind queue - * @param [in] model model to bind + * @param [in] mdl model to bind * @param [in] queueId queueId to bind * @param [in] flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelBindQueue(rtModel_t model, uint32_t queueId, rtModelQueueFlag_t flag); +RTS_API rtError_t rtModelBindQueue(rtModel_t mdl, uint32_t queueId, rtModelQueueFlag_t flag); /** * @ingroup rt_model * @brief get model id - * @param [in] model + * @param [in] mdl * @param [out] modelId model id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelGetId(rtModel_t model, uint32_t *modelId); +RTS_API rtError_t rtModelGetId(rtModel_t mdl, uint32_t *modelId); /* * @ingroup rt_model * @brief enable debug for dump overflow exception * @param [in] addr: ddr address of kernel exception dumpped - * @param [in] model: model handle + * @param [in] mdl: model handle * @param [in] flag: debug flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, +RTS_API rtError_t rtDebugRegister(rtModel_t mdl, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId); /* * @ingroup rt_model * @brief disable debug for dump overflow exception - * @param [in] model: model handle + * @param [in] mdl: model handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t 
rtDebugUnRegister(rtModel_t mdl); + +/** + * @ingroup rt_model + * @brief set model group id + * @param [in] mdl model + * @param [in] schGrpId group id in [0,4]: 0 is the default (invalid) value, 1-4 are valid; at most 4 groups are supported * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDebugUnRegister(rtModel_t model); +RTS_API rtError_t rtModelSetSchGroupId(rtModel_t mdl, const int16_t schGrpId); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_MODEL_H__ +#endif // CCE_RUNTIME_RT_MODEL_H diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h new file mode 100644 index 00000000..b778550f --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: the definition of stars + */ + +#ifndef CCE_RUNTIME_RT_STARS_H +#define CCE_RUNTIME_RT_STARS_H + +#include "base.h" +#include "rt_stars_define.h" +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * @ingroup rt_stars + * @brief launch stars task. + * used to send stars sqe directly. + * @param [in] taskSqe stars task sqe + * @param [in] sqeLen stars task sqe length + * @param [in] stm associated stream + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stm); + + +/** + * @ingroup rt_stars + * @brief create cdq instance. + * @param [in] batchNum batch number + * @param [in] batchSize batch size + * @param [in] queName cdq name + * @return RT_ERROR_NONE for ok, ACL_ERROR_RT_NO_CDQ_RESOURCE for no cdq resources + */ +RTS_API rtError_t rtCdqCreate(uint32_t batchNum, uint32_t batchSize, const char_t *queName); + +/** + * @ingroup rt_stars + * @brief destroy cdq instance. + * @param [in] queName cdq name + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtCdqDestroy(const char_t *queName); + +/** + * @ingroup rt_stars + * @brief get free batch in the queue. + * @param [in] queName cdq name + * @param [in] timeout alloc timeout (ms) + * @param [out] batchId batch index + * @return RT_ERROR_NONE for ok, ACL_ERROR_RT_WAIT_TIMEOUT for timeout + */ +RTS_API rtError_t rtCdqAllocBatch(const char_t *queName, int32_t timeout, uint32_t *batchId); + +/** + * @ingroup rt_stars + * @brief launch a write_cdqm task on the stream. + * When the task is executed, the data information will be inserted into the cdqe index position of the queue. + * @param [in] queName cdq name + * @param [in] cdqeIndex cdqe index + * @param [in] data cdqe information + * @param [in] dataSize data size + * @param [in] stm launch task on the stream + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtCdqEnQueue(const char_t *queName, uint32_t cdqeIndex, void *data, uint32_t dataSize, + rtStream_t stm); + +/** + * @ingroup rt_stars + * @brief launch a write_cdqm task on the stream. + * When the task is executed, the data information will be inserted into the cdqe index position of the queue.
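Stepping back to the rt_model entry points above: they follow a create → bind → load-complete → execute → teardown sequence. A minimal single-stream sketch — the stream calls come from stream.h later in this diff, and the zero flags plus elided error handling are assumptions:

```c
/* Hypothetical end-to-end model run on one stream. */
static rtError_t RunModelOnce(void) {
    rtModel_t mdl = NULL;
    rtStream_t stm = NULL;

    (void)rtModelCreate(&mdl, 0U);
    (void)rtStreamCreate(&stm, RT_STREAM_PRIORITY_DEFAULT);
    (void)rtModelBindStream(mdl, stm, 0U);

    /* ... tasks are generated onto the bound stream here ... */

    (void)rtModelLoadComplete(mdl);   /* tell the runtime loading is done */
    (void)rtModelExecute(mdl, stm, 0U);
    (void)rtStreamSynchronize(stm);   /* wait for completion */

    (void)rtModelUnbindStream(mdl, stm);
    (void)rtStreamDestroy(stm);
    return rtModelDestroy(mdl);
}
```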
+ * @param [in] queName cdq name + * @param [in] cdqeIndex cdqe index + * @param [in] ptrAddr pointer of cdqe information + * @param [in] stm launch task on the stream + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtCdqEnQueuePtrMode(const char_t *queName, uint32_t cdqeIndex, const void *ptrAddr, + rtStream_t stm); + +/** + * @ingroup rt_stars + * @brief launch common cmo task on the stream. + * @param [in] taskInfo cmo task info + * @param [in] stm launch task on the stream + * @param [in] flag flag + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtCmoTaskLaunch(rtCmoTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); + +/** + * @ingroup rt_stars + * @brief launch barrier cmo task on the stream. + * @param [in] taskInfo barrier task info + * @param [in] stm launch task on the stream + * @param [in] flag flag + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtBarrierTaskLaunch(rtBarrierTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); +#if defined(__cplusplus) + +} +#endif +#endif // CCE_RUNTIME_RT_STARS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_stars_define.h b/third_party/fwkacllib/inc/runtime/rt_stars_define.h new file mode 100644 index 00000000..ef18877d --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_stars_define.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: the definition of stars + */ + +#ifndef CCE_RUNTIME_RT_STARS_DEFINE_H +#define CCE_RUNTIME_RT_STARS_DEFINE_H + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +#pragma pack(push) +#pragma pack (1) + +typedef struct tagStarsSqeHeader { + uint8_t type : 6; + uint8_t l1Lock : 1; + uint8_t l1Unlock : 1; + + uint8_t ie : 2; + uint8_t preP : 2; + uint8_t postP : 2; + uint8_t wrCqe : 1; + uint8_t reserved : 1; + + uint16_t blockDim; + + uint16_t rtStreamId; + uint16_t taskId; +} rtStarsSqeHeader_t; + +typedef struct tagStarsDsaSqe { + // 0-7 bytes + rtStarsSqeHeader_t sqeHeader; + // 8-11 bytes + uint32_t start : 1; + uint32_t functionType : 3; + uint32_t dataType : 3; + uint32_t algoType : 3; + uint32_t paramVldBitmap : 5; + uint32_t paramAddrValBitmap : 7; + uint32_t reserved0 : 10; + // 12-15 bytes + uint16_t sqeIndex; + uint8_t kernelCredit; + uint8_t reserved1; + // 16-31 bytes + uint32_t dsaCfgResultAddrLow; + uint32_t dsaCfgResultAddrHigh; + uint32_t dsaCfgStateAddrLow; + uint32_t dsaCfgStateAddrHigh; + // 32-47 bytes + uint32_t dsaCfgParamAddrLow; + uint32_t dsaCfgParamAddrHigh; + uint32_t dsaCfgSeedLow; + uint32_t dsaCfgSeedHigh; + // 48-63 bytes + uint32_t dsaCfgNumberLow; + uint32_t dsaCfgNumberHigh; + uint32_t reserved2[2]; +} rtStarsDsaSqe_t; + +// ffts+ type +typedef enum tagFftsPlusType { + RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved + RT_FFTS_PLUS_TYPE_RES2 = 3, // Reserved + RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode +} rtFftsPlusType_t; + +typedef struct tagStarsFftsPlusHeader { + uint8_t type : 6; + uint8_t l1Lock : 1; + uint8_t l1Unlock : 1; + + uint8_t ie : 2; + uint8_t preP : 2; + uint8_t postP : 2; + uint8_t wrCqe : 1; + /* tell mcu if this subgraph is overflow-enabled and mcu will send this flag to aicpu when aicpu ctx is executed */ + uint8_t overflowEn : 1; + + uint16_t blockDim; + + uint16_t rtStreamId; + uint16_t taskId; +} rtStarsFftsPlusHeader_t; +// ffts+ sqe +typedef struct tagFftsPlusSqe { + // 0-7 bytes + rtStarsSqeHeader_t sqeHeader; // use
rtStarsFftsPlusHeader_t instead + // 8-11 bytes + uint16_t fftsType : 3; + uint16_t reserved1 : 9; + uint16_t wrrRatio : 4; + uint16_t reserved2; + // 12-15 bytes + uint16_t sqeIndex; + uint8_t kernelCredit; + uint8_t reserved4; + // 16-23 bytes + uint32_t stackPhyBaseL; + uint32_t stackPhyBaseH; + // 24-31 bytes + uint16_t totalContextNum; + uint16_t readyContextNum; + uint16_t preloadContextNum; + uint16_t reserved5; + // 32-35 bytes + uint16_t reserved6; + uint16_t prefetchOstNum : 5; + uint16_t reserved9 : 3; + uint16_t cmaintOstNum : 5; + uint16_t reserved10 : 3; + // 36-39 bytes + uint16_t aicPrefetchLower : 5; + uint16_t reserved11 : 3; + uint16_t aicPrefetchUpper : 5; + uint16_t reserved12 : 3; + uint16_t aivPrefetchLower : 5; + uint16_t reserved13 : 3; + uint16_t aivPrefetchUpper : 5; + uint16_t reserved14 : 3; + // 40-47 bytes + uint32_t contextAddressBaseL; + uint32_t contextAddressBaseH : 17; + uint32_t reserved15 : 15; + // 48-63 bytes + uint32_t reserved16[4]; +} rtFftsPlusSqe_t; + +typedef struct tagCmoTaskInfo { + uint8_t qos; + uint8_t partId; + uint8_t pmg; + uint8_t reserved; + uint16_t cmoType; + uint16_t opCode; + uint16_t numInner; + uint16_t numOuter; + uint32_t logicId; + uint32_t lengthInner; + uint64_t sourceAddr; + uint32_t striderOuter; + uint32_t striderInner; +} rtCmoTaskInfo_t; + +typedef struct tagBarrierCmoInfo { + uint16_t cmoType; // 0 is barrier, 1 is invalid, Prefetch is 2, Write_back is 3, FE/GE only use invalid type. + uint32_t logicId; +} rtBarrierCmoInfo_t; + +#define RT_CMO_MAX_BARRIER_NUM 6U // 6U is max support +typedef struct tagBarrierTaskInfo { + uint8_t logicIdNum; + rtBarrierCmoInfo_t cmoInfo[RT_CMO_MAX_BARRIER_NUM]; +} rtBarrierTaskInfo_t; + +#pragma pack(pop) + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif +#endif // CCE_RUNTIME_RT_STARS_DEFINE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index f9981514..a6abc8fa 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -1,26 +1,16 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - - * http://www.apache.org/licenses/LICENSE-2.0 - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
+ * Description: stream.h + * Create: 2020-01-01 + */ -#ifndef __CCE_RUNTIME_STREAM_H__ -#define __CCE_RUNTIME_STREAM_H__ +#ifndef CCE_RUNTIME_STREAM_H +#define CCE_RUNTIME_STREAM_H #include "base.h" #include "event.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -28,95 +18,107 @@ extern "C" { * @ingroup stream_flags * @brief stream op bit flags */ -#define RT_STREAM_DEFAULT (0x00) -#define RT_STREAM_PERSISTENT (0x01) -#define RT_STREAM_FORCE_COPY (0x02) -#define RT_STREAM_HUGE (0x04) -#define RT_STREAM_AICPU (0x08) -#define RT_STREAM_FORBIDDEN_DEFAULT (0x10) -#define RT_STREAM_HEAD (0x20) -#define RT_STREAM_PRIMARY_DEFAULT (0x40) -#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) +#define RT_STREAM_DEFAULT (0x00U) +#define RT_STREAM_PERSISTENT (0x01U) +#define RT_STREAM_FORCE_COPY (0x02U) +#define RT_STREAM_HUGE (0x04U) +#define RT_STREAM_AICPU (0x08U) +#define RT_STREAM_FORBIDDEN_DEFAULT (0x10U) +#define RT_STREAM_HEAD (0x20U) +#define RT_STREAM_PRIMARY_DEFAULT (0x40U) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U) +#define RT_STREAM_OVERFLOW (0x100U) /** * @ingroup stream_type * @brief stream type */ -#define RT_NORMAL_STREAM (0x00) -#define RT_HUGE_STREAM (0x01) +#define RT_NORMAL_STREAM (0x00U) +#define RT_HUGE_STREAM (0x01U) /** * priority level default value when create a stream */ -#define RT_STREAM_PRIORITY_DEFAULT (0) +#define RT_STREAM_PRIORITY_DEFAULT (0U) /** * @ingroup dvrt_stream * @brief create stream instance - * @param [in|out] stream created stream + * @param [in|out] stm created stream * @param [in] priority stream priority * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamCreate(rtStream_t *stream, int32_t priority); +RTS_API rtError_t rtStreamCreate(rtStream_t *stm, int32_t priority); /** * @ingroup dvrt_stream * @brief create stream instance - * @param [in|out] stream created stream + * @param [in|out] stm created stream * @param [in] priority stream priority * @param [in] flags stream op flags * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamCreateWithFlags(rtStream_t *stream, int32_t priority, uint32_t flags); +RTS_API rtError_t rtStreamCreateWithFlags(rtStream_t *stm, int32_t priority, uint32_t flags); /** * @ingroup dvrt_stream * @brief destroy stream instance. 
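The stream flag bits above are OR-combined at creation time; the new overflow behaviour can also be toggled after the fact with the switch getter/setter pair declared near the end of this header. A small sketch, assuming this particular flag combination is accepted by the target runtime:

```c
/* Hypothetical stream creation with overflow detection enabled. */
static rtError_t MakeOverflowStream(rtStream_t *stm) {
    rtError_t ret = rtStreamCreateWithFlags(stm, RT_STREAM_PRIORITY_DEFAULT,
                                            RT_STREAM_PERSISTENT | RT_STREAM_OVERFLOW);
    if (ret != RT_ERROR_NONE) { return ret; }

    uint32_t enabled = 0U;
    ret = rtGetStreamOverflowSwitch(*stm, &enabled);   /* declared below */
    if ((ret == RT_ERROR_NONE) && (enabled == 0U)) {
        ret = rtSetStreamOverflowSwitch(*stm, 1U);     /* declared below */
    }
    return ret;
}
```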
- * @param [in] stream the stream to destroy + * @param [in] stm the stream to destroy * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamDestroy(rtStream_t stream); +RTS_API rtError_t rtStreamDestroy(rtStream_t stm); /** * @ingroup dvrt_stream * @brief wait an recorded event for stream - * @param [in] stream the wait stream + * @param [in] stm the wait stream + * @param [in] evt the event to wait + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamWaitEvent(rtStream_t stm, rtEvent_t evt); + +/** + * @ingroup dvrt_stream + * @brief wait a recorded event for stream, used for 1951 pg1 + * @param [in] stm the wait stream * @param [in] event the event to wait + * @param [in] timeout timeout value for 1951 pg1 * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event); +RTS_API rtError_t rtStreamWaitEventWithTimeout(rtStream_t stm, rtEvent_t evt, uint32_t timeout); /** * @ingroup dvrt_stream * @brief wait stream to be complete - * @param [in] stream stream to wait + * @param [in] stm stream to wait * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamSynchronize(rtStream_t stream); +RTS_API rtError_t rtStreamSynchronize(rtStream_t stm); /** * @ingroup dvrt_stream * @brief queries an asynchronous stream for completion status - * @param [in] stream stream to query + * @param [in] stm stream to query * @return RT_ERROR_NONE for complete * @return RT_ERROR_STREAM_NOT_COMPLETE for not complete */ -RTS_API rtError_t rtStreamQuery(rtStream_t stream); +RTS_API rtError_t rtStreamQuery(rtStream_t stm); /** * @ingroup dvrt_stream * @brief get stream id from a stream handle - * @param [in] stream stream hadle + * @param [in] stm stream handle * @param [in] streamId stream id * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetStreamId(rtStream_t stream, int32_t *streamId); +RTS_API rtError_t rtGetStreamId(rtStream_t stm, int32_t *streamId); /** * @ingroup dvrt_stream @@ -132,26 +134,26 @@ RTS_API rtError_t rtGetMaxStreamAndTask(uint32_t streamType, uint32_t *maxStrCou /** * @ingroup dvrt_stream * @brief Name a stream - * @param [in] stream stream to be named + * @param [in] stm stream to be named * @param [in] name identification name * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameStream(rtStream_t stream, const char *name); +RTS_API rtError_t rtNameStream(rtStream_t stm, const char_t *name); /** * @ingroup dvrt_stream * @brief switch to the corresponding stream according to the contents of the ptr * @param [in] ptr Determine the address where the value of the true and false branches is located * @param [in] condition switch condition - * @param [in] value switch value + * @param [in] val switch value * @param [in] trueStream Stream that needs to be activated when the value is non-zero - * @param [in] stream input stream to init task + * @param [in] stm input stream to init task * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t trueStream, - rtStream_t stream); +RTS_API rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t val, rtStream_t trueStream, +
rtStream_t stm); /** * @brief execute extensible stream switch task * @param [in] ptr pointer of value * @param [in] condition judge condition * @param [in] value_ptr pointer of target value * @param [in] true_stream stream to be activated when value is not zero - * @param [in] stream stream id + * @param [in] stm stream id * @param [in] dataType data type of target value * @return RT_ERROR_NONE for complete */ RTS_API rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *valuePtr, rtStream_t trueStream, - rtStream_t stream, rtSwitchDataType_t dataType); + rtStream_t stm, rtSwitchDataType_t dataType); /** * @ingroup dvrt_stream * @brief Active a stream * @param [in] activeStream stream to be activated - * @param [in] stream input stream to init task + * @param [in] stm input stream to init task * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream); +RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stm); /** * @brief execute extensible stream case switch task @@ -183,36 +185,56 @@ RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream); * @param [in] valuePtr pointer of target value, length = size * elementSize * @param [in] trueStreamPtr streams to be activated * @param [in] elementSize size of to be activated true streams - * @param [in] stream input stream to init task + * @param [in] stm input stream to init task * @param [in] dataType data type of target value * @return RT_ERROR_NONE for complete */ RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, - uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); + uint32_t elementSize, rtStream_t stm, rtSwitchDataType_t dataType); /* * @ingroup dvrt_stream * @brief enable debug for dump overflow exception with stream * @param [in] addr: ddr address of kernel exception dumpped - * @param [in] stream: stream handle + * @param [in] stm: stream handle * @param [in] flag: debug flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, +RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stm, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId); /* * @ingroup rt_model * @brief disable debug for dump overflow exception with stream - * @param [in] stream: stream handle + * @param [in] stm: stream handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stm); + +/* + * @ingroup dvrt_stream + * @brief enable or disable stream overflow + * @param [in] stm: stream handle + * @param [in] flags: 0:disable others:enable + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetStreamOverflowSwitch(rtStream_t stm, uint32_t flags); + +/* + * @ingroup dvrt_stream + * @brief get whether overflow of the stream is enabled or disabled + * @param [in] stm: stream handle + * @param [out] flags: 0:disable others:enable * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); +RTS_API rtError_t rtGetStreamOverflowSwitch(rtStream_t stm, uint32_t *flags); -#if defined(__cplusplus) &&
!defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // CCE_RUNTIME_STREAM_H diff --git a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h deleted file mode 100644 index bef5c05d..00000000 --- a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h +++ /dev/null @@ -1,52 +0,0 @@ -/** -* @file ExternalSoftDp.h -* -* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ - -#ifndef EXTERNALSOFTDP_H -#define EXTERNALSOFTDP_H - -#include - -extern "C" { -struct SoftDpProcsessInfo { - uint8_t* inputBuffer; - uint32_t inputBufferSize; - - uint8_t* outputBuffer; - uint32_t outputBufferSize; - - uint32_t outputWidth; - uint32_t outputHeight; - - uint32_t reserved; -}; - -struct DpCropInfo { - uint32_t left; - uint32_t right; - uint32_t up; - uint32_t down; -}; - -/* - * @brief decode and resize interface - * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct - * @return success: return 0, fail: return error number - */ -uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo); - -/* - * @brief decode crop and resize interface - * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct - * @param [in] const DpCropInfo& cropInfo: crop struct - * @return success: return 0, fail: return error number - */ -uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo); -} -#endif // EXTERNALSOFTDP_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/tdt/data_common.h b/third_party/fwkacllib/inc/tdt/data_common.h index 7b1d631b..a9b347c4 100644 --- a/third_party/fwkacllib/inc/tdt/data_common.h +++ b/third_party/fwkacllib/inc/tdt/data_common.h @@ -1,21 +1,14 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* @file data_common.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is used to data structure +*/ #ifndef HOST_INNER_INC_DATA_COMMON_H_ #define HOST_INNER_INC_DATA_COMMON_H_ +#include namespace tdt { #ifndef TDT_DATA_TYPE diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index d5050f35..b78eee75 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -1,18 +1,10 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* @file data_common.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is used to describe data structures +*/ #ifndef HOST_INNER_INC_DATA_COMMON_H_ #define HOST_INNER_INC_DATA_COMMON_H_ +#include namespace tdt { #ifndef TDT_DATA_TYPE diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index d5050f35..b78eee75 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -1,18 +1,10 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* @file status.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is used to describe status +*/ #ifndef INC_TDT_STATUS_H_ #define INC_TDT_STATUS_H_ diff --git a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h index 3e7d11ee..ea23211c 100644 --- a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h +++ b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h @@ -1,18 +1,10 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* @file tdt_host_interface.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is used for the host server +*/ #ifndef HOST_INNER_INC_TDT_HOST_INTERFACE_H_ #define HOST_INNER_INC_TDT_HOST_INTERFACE_H_ diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h index 665c8b82..b4accbf6 100644 --- a/third_party/fwkacllib/inc/tdt/tsd_client.h +++ b/third_party/fwkacllib/inc/tdt/tsd_client.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Hisilicon Technologies Co., Ltd. 2018-2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,21 +14,37 @@ * limitations under the License. */ -#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H_ -#define TDT_HOST_INNER_INC_TSD_CLIENT_H_ +#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H +#define TDT_HOST_INNER_INC_TSD_CLIENT_H #include #include #include #include -#include "tdt/status.h" -#include "tdt/data_common.h" +#include "tsd/status.h" #include "toolchain/prof_callback.h" +#ifdef WIN_TSD +#define TDT_LIB_EXPORT __declspec(dllexport) +#else +#define TDT_LIB_EXPORT __attribute__((visibility("default"))) +#endif + #ifdef __cplusplus extern "C" { #endif // __cplusplus +struct InitFlowGwInfo { + const char_t *groupName; + uint64_t schedPolicy; + uint64_t reschedInterval; + char_t rsv[128]; +}; + +typedef enum { + TSD_CAPABILITY_PIDQOS = 0, + TSD_CAPABILITY_BUT +} TsdCapabilityType; /** * @ingroup Open * @brief Used for the Framework process to communicate with the TSDDaemon process, @@ -50,34 +66,42 @@ extern "C" { * @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); +TDT_LIB_EXPORT uint32_t TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); /** -* @ingroup Close -* @brief notify TSDClient close resource +* @ingroup Open +* @brief Used for the Framework process to communicate with the TSDDaemon process in 1981, +* and notify TSD to complete the initialization of other processes * * @par Function -* notify TSDClient close resource +* Used for the Framework process to communicate with the TSDDaemon process, +* and notify TSD to complete the initialization of other processes * -* @param NA +* @param logicDeviceId [IN] type #unsigned int. Logic device ID +* @param rankSize [IN] type #unsigned int. The rankSize of the training. +* The default value is 1. When rankSize is greater than 1, +* HCCP will be pulled to perform set communication related operations. +* @param deviceMode [IN] type unsigned int. The device running mode of aicpuSd, +* it includes chipMode and DieMode * @retval TDT_OK Success * @retval OtherValues Failure * * @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); +TDT_LIB_EXPORT uint32_t TsdOpenEx(const uint32_t logicDeviceId, const uint32_t rankSize, const uint32_t deviceMode); /** -* @ingroup UpdateProfilingMode -* @brief notify TSDClient update profiling mode +* @ingroup InitialQs +* @brief Used for the Framework process to communicate with the TSDDaemon process, +* and notify TSD to complete the initialization of QS processes * * @par Function -* notify TSDClient update profiling mode +* Used for the Framework process to communicate with the TSDDaemon process, +* and notify TSD to complete the initialization of other processes * -* @param NA +* @param logicDeviceId [IN] type #unsigned int. Logic device ID +* @param groupName [IN] type #char pointer. qs group name sent by host process * @retval TDT_OK Success * @retval OtherValues Failure * * @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); +TDT_LIB_EXPORT uint32_t TsdInitQs(const uint32_t logicDeviceId, const char_t * const groupName = nullptr); /** -* @ingroup TsdSetMsprofReporterCallback -* @brief 用于推理场景下设置aicpu的profilng的callback函数 +* @ingroup InitFlowGw +* @brief Used for the Framework process to communicate with the TSDDaemon process, +* and notify TSD to complete the initialization of FlowGw processes * * @par Function -* 设置offline模式下aicpu_sd进程的profiling的callback函数 +* Used for the Framework process to communicate with the TSDDaemon process, +* and notify TSD to complete the initialization of other processes * -* @param callback [IN] type #MsprofReporterCallback. 回调函数 +* @param logicDeviceId [IN] type #unsigned int. Logic device ID +* @param initInfo [IN] type #InitFlowGwInfo pointer.
Initialization parameters * @retval TDT_OK Success * @retval OtherValues Failure * * @par Dependency * @li libtsdclient.so: Library to which the interface belongs. * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined -* @li prof_callback.h: Headerfile where 'MsprofReporterCallback' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); +TDT_LIB_EXPORT uint32_t TsdInitFlowGw(const uint32_t logicDeviceId, const InitFlowGwInfo * const initInfo); /** -* @ingroup CreateCmdParameterObj -* @brief creat tsdclient func parameter obj. +* @ingroup Close +* @brief notify TSDClient close resource * * @par Function -* creat tsdclient func parameter obj. +* notify TSDClient close resource * -* @param type [IN] type tdt::TsdCmdType, tsd func type. -* @param cmdParameterObj [IN] type void *, func parameter obj. +* @param NA * @retval TDT_OK Success -* @retval TDT_INTERFACE_NOT_SUPPORT +* @retval OtherValues Failure * * @par Dependency + * @li libtsdclient.so: Library to which the interface belongs. -* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. -* @li status.h: Header file where 'TDT_StatusT' defined +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_StatusT CreateCmdParameterObj(tdt::TsdCmdType type, void **cmdParameterObj); +TDT_LIB_EXPORT uint32_t TsdClose(const uint32_t logicDeviceId); /** -* @ingroup SetCmdParameterObjAttribute -* @brief set cmdParameterObj input value. +* @ingroup UpdateProfilingMode +* @brief notify TSDClient update profiling mode * * @par Function -* set cmdParameterObj input value. +* notify TSDClient update profiling mode * -* @param type [IN] type tdt::TsdCmdType, tsd func type. -* @param cmdParameterObj [IN] type void *, func parameter obj. -* @param itemType [IN] type tdt::InputItem, func input type. -* @param valuePtr [IN] type const void *, input value. -* @param valueLength [IN] type int, input value length. +* @param NA * @retval TDT_OK Success -* @retval TDT_INTERFACE_NOT_SUPPORT +* @retval OtherValues Failure * * @par Dependency * @li libtsdclient.so: Library to which the interface belongs. -* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. -* @li status.h: Header file where 'TDT_StatusT' defined +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_StatusT SetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, const void *valuePtr, int valueLength); +TDT_LIB_EXPORT uint32_t UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); /** -* @ingroup GetCmdParameterObjAttribute -* @brief set cmdParameterObj input value. +* @ingroup TsdSetMsprofReporterCallback +* @brief Used to set the aicpu profiling callback function in inference scenarios * * @par Function -* set cmdParameterObj input value. +* Sets the profiling callback function of the aicpu_sd process in offline mode * -* @param type [IN] type tdt::TsdCmdType, tsd func type. -* @param cmdParameterObj [IN] type void *, func parameter obj. -* @param itemType [IN] type tdt::InputItem, func input type. -* @param valuePtr [IN] type const void *, input value. -* @param valueLength [IN] type int, input value length. +* @param callback [IN] type #MsprofReporterCallback. Callback function * @retval TDT_OK Success -* @retval TDT_INTERFACE_NOT_SUPPORT +* @retval OtherValues Failure * * @par Dependency * @li libtsdclient.so: Library to which the interface belongs. -* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. -* @li status.h: Header file where 'TDT_StatusT' defined +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined +* @li prof_callback.h: Header file where 'MsprofReporterCallback' defined */ -TDT_StatusT GetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, void *valuePtr, int &valueLength); +TDT_LIB_EXPORT uint32_t TsdSetMsprofReporterCallback(const MsprofReporterCallback callback); /** -* @ingroup TsdClientCmd -* @brief creat tsdclient func parameter obj. +* @ingroup TsdSetAttr +* @brief used to set tsd attr * -* @par Function -* creat tsdclient func parameter obj. +* @par key +* key set for tsd attr, now only supports RunMode * -* @param type [IN] type tdt::TsdCmdType, tsd func type. -* @param cmdParameterObj [IN] type void *, func parameter obj. +* @par value +* value set to run the corresponding mode, PROCESS_MODE or THREAD_MODE * @retval TDT_OK Success -* @retval TDT_INTERFACE_NOT_SUPPORT +* @retval OtherValues Failure +*/ +TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue); + +/** +* @ingroup TsdCapabilityGet +* @brief use tsd to get some capability * -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. -* @li status.h: Header file where 'TDT_StatusT' defined +* @par type +* capability type +* +* @par ptr +* the result +* @retval TDT_OK Success +* @retval OtherValues Failure +*/ +TDT_LIB_EXPORT uint32_t TsdCapabilityGet(const uint32_t logicDeviceId, const int32_t type, const uint64_t ptr); + + +/** +* @ingroup GetHdcConctStatus +* @brief used to get hdc connection status +* +* @par logicDeviceId +* logic device id +* +* @par hdcSessStat +* hdc session status, DRV_ERROR_SOCKET_CONNECT or DRV_ERROR_SOCKET_CLOSE +* @retval TDT_OK Success +* @retval OtherValues Failure */ -TDT_StatusT TsdClientCmd(tdt::TsdCmdType cmd, void *cmdParameterObj); +TDT_LIB_EXPORT uint32_t GetHdcConctStatus(const uint32_t logicDeviceId, int32_t *hdcSessStat); #ifdef __cplusplus } #endif // __cplusplus -#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H_ +#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_callback.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_callback.h new file mode 100644 index 00000000..ca428e6a --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_callback.h @@ -0,0 +1,35 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef ADX_DATADUMP_CALLBACK_H +#define ADX_DATADUMP_CALLBACK_H +#include +namespace Adx { +const uint32_t MAX_FILE_PATH_LENGTH = 4096; +struct DumpChunk { + char fileName[MAX_FILE_PATH_LENGTH]; // file name, absolute path + uint32_t bufLen; // dataBuf length + uint32_t isLastChunk; // is last chunk. 0: not 1: yes + int64_t offset; // Offset in file. -1: append write + int32_t flag; // flag + uint8_t dataBuf[0]; // data buffer +}; + + int AdxRegDumpProcessCallBack(int (* const messageCallback) (const Adx::DumpChunk *, int)); + void AdxUnRegDumpProcessCallBack(); +} + +#endif // ADX_DATADUMP_CALLBACK_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h index a1c39a51..67adecd9 100644 --- a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h +++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h @@ -1,12 +1,18 @@ /** -* @file adx_datadump_server.h -* -* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef ADX_DATADUMP_SERVER_H #define ADX_DATADUMP_SERVER_H diff --git a/third_party/fwkacllib/inc/toolchain/plog.h b/third_party/fwkacllib/inc/toolchain/plog.h index 0d42e31d..8dd8d403 100644 --- a/third_party/fwkacllib/inc/toolchain/plog.h +++ b/third_party/fwkacllib/inc/toolchain/plog.h @@ -1,59 +1,59 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _PLOG_H_ -#define _PLOG_H_ - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -#ifndef LINUX -#define LINUX 0 -#endif // LINUX - -#ifndef WIN -#define WIN 1 -#endif - -#ifndef OS_TYPE -#define OS_TYPE 0 -#endif // OS_TYPE - -#if (OS_TYPE == LINUX) -#define DLL_EXPORT __attribute__((visibility("default"))) -#else -#define DLL_EXPORT _declspec(dllexport) -#endif - -/** - * @ingroup plog - * @brief DlogReportInitialize: init log in service process before all device setting. - * @return: 0: SUCCEED, others: FAILED - */ -DLL_EXPORT int DlogReportInitialize(); - -/** - * @ingroup plog - * @brief DlogReportFinalize: release log resource in service process after all device reset. 
- * @return: 0: SUCCEED, others: FAILED - */ -DLL_EXPORT int DlogReportFinalize(); - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // D_PLOG_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _PLOG_H_ +#define _PLOG_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifndef LINUX +#define LINUX 0 +#endif // LINUX + +#ifndef WIN +#define WIN 1 +#endif + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE == LINUX) +#define DLL_EXPORT __attribute__((visibility("default"))) +#else +#define DLL_EXPORT _declspec(dllexport) +#endif + +/** + * @ingroup plog + * @brief DlogReportInitialize: init log in service process before all device setting. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportInitialize(void); + +/** + * @ingroup plog + * @brief DlogReportFinalize: release log resource in service process after all device reset. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportFinalize(void); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // D_PLOG_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index c8715041..718fc69d 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -1,164 +1,167 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. 
+ * Description: handle perf data
+ * Author: xp
+ * Create: 2019-10-13
 */
-#ifndef MSPROF_ENGINE_PROF_ACL_API_H_
-#define MSPROF_ENGINE_PROF_ACL_API_H_
-
-#define MSVP_MAX_DEV_NUM 64
-#define MSVP_PROF_API __attribute__((visibility("default")))
+#ifndef MSPROFILER_API_PROF_ACL_API_H_
+#define MSPROFILER_API_PROF_ACL_API_H_

// DataTypeConfig
-#define PROF_ACL_API 0x0001
-#define PROF_TASK_TIME 0x0002
-#define PROF_AICORE_METRICS 0x0004
-#define PROF_AICPU_TRACE 0x0008
-#define PROF_MODEL_EXECUTE 0x0010
-#define PROF_RUNTIME_API 0x0020
-#define PROF_RUNTIME_TRACE 0x0040
-#define PROF_SCHEDULE_TIMELINE 0x0080
-#define PROF_SCHEDULE_TRACE 0x0100
-#define PROF_AIVECTORCORE_METRICS 0x0200
-#define PROF_SUBTASK_TIME 0x0400
-
-#define PROF_TRAINING_TRACE 0x0800
-#define PROF_HCCL_TRACE 0x1000
-#define PROF_DATA_PROCESS 0x2000
-#define PROF_TASK_TRACE 0x3842
-
-#define PROF_MODEL_LOAD 0x8000000000000000
+#define PROF_ACL_API 0x00000001ULL
+#define PROF_TASK_TIME 0x00000002ULL
+#define PROF_AICORE_METRICS 0x00000004ULL
+#define PROF_AICPU_TRACE 0x00000008ULL
+#define PROF_L2CACHE 0x00000010ULL
+#define PROF_HCCL_TRACE 0x00000020ULL
+#define PROF_TRAINING_TRACE 0x00000040ULL
+#define PROF_MSPROFTX 0x00000080ULL
+#define PROF_RUNTIME_API 0x00000100ULL
+
+// system profiling switch
+#define PROF_CPU 0x00010000ULL
+#define PROF_HARDWARE_MEMORY 0x00020000ULL
+#define PROF_IO 0x00040000ULL
+#define PROF_INTER_CONNECTION 0x00080000ULL
+#define PROF_DVPP 0x00100000ULL
+#define PROF_SYS_AICORE_SAMPLE 0x00200000ULL
+#define PROF_AIVECTORCORE_SAMPLE 0x00400000ULL
+
+#define PROF_MODEL_EXECUTE 0x0000001000000ULL
+#define PROF_RUNTIME_TRACE 0x0000004000000ULL
+#define PROF_SCHEDULE_TIMELINE 0x0000008000000ULL
+#define PROF_SCHEDULE_TRACE 0x0000010000000ULL
+#define PROF_AIVECTORCORE_METRICS 0x0000020000000ULL
+#define PROF_SUBTASK_TIME 0x0000040000000ULL
+#define PROF_OP_DETAIL 0x0000080000000ULL
+
+#define PROF_AICPU_MODEL 0x4000000000000000ULL
+#define PROF_MODEL_LOAD 0x8000000000000000ULL
+
+#define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \
+ PROF_HCCL_TRACE | PROF_TASK_TIME)

// DataTypeConfig MASK
-#define PROF_ACL_API_MASK 0x0001
-#define PROF_TASK_TIME_MASK 0x0002
-#define PROF_AICORE_METRICS_MASK 0x0004
-#define PROF_AICPU_TRACE_MASK 0x0008
-#define PROF_MODEL_EXECUTE_MASK 0x0010
-#define PROF_RUNTIME_API_MASK 0x0020
-#define PROF_RUNTIME_TRACE_MASK 0x0040
-#define PROF_SCHEDULE_TIMELINE_MASK 0x0080
-#define PROF_SCHEDULE_TRACE_MASK 0x0100
-#define PROF_AIVECTORCORE_METRICS_MASK 0x0200
-#define PROF_SUBTASK_TIME_MASK 0x0400
-
-#define PROF_TRAINING_TRACE_MASK 0x0800
-#define PROF_HCCL_TRACE_MASK 0x1000
-#define PROF_DATA_PROCESS_MASK 0x2000
-
-#define PROF_MODEL_LOAD_MASK 0x8000000000000000
+#define PROF_ACL_API_MASK 0x00000001ULL
+#define PROF_TASK_TIME_MASK 0x00000002ULL
+#define PROF_AICORE_METRICS_MASK 0x00000004ULL
+#define PROF_AICPU_TRACE_MASK 0x00000008ULL
+#define PROF_L2CACHE_MASK 0x00000010ULL
+#define PROF_HCCL_TRACE_MASK 0x00000020ULL
+#define PROF_TRAINING_TRACE_MASK 0x00000040ULL
+#define PROF_MSPROFTX_MASK 0x00000080ULL
+#define PROF_RUNTIME_API_MASK 0x00000100ULL
+
+// system profiling mask
+#define PROF_CPU_MASK 0x00010000ULL
+#define PROF_HARDWARE_MEMORY_MASK 0x00020000ULL
+#define PROF_IO_MASK 0x00040000ULL
+#define PROF_INTER_CONNECTION_MASK 0x00080000ULL
+#define PROF_DVPP_MASK 0x00100000ULL
+#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000ULL
+#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000ULL
+
+#define PROF_MODEL_EXECUTE_MASK 0x0000001000000ULL
+#define PROF_RUNTIME_TRACE_MASK 0x0000004000000ULL
+#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000ULL
+#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000ULL
+#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000ULL
+#define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL
+#define PROF_OP_DETAIL_MASK 0x0000080000000ULL
+
+#define PROF_AICPU_MODEL_MASK 0x4000000000000000ULL
+#define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL
+
+#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
+#define MSVP_PROF_API __declspec(dllexport)
+#else
+#define MSVP_PROF_API __attribute__((visibility("default")))
+#endif

#include
-#include
-
-/**
- * @name ProrErrorCode
- * @brief error code enum of prof_acl_apis
- */
-enum ProfErrorCode {
- PROF_ERROR_NONE = 0, // ok
- PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr
- PROF_ERROR_REPEAT_INIT, // profiling has already been inited
- PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string
- PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable
- PROF_ERROR_FAILURE, // failed to init or start profiling
- PROF_ERROR_NOT_INITED, // profiling has not been inited
- PROF_ERROR_DEVICE_INVALID, // device id invalid
- PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics
- PROF_ERROR_REPEAT_START, // profiilng has already been started
- PROF_ERROR_NOT_STARTED, // profiling has not been started
-};
-
-/**
- * @brief transfer profiling config in acl.json to sample config
- * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
- * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);
-
-/**
- * @name ProfInit
- * @brief init profiling
- * @param profInitCfg [IN] config of init profiling of json format
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);
-
-/**
- * @name ProfAicoreMetrics
- * @brief aicore metrics enum
- */
-enum ProfAicoreMetrics {
- PROF_AICORE_ARITHMATIC_THROUGHPUT = 0,
- PROF_AICORE_PIPELINE = 1,
- PROF_AICORE_SYNCHRONIZATION = 2,
- PROF_AICORE_MEMORY = 3,
- PROF_AICORE_INTERNAL_MEMORY = 4,
- PROF_AICORE_STALL = 5,
- PROF_AICORE_EVENT = 255
-};
-
-/**
- * @name ProfConfig
- * @brief struct of ProfStart
- */
-struct ProfConfig {
- uint32_t devNums; // length of device id list
- uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list
- ProfAicoreMetrics aicoreMetrics; // aicore metric
- uint64_t dataTypeConfig; // data type to start profiling
-};
+#include
+namespace Msprofiler {
+namespace Api {
/**
- * @name ProfStartProfiling
- * @brief start profiling
- * @param profStartCfg [IN] config to start profiling
- * @return ProfErrorCode
+ * @name ProfGetOpExecutionTime
+ * @brief get op execution time of specific part of data
+ * @param data [IN] data read from pipe
+ * @param len [IN] data length
+ * @param index [IN] index of part(op)
+ * @return op execution time (us)
 */
-MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);
+MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);
+}
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);
+
+typedef int32_t Status;
+typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1;
+///
+/// @ingroup AscendCL
+/// @brief subscribe profiling data of graph
+/// @param [in] graphId: the graph id subscribed
+/// @param [in] profSubscribeConfig: pointer to config of model subscribe
+/// @return Status result of function
+///
+MSVP_PROF_API Status aclgrphProfGraphSubscribe(const uint32_t graphId,
+ const aclprofSubscribeConfig1 *profSubscribeConfig);
+
+///
+/// @ingroup AscendCL
+/// @brief unsubscribe profiling data of graph
+/// @param [in] graphId: the graph id subscribed
+/// @return Status result of function
+///
+MSVP_PROF_API Status aclgrphProfGraphUnSubscribe(const uint32_t graphId);

/**
- * @name ProfStopConfig
- * @brief struct of ProfStop
+ * @ingroup AscendCL
+ * @brief get graph id from subscription data
+ *
+ * @param opInfo [IN] pointer to subscription data
+ * @param opInfoLen [IN] memory size of subscription data
+ *
+ * @retval graph id of subscription data
+ * @retval 0 for failed
 */
-struct ProfStopConfig {
- uint64_t padding;
-};
+MSVP_PROF_API size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
- * @name ProfStopProfiling
- * @brief stop profiling
- * @param profStopCfg [IN] config to stop profiling
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);
+* @ingroup AscendCL
+* @brief set stamp payload
+*
+*
+* @retval void
+*/
+MSVP_PROF_API int aclprofSetStampPayload(void *stamp, const int32_t type, void *value);

/**
- * @name ProfFinalize
- * @brief finalize profiling task
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfFinalize();
+* @ingroup AscendCL
+* @brief set category and name
+*
+*
+* @retval void
+*/
+MSVP_PROF_API int aclprofSetCategoryName(uint32_t category, const char *categoryName);

/**
- * @name ProfGetDataTypeConfig
- * @brief get dataTypeConfig started with of one device
- * @param deviceId [IN] deviceId to get dataTypeConfig
- * @param dataTypeConfig [OUT] result get
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig);
-
-#endif // MSPROF_ENGINE_PROF_ACL_API_H_
+* @ingroup AscendCL
+* @brief set category to stamp
+*
+*
+* @retval void
+*/
+MSVP_PROF_API int aclprofSetStampCategory(void *stamp, uint32_t category);
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MSPROFILER_API_PROF_ACL_API_H_
diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h
index 5073cfb1..47d33a9e 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_callback.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h
@@ -1,20 +1,8 @@
-/**
- * Copyright 2020-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * @file prof_callback.h
- * @brief declaraion of profiling callbacks
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
+ * Description: handle perf data
+ * Author: xp
+ * Create: 2019-10-13
 */

#ifndef MSPROFILER_PROF_CALLBACK_H_
@@ -24,6 +12,11 @@
extern "C" {
#endif // __cplusplus

+#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
+#define MSVP_PROF_API __declspec(dllexport)
+#else
+#define MSVP_PROF_API __attribute__((visibility("default")))
+#endif

#include "stddef.h"
#include "stdint.h"
@@ -41,7 +34,7 @@ enum MsprofErrorCode {
 MSPROF_ERROR,
};

-#define MSPROF_ENGINE_MAX_TAG_LEN (31)
+#define MSPROF_ENGINE_MAX_TAG_LEN (63)

/**
 * @name ReporterData
@@ -55,6 +48,17 @@ struct ReporterData {
};

/**
+ * @name MsprofHashData
+ * @brief struct of data to hash
+ */
+struct MsprofHashData {
+ int deviceId; // the index of device
+ size_t dataLen; // the length of data
+ unsigned char *data; // the data content
+ uint64_t hashId; // the id of hashed data
+};
+
+/**
 * @name MsprofReporterModuleId
 * @brief module id of data to report
 */
@@ -63,7 +67,8 @@ enum MsprofReporterModuleId {
 MSPROF_MODULE_HCCL, // HCCL
 MSPROF_MODULE_ACL, // AclModule
 MSPROF_MODULE_FRAMEWORK, // Framework
- MSPROF_MODULE_RUNTIME // runtime
+ MSPROF_MODULE_RUNTIME, // runtime
+ MSPROF_MODULE_MSPROF // msprofTx
};

/**
@@ -75,20 +80,9 @@ enum MsprofReporterCallbackType {
 MSPROF_REPORTER_INIT, // init reporter
 MSPROF_REPORTER_UNINIT, // uninit reporter
 MSPROF_REPORTER_DATA_MAX_LEN, // data max length for calling report callback
+ MSPROF_REPORTER_HASH // hash data to id
};

-/**
- * @name MsprofReporterCallback
- * @brief callback to start reporter/stop reporter/report date
- * @param moduleId [IN] enum MsprofReporterModuleId
- * @param type [IN] enum MsprofReporterCallbackType
- * @param data [IN] callback data (nullptr on INTI/UNINIT)
- * @param len [IN] callback data size (0 on INIT/UNINIT)
- * @return enum MsprofErrorCode
- */
-typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len);
-
-
#define MSPROF_OPTIONS_DEF_LEN_MAX (2048)

/**
@@ -106,58 +100,105 @@ struct MsprofGeOptions {
 */
enum MsprofCtrlCallbackType {
 MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env
- MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json
+ MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json
 MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options
 MSPROF_CTRL_FINALIZE, // stop profiling
- MSPROF_CTRL_REPORT_FUN_P, // for report callback
- MSPROF_CTRL_PROF_SWITCH_ON, // for prof switch on
- MSPROF_CTRL_PROF_SWITCH_OFF // for prof switch off
+ MSPROF_CTRL_INIT_HELPER, // start profiling in helper device
+ MSPROF_CTRL_INIT_DYNA = 0xFF, // start profiling for dynamic profiling
};

-#define MSPROF_MAX_DEV_NUM (64)
+enum MsprofCommandHandleType {
+ PROF_COMMANDHANDLE_TYPE_INIT = 0,
+ PROF_COMMANDHANDLE_TYPE_START,
+ PROF_COMMANDHANDLE_TYPE_STOP,
+ PROF_COMMANDHANDLE_TYPE_FINALIZE,
+ PROF_COMMANDHANDLE_TYPE_MODEL_SUBSCRIBE,
+ PROF_COMMANDHANDLE_TYPE_MODEL_UNSUBSCRIBE
+};

-struct MsprofCommandHandle {
- uint64_t profSwitch;
- uint32_t devNums; // length of device id list
- uint32_t devIdList[MSPROF_MAX_DEV_NUM];
- uint32_t modelId;
+/**
+ * @brief profiling command type
+ */
+enum ProfCtrlType {
+ PROF_CTRL_INVALID = 0,
+ PROF_CTRL_SWITCH,
+ PROF_CTRL_REPORTER,
+ PROF_CTRL_STEPINFO,
+ PROF_CTRL_BUTT
};

/**
- * @name MsprofCtrlCallback
- * @brief callback to start/stop profiling
- * @param type [IN] enum MsprofCtrlCallbackType
- * @param data [IN] callback data
- * @param len [IN] callback data size
- * @return enum MsprofErrorCode
+ * @brief Prof Chip ID
 */
+enum Prof_Chip_ID {
+ PROF_CHIP_ID0 = 0
+};
+
 typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len);
+typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len);

/**
- * @name MsprofSetDeviceCallback
- * @brief callback to notify set/reset device
- * @param devId [IN] device id
- * @param isOpenDevice [IN] true: set device, false: reset device
+ * @brief the struct of profiling set step info
 */
-typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice);
+typedef struct ProfStepInfoCmd {
+ uint64_t index_id;
+ uint16_t tag_id;
+ void *stream;
+} ProfStepInfoCmd_t;
+/**
+ * @name ProfCommandHandle
+ * @brief callback to start/stop profiling
+ * @param type [IN] enum call back type
+ * @param data [IN] callback data
+ * @param len [IN] callback data size
+ * @return enum MsprofErrorCode
+ */
+typedef int32_t (*ProfCommandHandle)(uint32_t type, void *data, uint32_t len);

/*
- * @name MsprofInit
+ * @name profInit
 * @brief Profiling module init
 * @param [in] dataType: profiling type: ACL Env/ACL Json/GE Option
 * @param [in] data: profiling switch data
 * @param [in] dataLen: Length of data
 * @return 0:SUCCESS, >0:FAILED
 */
-int32_t MsprofInit(uint32_t dataType, void *data, uint32_t dataLen);
+MSVP_PROF_API int32_t MsprofInit(uint32_t moduleId, void *data, uint32_t dataLen);
+/**
+ * @name profRegisterCallback
+ * @brief register callback to profiling
+ * @param moduleId [IN] module Id
+ * @param callback [IN] the pointer of callback
+ */
+MSVP_PROF_API int32_t MsprofRegisterCallback(uint32_t moduleId, ProfCommandHandle callback);
+/*
+ * @name profReportData
+ * @brief start reporter/stop reporter/report data
+ * @param moduleId [IN] enum profReporterModuleId
+ * @param type [IN] enum profReporterCallbackType
+ * @param data [IN] data (nullptr on INIT/UNINIT)
+ * @param len [IN] data size (0 on INIT/UNINIT)
+ * @return enum MsprofErrorCode
+ */
+MSVP_PROF_API int32_t MsprofReportData(uint32_t moduleId, uint32_t type, void* data, uint32_t len);
+MSVP_PROF_API int32_t MsprofSetDeviceIdByGeModelIdx(const uint32_t geModelIdx, const uint32_t deviceId);
+MSVP_PROF_API int32_t MsprofUnsetDeviceIdByGeModelIdx(const uint32_t geModelIdx, const uint32_t deviceId);

/*
- * @name AscendCL
+ * @name profFinalize
 * @brief Finishing Profiling
 * @param NULL
 * @return 0:SUCCESS, >0:FAILED
 */
-int32_t MsprofFinalize();
+MSVP_PROF_API int32_t MsprofFinalize();
+/**
+ * @name profNotifySetDevice
+ * @brief notify set/reset device
+ * @param chipId [IN] chip id
+ * @param deviceId [IN] device id
+ * @param isOpen [IN] true: set device, false: reset device
+ */
+MSVP_PROF_API int32_t MsprofNotifySetDevice(uint32_t chipId, uint32_t deviceId, bool isOpen);
+
#ifdef __cplusplus
}
#endif
diff --git a/third_party/fwkacllib/inc/toolchain/prof_common.h b/third_party/fwkacllib/inc/toolchain/prof_common.h
new file mode 100644
index 00000000..e2eb5b69
--- /dev/null
+++ b/third_party/fwkacllib/inc/toolchain/prof_common.h
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
+ * Description: handle perf data
+ * Author: Huawei Technologies Co., Ltd.
+ * Create: 2019-10-13
+ */
+#ifndef MSPROFILER_PROF_COMMON_H_
+#define MSPROFILER_PROF_COMMON_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#include
+
+#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a
+
+enum MsprofDataTag {
+ MSPROF_ACL_DATA_TAG = 0, //acl data tag, range: 0~19
+ MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39
+ MSPROF_GE_DATA_TAG_FUSION = 21,
+ MSPROF_GE_DATA_TAG_INFER = 22,
+ MSPROF_GE_DATA_TAG_TASK = 23,
+ MSPROF_GE_DATA_TAG_TENSOR = 24,
+ MSPROF_GE_DATA_TAG_STEP = 25,
+ MSPROF_GE_DATA_TAG_ID_MAP = 26,
+ MSPROF_GE_DATA_TAG_HOST_SCH = 27,
+ MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59
+ MSPROF_RUNTIME_DATA_TAG_TRACK = 41,
+ MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79
+ MSPROF_AICPU_MODEL_TAG = 61,
+ MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99
+ MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119
+ MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139
+ MSPROF_DATA_TAG_MAX = 65536, //data tag value type is uint16_t
+};
+
+/**
+ * @brief struct of mixed data
+ */
+#define MSPROF_MIX_DATA_RESERVE_BYTES 7
+#define MSPROF_MIX_DATA_STRING_LEN 120
+enum MsprofMixDataType {
+ MSPROF_MIX_DATA_HASH_ID = 0,
+ MSPROF_MIX_DATA_STRING,
+};
+struct MsprofMixData {
+ uint8_t type; // MsprofMixDataType
+ uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
+ union {
+ uint64_t hashId;
+ char dataStr[MSPROF_MIX_DATA_STRING_LEN];
+ } data;
+};
+
+#define PATH_LEN_MAX 1023
+#define PARAM_LEN_MAX 4095
+struct MsprofCommandHandleParams {
+ uint32_t pathLen;
+ uint32_t storageLimit; // MB
+ uint32_t profDataLen;
+ char path[PATH_LEN_MAX + 1];
+ char profData[PARAM_LEN_MAX + 1];
+};
+
+/**
+ * @brief profiling command info
+ */
+#define MSPROF_MAX_DEV_NUM 64
+struct MsprofCommandHandle {
+ uint64_t profSwitch;
+ uint64_t profSwitchHi;
+ uint32_t devNums;
+ uint32_t devIdList[MSPROF_MAX_DEV_NUM];
+ uint32_t modelId;
+ uint32_t type;
+ struct MsprofCommandHandleParams params;
+};
+
+/**
+ * @brief struct of data reported by acl
+ */
+#define MSPROF_ACL_DATA_RESERVE_BYTES 32
+#define MSPROF_ACL_API_NAME_LEN 64
+enum MsprofAclApiType {
+ MSPROF_ACL_API_TYPE_OP = 1,
+ MSPROF_ACL_API_TYPE_MODEL,
+ MSPROF_ACL_API_TYPE_RUNTIME,
+ MSPROF_ACL_API_TYPE_OTHERS,
+};
+struct MsprofAclProfData {
+ uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+ uint16_t dataTag = MSPROF_ACL_DATA_TAG;
+ uint32_t apiType; // enum MsprofAclApiType
+ uint64_t beginTime;
+ uint64_t endTime;
+ uint32_t processId;
+ uint32_t threadId;
+ char apiName[MSPROF_ACL_API_NAME_LEN];
+ uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by GE
+ */
+#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104
+struct MsprofGeProfModelLoadData {
+ uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+ uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD;
+ uint32_t modelId;
+ MsprofMixData modelName;
+ uint64_t startTime;
+ uint64_t endTime;
+ uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8
+#define MSPROF_GE_FUSION_OP_NUM 8
+struct MsprofGeProfFusionData {
+ uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+ uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION;
+ uint32_t modelId;
+ MsprofMixData fusionName;
+ uint64_t inputMemSize;
+ uint64_t outputMemSize;
+ uint64_t weightMemSize;
+ uint64_t workspaceMemSize;
+ uint64_t totalMemSize;
+ uint64_t fusionOpNum;
+ uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM];
+ uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES];
+};
+
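For context, a minimal sketch of how a framework component might fill one of the GE records defined above and push it through MsprofReportData() from prof_callback.h (also changed in this patch). This is illustrative only, not part of the patch: the reporter type value MSPROF_REPORTER_REPORT is assumed from the released headers, since this hunk only shows the INIT/UNINIT/DATA_MAX_LEN/HASH enumerators.

#include "toolchain/prof_common.h"    // struct definitions added above
#include "toolchain/prof_callback.h"  // MsprofReportData(), module/type enums

// Hypothetical helper: report one model-load record for a given model.
static int32_t ReportModelLoad(const uint32_t modelId, const uint64_t startTime, const uint64_t endTime) {
    MsprofGeProfModelLoadData record{};              // magicNumber/dataTag take their in-class defaults
    record.modelId = modelId;
    record.modelName.type = MSPROF_MIX_DATA_HASH_ID; // report a hashed name instead of an inline string
    record.modelName.data.hashId = 0ULL;             // placeholder; a real caller hashes via MSPROF_REPORTER_HASH first
    record.startTime = startTime;
    record.endTime = endTime;
    return MsprofReportData(MSPROF_MODULE_FRAMEWORK,
                            MSPROF_REPORTER_REPORT,  // assumed report-type value from released headers
                            &record, static_cast<uint32_t>(sizeof(record)));
}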
+#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64 +struct MsprofGeProfInferData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER; + uint32_t modelId; + MsprofMixData modelName; + uint32_t requestId; + uint32_t threadId; + uint64_t inputDataStartTime; + uint64_t inputDataEndTime; + uint64_t inferStartTime; + uint64_t inferEndTime; + uint64_t outputDataStartTime; + uint64_t outputDataEndTime; + uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 12 +#define MSPROF_GE_OP_TYPE_LEN 56 +enum MsprofGeTaskType { + MSPROF_GE_TASK_TYPE_AI_CORE = 0, + MSPROF_GE_TASK_TYPE_AI_CPU, + MSPROF_GE_TASK_TYPE_AIV, +}; +enum MsprofGeShapeType { + MSPROF_GE_SHAPE_TYPE_STATIC = 0, + MSPROF_GE_SHAPE_TYPE_DYNAMIC, +}; +struct MsprofGeOpType { + uint8_t type; // MsprofMixDataType + uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; + union { + uint64_t hashId; + char dataStr[MSPROF_GE_OP_TYPE_LEN]; + } data; +}; +struct MsprofGeProfTaskData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK; + uint32_t taskType; // MsprofGeTaskType + MsprofMixData opName; + MsprofGeOpType opType; + uint64_t curIterNum; + uint64_t timeStamp; + uint32_t shapeType; // MsprofGeShapeType + uint32_t blockDims; + uint32_t modelId; + uint32_t streamId; + uint32_t taskId; + uint32_t threadId; + uint32_t contextId; + uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8 +#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8 +#define MSPROF_GE_TENSOR_DATA_NUM 5 +enum MsprofGeTensorType { + MSPROF_GE_TENSOR_TYPE_INPUT = 0, + MSPROF_GE_TENSOR_TYPE_OUTPUT, +}; +struct MsprofGeTensorData { + uint32_t tensorType; // MsprofGeTensorType + uint32_t format; + uint32_t dataType; + uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN]; +}; + +struct MsprofGeProfTensorData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR; + uint32_t modelId; + uint64_t curIterNum; + uint32_t streamId; + uint32_t taskId; + uint32_t tensorNum; + MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM]; + uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27 +enum MsprofGeStepTag { + MSPROF_GE_STEP_TAG_BEGIN = 0, + MSPROF_GE_STEP_TAG_END, +}; +struct MsprofGeProfStepData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP; + uint32_t modelId; + uint32_t streamId; + uint32_t taskId; + uint64_t timeStamp; + uint64_t curIterNum; + uint32_t threadId; + uint8_t tag; // MsprofGeStepTag + uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6 +struct MsprofGeProfIdMapData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP; + uint32_t graphId; + uint32_t modelId; + uint32_t sessionId; + uint64_t timeStamp; + uint16_t mode; + uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24 +struct MsprofGeProfHostSchData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH; + uint32_t threadId; // record in start event + uint64_t element; + uint64_t event; + uint64_t startTime; // record in start event + uint64_t endTime; // record in end event + uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES]; +}; + +/** + * @brief struct of data reported by RunTime + */ 
+#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106
+#define MSPROF_RUNTIME_TASK_ID_NUM 10
+#define MSPROF_RUNTIME_API_NAME_LEN 64
+struct MsprofRuntimeProfApiData {
+ uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+ uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API;
+ uint32_t threadId;
+ uint64_t entryTime;
+ uint64_t exitTime;
+ uint64_t dataSize;
+ uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN];
+ uint32_t retCode;
+ uint32_t streamId;
+ uint32_t taskNum;
+ uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM];
+ uint16_t memcpyDirection;
+ uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10
+#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32
+struct MsprofRuntimeProfTrackData {
+ uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+ uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK;
+ uint32_t threadId;
+ uint64_t timeStamp;
+ char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN];
+ uint32_t taskId;
+ uint16_t streamId;
+ uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by AICPU
+ */
+#define MSPROF_AICPU_DATA_RESERVE_BYTES 9
+struct MsprofAicpuProfData {
+ uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+ uint16_t dataTag = MSPROF_AICPU_DATA_TAG;
+ uint16_t streamId;
+ uint16_t taskId;
+ uint64_t runStartTime;
+ uint64_t runStartTick;
+ uint64_t computeStartTime;
+ uint64_t memcpyStartTime;
+ uint64_t memcpyEndTime;
+ uint64_t runEndTime;
+ uint64_t runEndTick;
+ uint32_t threadId;
+ uint32_t deviceId;
+ uint64_t submitTick;
+ uint64_t scheduleTick;
+ uint64_t tickBeforeRun;
+ uint64_t tickAfterRun;
+ uint32_t kernelType;
+ uint32_t dispatchTime;
+ uint32_t totalTime;
+ uint16_t fftsThreadId;
+ uint8_t version;
+ uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES];
+};
+
+struct MsprofAicpuModelProfData {
+ uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+ uint16_t dataTag = MSPROF_AICPU_MODEL_TAG;
+ uint32_t rsv; // Ensure 8-byte alignment
+ uint64_t timeStamp;
+ uint64_t indexId;
+ uint32_t modelId;
+ uint16_t tagId;
+ uint16_t rsv1;
+ uint64_t eventId;
+ uint8_t reserve[24];
+};
+
+/**
+ * @brief struct of data reported by DP
+ */
+#define MSPROF_DP_DATA_RESERVE_BYTES 16
+#define MSPROF_DP_DATA_ACTION_LEN 16
+#define MSPROF_DP_DATA_SOURCE_LEN 64
+struct MsprofDpProfData {
+ uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+ uint16_t dataTag = MSPROF_DP_DATA_TAG;
+ uint32_t rsv; // Ensure 8-byte alignment
+ uint64_t timeStamp;
+ char action[MSPROF_DP_DATA_ACTION_LEN];
+ char source[MSPROF_DP_DATA_SOURCE_LEN];
+ uint64_t index;
+ uint64_t size;
+ uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by HCCL
+ */
+#pragma pack(4)
+struct MsprofHcclProfNotify {
+ uint32_t taskID;
+ uint64_t notifyID;
+ uint32_t stage;
+ uint32_t remoteRank;
+ uint32_t transportType;
+ uint32_t role; // role {0: dst, 1:src}
+ double durationEstimated;
+};
+
+struct MsprofHcclProfReduce {
+ uint32_t taskID;
+ uint64_t src;
+ uint64_t dst;
+ uint64_t size;
+ uint32_t op; // {0: sum, 1: mul, 2: max, 3: min}
+ uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
+ uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
+ uint32_t remoteRank;
+ uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL}
+ uint32_t role; // role {0: dst, 1:src}
+ double durationEstimated;
+};
+
+struct MsprofHcclProfRDMA {
+ uint32_t taskID;
+ uint64_t src;
+ uint64_t dst;
+ uint64_t size;
+ uint64_t notifyID;
+ uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} + uint32_t remoteRank; + uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} + uint32_t role; // role {0: dst, 1:src} + uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload} + double durationEstimated; +}; + +struct MsprofHcclProfMemcpy { + uint32_t taskID; + uint64_t src; + uint64_t dst; + uint64_t size; + uint64_t notifyID; + uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} + uint32_t remoteRank; + uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} + uint32_t role; // role {0: dst, 1:src} + double durationEstimated; +}; + +struct MsprofHcclProfStageStep { + uint32_t rank; + uint32_t rankSize; +}; + +struct MsprofHcclProfFlag { + uint64_t cclTag; + uint64_t groupName; + uint32_t localRank; + uint32_t workFlowMode; +}; + +/** + * @name MsprofHcclProfData + * @brief struct of data reported by hccl + */ +struct MsprofHcclProfData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_HCCL_DATA_TAG; + uint32_t planeID; + uint32_t deviceID; + uint32_t streamID; + double ts; + char name[16]; + union { + MsprofHcclProfNotify notify; + MsprofHcclProfReduce reduce; + MsprofHcclProfStageStep stageStep; + MsprofHcclProfMemcpy forMemcpy; + MsprofHcclProfRDMA RDMA; + MsprofHcclProfFlag flag; + } args; +}; +#pragma pack() + +/** + * @name MsprofStampInfo + * @brief struct of data reported by msproftx + */ +struct MsprofStampInfo { + uint16_t magicNumber; + uint16_t dataTag; + uint32_t processId; + uint32_t threadId; + uint32_t category; //marker category + uint32_t eventType; + int32_t payloadType; + union PayloadValue //payload info for marker + { + uint64_t ullValue; + int64_t llValue; + double dValue; + uint32_t uiValue[2]; + int32_t iValue[2]; + float fValue[2]; + } payload; + uint64_t startTime; + uint64_t endTime; + int32_t messageType; + char message[128]; + uint8_t reserve0[4]; + uint8_t reserve1[72]; +}; + +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_PROF_COMMON_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h b/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h index 4f013eef..f8cb1b22 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h +++ b/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h @@ -16,7 +16,16 @@ #ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ #define MSPROF_ENGINE_PROF_MGR_CORE_H_ +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else #define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + #include #include diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index ff91351b..afd4863f 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -14,13 +14,10 @@ * limitations under the License. 
 */

-#ifndef MSPROF_ENGINE_PROF_REPORTER_H_
-#define MSPROF_ENGINE_PROF_REPORTER_H_
-#ifndef OS_TYPE
-#define OS_TYPE 0
-#endif // OS_TYPE
+#ifndef MSPROF_ENGINE_PROF_REPORTER_H
+#define MSPROF_ENGINE_PROF_REPORTER_H

-#if (OS_TYPE != LINUX)
+#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
#define MSVP_PROF_API __declspec(dllexport)
#else
#define MSVP_PROF_API __attribute__((visibility("default")))
@@ -41,42 +38,44 @@ namespace Engine {
 * the Reporter class .used to send data to profiling
 */
class MSVP_PROF_API Reporter {
- public:
- virtual ~Reporter() {}
+public:
+ virtual ~Reporter() {}

- public:
- /**
- * @ingroup reporter
- * @name : Report
- * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n
- The data will be firstly appended to cache, if the cache is full, data will be ignored
- * @param data [IN] const ReporterData * the data send to libmsporf
- * @retval PROFILING_SUCCESS 0 (success)
- * @retval PROFILING_FAILED -1 (failed)
- *
- * @par depend:
- * @li libmsprof
- * @li prof_reporter.h
- * @since c60
- * @see Flush
- */
- virtual int Report(const ReporterData *data) = 0;
+public:
+ /**
+ * @ingroup reporter
+ * @name : Report
+ * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n
+ The data will first be appended to the cache; if the cache is full, the data will be ignored
+ * @param data [IN] const ReporterData * the data sent to libmsprof
+ * @retval PROFILING_SUCCESS 0 (success)
+ * @retval PROFILING_FAILED -1 (failed)
+ *
+ * @par depend:
+ * @li libmsprof
+ * @li prof_reporter.h
+ * @since c60
+ * @see Flush
+ */
+ virtual int Report(const ReporterData *data) = 0;

- /**
- * @ingroup reporter
- * @name : Flush
- * @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n
- The all datas of cache will be write to file or send to host
- * @retval PROFILING_SUCCESS 0 (success)
- * @retval PROFILING_FAILED -1 (failed)
- *
- * @par depend:
- * @li libmsprof
- * @li prof_reporter.h
- * @since c60
- * @see ProfMgrStop
- */
- virtual int Flush() = 0;
+ /**
+ * @ingroup reporter
+ * @name : Flush
+ * @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n
+ All data in the cache will be written to file or sent to the host
+ * @retval PROFILING_SUCCESS 0 (success)
+ * @retval PROFILING_FAILED -1 (failed)
+ *
+ * @par depend:
+ * @li libmsprof
+ * @li prof_reporter.h
+ * @since c60
+ * @see ProfMgrStop
+ */
+ virtual int Flush() = 0;
+
+ virtual uint32_t GetReportDataMaxLen() = 0;
};
} // namespace Engine
diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h
index cc7c83ca..f42ea167 100644
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -111,14 +111,14 @@ extern "C" {
#define OPERATION_LOG_MASK (0x10000000)
#define RESERVERD_LENGTH 52

-typedef struct tagDCODE {
- const char *cName;
- int cVal;
+typedef struct {
+ const char *cName;
+ int cVal;
} DCODE;

typedef struct tagKV {
- char *kname;
- char *value;
+ char *kname;
+ char *value;
} KeyValue;

typedef enum {
@@ -139,72 +139,75 @@ typedef struct {
 * module id
 */
enum {
- SLOG, /**< Slog */
- IDEDD, /**< IDE daemon device */
- IDEDH, /**< IDE daemon host */
- HCCL, /**< HCCL */
- FMK, /**< Adapter */
- HIAIENGINE, /**< Matrix */
- DVPP, /**< DVPP */
- RUNTIME, /**< Runtime */
- CCE, /**< CCE */
+ SLOG, /**< Slog */
+ IDEDD, /**< IDE daemon device */
+ IDEDH, /**< IDE daemon host */
+ HCCL, /**< HCCL */
+ FMK, /**< Adapter */
+ HIAIENGINE, /**< Matrix */
+ DVPP, /**< DVPP */
+ RUNTIME, /**< Runtime */
+ CCE, /**< CCE */
#if (OS_TYPE == LINUX)
 HDC, /**< HDC */
#else
 HDCL,
#endif // OS_TYPE
- DRV, /**< Driver */
- MDCFUSION, /**< Mdc fusion */
- MDCLOCATION, /**< Mdc location */
- MDCPERCEPTION, /**< Mdc perception */
- MDCFSM,
- MDCCOMMON,
- MDCMONITOR,
- MDCBSWP, /**< MDC base software platform */
- MDCDEFAULT, /**< MDC undefine */
- MDCSC, /**< MDC spatial cognition */
- MDCPNC,
- MLL, /**< abandon */
- DEVMM, /**< Dlog memory managent */
- KERNEL, /**< Kernel */
- LIBMEDIA, /**< Libmedia */
- CCECPU, /**< aicpu shedule */
- ASCENDDK, /**< AscendDK */
- ROS, /**< ROS */
- HCCP,
- ROCE,
- TEFUSION,
- PROFILING, /**< Profiling */
- DP, /**< Data Preprocess */
- APP, /**< User Application */
- TS, /**< TS module */
- TSDUMP, /**< TSDUMP module */
- AICPU, /**< AICPU module */
- LP, /**< LP module */
- TDT, /**< tsdaemon or aicpu shedule */
- FE,
- MD,
- MB,
- ME,
- IMU,
- IMP,
- GE, /**< Fmk */
- MDCFUSA,
- CAMERA,
- ASCENDCL,
- TEEOS,
- ISP,
- SIS,
- HSM,
- DSS,
- PROCMGR, // Process Manager, Base Platform
- BBOX,
- AIVECTOR,
- TBE,
- FV,
- MDCMAP,
- TUNE,
- INVLID_MOUDLE_ID
+ DRV, /**< Driver */
+ MDCFUSION, /**< Mdc fusion */
+ MDCLOCATION, /**< Mdc location */
+ MDCPERCEPTION, /**< Mdc perception */
+ MDCFSM,
+ MDCCOMMON,
+ MDCMONITOR,
+ MDCBSWP, /**< MDC base software platform */
+ MDCDEFAULT, /**< MDC undefined */
+ MDCSC, /**< MDC spatial cognition */
+ MDCPNC,
+ MLL, /**< abandoned */
+ DEVMM, /**< Dlog memory management */
+ KERNEL, /**< Kernel */
+ LIBMEDIA, /**< Libmedia */
+ CCECPU, /**< aicpu schedule */
+ ASCENDDK, /**< AscendDK */
+ ROS, /**< ROS */
+ HCCP,
+ ROCE,
+ TEFUSION,
+ PROFILING, /**< Profiling */
+ DP, /**< Data Preprocess */
+ APP, /**< User Application */
+ TS, /**< TS module */
+ TSDUMP, /**< TSDUMP module */
+ AICPU, /**< AICPU module */
+ LP, /**< LP module */
+ TDT, /**< tsdaemon or aicpu schedule */
+ FE,
+ MD,
+ MB,
+ ME,
+ IMU,
+ IMP,
+ GE, /**< Fmk */
+ MDCFUSA,
+ CAMERA,
+ ASCENDCL,
+ TEEOS,
+ ISP,
+ SIS,
+ HSM,
+ DSS,
+ PROCMGR, // Process Manager, Base Platform
+ BBOX,
+ AIVECTOR,
+ TBE,
+ FV,
+ MDCMAP,
+ TUNE,
+ HSS, /**< helper */
+ FFTS,
+ OP,
+ INVLID_MOUDLE_ID
};

/**
@@ -261,9 +264,9 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
 * @param [in]fmt: log content
 */
#define dlog_error(moduleId, fmt, ...) \
- do { \
- DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
- } while (TMP_LOG != 0)
+ do { \
+ DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+ } while (TMP_LOG != 0)

/**
 * @ingroup slog
@@ -274,11 +277,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
 * @param [in]fmt: log content
 */
#define dlog_warn(moduleId, fmt, ...) \
- do { \
- if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \
- DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
- } \
- } while (TMP_LOG != 0)
+ do { \
+ if (CheckLogLevel(moduleId, DLOG_WARN) == 1) { \
+ DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+ } \
+ } while (TMP_LOG != 0)

/**
 * @ingroup slog
@@ -289,11 +292,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
 * @param [in]fmt: log content
 */
#define dlog_info(moduleId, fmt, ...)
\ - do { \ - if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ - DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ + DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -304,11 +307,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define dlog_debug(moduleId, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ - DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ + DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -318,9 +321,9 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define dlog_event(moduleId, fmt, ...) \ - do { \ - DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } while (TMP_LOG != 0) + do { \ + DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -332,11 +335,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define Dlog(moduleId, level, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, level) == 1) { \ - DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, level) == 1) { \ + DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -349,11 +352,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogSub(moduleId, submodule, level, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, level) == 1) { \ - DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, level) == 1) { \ + DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -367,11 +370,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, level) == 1) { \ - DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, level) == 1) { \ + DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -451,11 +454,11 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogForC(moduleId, level, fmt, ...) \ - do { \ - if(CheckLogLevelForC(moduleId, level) == 1) { \ - DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -468,11 +471,11 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogSubForC(moduleId, submodule, level, fmt, ...) 
\
- do { \
- if(CheckLogLevelForC(moduleId, level) == 1) { \
- DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \
- } \
- } while (TMP_LOG != 0)
+ do { \
+ if (CheckLogLevelForC(moduleId, level) == 1) { \
+ DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \
+ } \
+ } while (TMP_LOG != 0)

/**
 * @ingroup slog
@@ -486,11 +489,11 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
 * @param [in]fmt: log content
 */
#define DlogWithKVForC(moduleId, level, pstKVArray, kvNum, fmt, ...) \
- do { \
- if(CheckLogLevelForC(moduleId, level) == 1) { \
- DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
- } \
- } while (TMP_LOG != 0)
+ do { \
+ if (CheckLogLevelForC(moduleId, level) == 1) { \
+ DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+ } \
+ } while (TMP_LOG != 0)

/**
 * @ingroup slog
diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
deleted file mode 100644
index 2cf6e0c4..00000000
--- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * @file tune_api.h
- *
- * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.\n
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
- * Description: AOE tuning interface header file
- */
-/** @defgroup aoe AOE tuning interfaces */
-#ifndef TUNE_API_H
-#define TUNE_API_H
-#include
-#include
-#include "ge/ge_api.h"
-#include "aoe_types.h"
-
-/**
- * @ingroup aoe
- * @par Description: command-line tuning
- *
- * @attention None
- * @param option [IN] tuning options
- * @param msg [OUT] message returned when tuning fails
- * @retval #AOE_SUCCESS executed successfully
- * @retval #AOE_FAILURE execution failed
- * @par Dependency:
- * @li tune_api.cpp: development package to which this interface belongs.
- * @li tune_api.h: header file where this interface is declared.
- * @see None
- * @since
- */
-AoeStatus AoeOfflineTuning(const std::map &option, std::string &msg);
-
-/**
- * @ingroup aoe
- * @par Description: tuning initialization
- *
- * @attention None
- * @param session [IN] GE session
- * @param option [IN] option set, containing tuning options and GE options
- * @retval #AOE_SUCCESS executed successfully
- * @retval #AOE_FAILURE execution failed
- * @par Dependency:
- * @li tune_api.cpp: development package to which this interface belongs.
- * @li tune_api.h: header file where this interface is declared.
- * @see None
- * @since
- */
-extern "C" AoeStatus AoeOnlineInitialize(ge::Session *session, const std::map &option);
-
-/**
- * @ingroup aoe
- * @par Description: tuning deinitialization
- *
- * @attention None
- * @param None
- * @retval #AOE_SUCCESS executed successfully
- * @retval #AOE_FAILURE execution failed
- * @par Dependency:
- * @li tune_api.cpp: development package to which this interface belongs.
- * @li tune_api.h: header file where this interface is declared.
- * @see None
- * @since
- */
-extern "C" AoeStatus AoeOnlineFinalize();
-
-/**
- * @ingroup aoe
- * @par Description: tuning execution
- *
- * @attention None
- * @param tuningGraph [IN] graph to tune
- * @param dependGraph [IN] graphs the tuning depends on
- * @param session [IN] GE session
- * @param option [IN] option set, containing tuning options and GE options
- * @retval #AOE_SUCCESS executed successfully
- * @retval #AOE_FAILURE execution failed
- * @par Dependency:
- * @li tune_api.cpp: development package to which this interface belongs.
- * @li tune_api.h: header file where this interface is declared.
- * @see None
- * @since
- */
-extern "C" AoeStatus AoeOnlineTuning(ge::Graph &tuningGraph, std::vector &dependGraph,
- ge::Session *session, const std::map &option);
-#endif
diff --git a/third_party/fwkacllib/inc/tsd/status.h b/third_party/fwkacllib/inc/tsd/status.h
new file mode 100644
index 00000000..e0a9b619
--- /dev/null
+++ b/third_party/fwkacllib/inc/tsd/status.h
@@ -0,0 +1,29 @@
+/**
+ * Copyright (c) Hisilicon Technologies Co., Ltd. 2019-2021. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_TDT_STATUS_H
+#define INC_TDT_STATUS_H
+#include "common/type_def.h"
+namespace tsd {
+#ifdef __cplusplus
+ using TSD_StatusT = uint32_t;
+#else
+ typedef uint32_t TSD_StatusT;
+#endif
+ // success code
+ constexpr TSD_StatusT TSD_OK = 0U;
+}
+#endif // INC_TDT_STATUS_H
diff --git a/third_party/prebuild/aarch64/libalog.so b/third_party/prebuild/aarch64/libalog.so
index e041ad7e..65aefa59 100755
Binary files a/third_party/prebuild/aarch64/libalog.so and b/third_party/prebuild/aarch64/libalog.so differ
diff --git a/third_party/prebuild/aarch64/liberror_manager.so b/third_party/prebuild/aarch64/liberror_manager.so
index 759d8e30..6358365b 100755
Binary files a/third_party/prebuild/aarch64/liberror_manager.so and b/third_party/prebuild/aarch64/liberror_manager.so differ
diff --git a/third_party/prebuild/aarch64/libmmpa.a b/third_party/prebuild/aarch64/libmmpa.a
index d7c29e2b..7d042c4c 100755
Binary files a/third_party/prebuild/aarch64/libmmpa.a and b/third_party/prebuild/aarch64/libmmpa.a differ
diff --git a/third_party/prebuild/x86_64/libalog.so b/third_party/prebuild/x86_64/libalog.so
index 051f85d9..4c8a45a4 100755
Binary files a/third_party/prebuild/x86_64/libalog.so and b/third_party/prebuild/x86_64/libalog.so differ
diff --git a/third_party/prebuild/x86_64/liberror_manager.so b/third_party/prebuild/x86_64/liberror_manager.so
index cd9ad8bc..d97e6ef1 100755
Binary files a/third_party/prebuild/x86_64/liberror_manager.so and b/third_party/prebuild/x86_64/liberror_manager.so differ
diff --git a/third_party/prebuild/x86_64/libmmpa.a b/third_party/prebuild/x86_64/libmmpa.a
index bec195ad..13ca68db 100755
Binary files a/third_party/prebuild/x86_64/libmmpa.a and b/third_party/prebuild/x86_64/libmmpa.a differ
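To close, a usage sketch of the reworked TSD client entry points changed earlier in this patch; illustrative only, assuming the documented TDT_OK success value is 0U, matching tsd::TSD_OK from the new tsd/status.h:

#include "tsd/status.h"   // tsd::TSD_OK == 0U (added by this patch)

// Hypothetical bring-up/teardown for one logical device; TsdOpen/TsdInitQs/TsdClose
// now return plain uint32_t status codes instead of TDT_StatusT.
static bool BringUpTsd(const uint32_t logicDeviceId, const uint32_t rankSize) {
    if (TsdOpen(logicDeviceId, rankSize) != tsd::TSD_OK) {
        return false;                               // aicpu_sd / HCCP bring-up failed
    }
    if (TsdInitQs(logicDeviceId) != tsd::TSD_OK) {  // groupName defaults to nullptr
        (void)TsdClose(logicDeviceId);              // best-effort rollback
        return false;
    }
    return true;                                    // pair with TsdClose(logicDeviceId) on shutdown
}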