From 0e43f8c1d52e60fba16aad837f90ec3ad08fcffc Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 29 Jul 2021 09:50:53 +0800 Subject: [PATCH] update headers --- inc/external/acl/acl.h | 4 +- inc/external/acl/acl_base.h | 2 + inc/external/acl/acl_mdl.h | 16 +- inc/external/acl/acl_op.h | 28 + inc/external/acl/acl_op_compiler.h | 6 +- inc/external/acl/acl_prof.h | 37 ++ inc/external/acl/acl_rt.h | 18 + inc/external/acl/ops/acl_dvpp.h | 109 +++- inc/external/ge/ge_ir_build.h | 28 +- inc/external/hccl/hccl.h | 27 + inc/framework/executor/ge_executor.h | 138 ++-- inc/framework/ge_runtime/task_info.h | 253 ++++++-- third_party/fwkacllib/inc/ops/array_ops.h | 68 +- third_party/fwkacllib/inc/ops/control_flow_ops.h | 2 +- third_party/fwkacllib/inc/ops/ctc_ops.h | 8 +- third_party/fwkacllib/inc/ops/data_flow_ops.h | 41 +- .../fwkacllib/inc/ops/elewise_calculation_ops.h | 166 ++--- third_party/fwkacllib/inc/ops/functional_ops.h | 3 - third_party/fwkacllib/inc/ops/image_ops.h | 311 ++++++++- third_party/fwkacllib/inc/ops/linalg_ops.h | 17 +- third_party/fwkacllib/inc/ops/list_ops.h | 64 +- third_party/fwkacllib/inc/ops/lookup_ops.h | 4 +- third_party/fwkacllib/inc/ops/math_ops.h | 135 ++-- .../fwkacllib/inc/ops/matrix_calculation_ops.h | 232 +++++-- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 699 ++++++++++----------- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 153 +++-- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 175 ++---- third_party/fwkacllib/inc/ops/nn_ops.h | 28 +- third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 97 ++- third_party/fwkacllib/inc/ops/nn_training_ops.h | 11 +- third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 85 +-- third_party/fwkacllib/inc/ops/pad_ops.h | 22 +- third_party/fwkacllib/inc/ops/parsing_ops.h | 135 ++-- third_party/fwkacllib/inc/ops/quantize_ops.h | 10 +- third_party/fwkacllib/inc/ops/ragged_array_ops.h | 9 +- .../fwkacllib/inc/ops/ragged_conversion_ops.h | 3 +- third_party/fwkacllib/inc/ops/ragged_math_ops.h | 6 +- third_party/fwkacllib/inc/ops/random_ops.h | 143 ++++- third_party/fwkacllib/inc/ops/reduce_ops.h | 107 +++- .../fwkacllib/inc/ops/resource_variable_ops.h | 28 +- third_party/fwkacllib/inc/ops/rnn.h | 18 +- third_party/fwkacllib/inc/ops/rpn_ops.h | 6 +- third_party/fwkacllib/inc/ops/sdca_ops.h | 14 +- third_party/fwkacllib/inc/ops/selection_ops.h | 163 +++-- third_party/fwkacllib/inc/ops/sparse_ops.h | 43 +- third_party/fwkacllib/inc/ops/spectral_ops.h | 20 +- .../fwkacllib/inc/ops/split_combination_ops.h | 9 +- third_party/fwkacllib/inc/ops/state_ops.h | 2 +- .../fwkacllib/inc/ops/stateful_random_ops.h | 31 +- third_party/fwkacllib/inc/ops/string_ops.h | 50 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 17 +- third_party/fwkacllib/inc/runtime/base.h | 4 +- third_party/fwkacllib/inc/runtime/config.h | 15 +- third_party/fwkacllib/inc/runtime/context.h | 4 +- third_party/fwkacllib/inc/runtime/dev.h | 22 +- third_party/fwkacllib/inc/runtime/dvfsprofile.h | 4 +- third_party/fwkacllib/inc/runtime/event.h | 14 +- third_party/fwkacllib/inc/runtime/kernel.h | 4 +- third_party/fwkacllib/inc/runtime/mem.h | 4 +- third_party/fwkacllib/inc/runtime/rt_ffts.h | 73 ++- third_party/fwkacllib/inc/runtime/rt_model.h | 4 +- third_party/fwkacllib/inc/runtime/rt_stars.h | 8 +- third_party/fwkacllib/inc/runtime/stream.h | 4 +- third_party/fwkacllib/inc/toolchain/prof_acl_api.h | 32 + .../fwkacllib/inc/toolchain/prof_callback.h | 12 + 65 files changed, 2582 insertions(+), 1423 deletions(-) mode change 100755 => 100644 
third_party/fwkacllib/inc/runtime/rt_ffts.h diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h index 8d261201..a5194472 100644 --- a/inc/external/acl/acl.h +++ b/inc/external/acl/acl.h @@ -25,9 +25,9 @@ extern "C" { #endif -// Current version is 1.0.0 +// Current version is 1.1.0 #define ACL_MAJOR_VERSION 1 -#define ACL_MINOR_VERSION 0 +#define ACL_MINOR_VERSION 1 #define ACL_PATCH_VERSION 0 /** diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index 64d4bd81..90da8b8f 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -150,6 +150,8 @@ typedef enum { ACL_DOUBLE = 11, ACL_BOOL = 12, ACL_STRING = 13, + ACL_COMPLEX64 = 16, + ACL_COMPLEX128 = 17 } aclDataType; typedef enum { diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 2bf85e29..522dbd38 100644 --- a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -297,9 +297,21 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, /** * @ingroup AscendCL + * @brief Get aclTensorDesc from aclmdlDataset + * + * @param dataset [IN] aclmdlDataset pointer; + * @param index [IN] index of tensorDesc + * + * @retval Get address of aclTensorDesc when executed successfully. + * @retval Failure return NULL + */ +ACL_FUNC_VISIBILITY aclTensorDesc *aclmdlGetDatasetTensorDesc(const aclmdlDataset *dataset, size_t index); + +/** + * @ingroup AscendCL * @brief Get the number of aclDataBuffer in aclmdlDataset * - * @param dataset [IN] aclmdlDataset poiter + * @param dataset [IN] aclmdlDataset pointer * * @retval the number of aclDataBuffer */ @@ -309,7 +321,7 @@ ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *datas * @ingroup AscendCL * @brief Get the aclDataBuffer in aclmdlDataset by index * - * @param dataset [IN] aclmdlDataset poiter + * @param dataset [IN] aclmdlDataset pointer * @param index [IN] the index of aclDataBuffer * * @retval Get successfully, return the address of aclDataBuffer diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h index d2e59bfb..f340b6bc 100644 --- a/inc/external/acl/acl_op.h +++ b/inc/external/acl/acl_op.h @@ -137,6 +137,34 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att /** * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is aclDataType + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrDataType(aclopAttr *attr, const char *attrName, aclDataType attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of aclDataType + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values. false if attrValue is 0, true otherwise. + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListDataType(aclopAttr *attr, const char *attrName, int numValues, + const aclDataType values[]); + +/** + * @ingroup AscendCL * @brief set an attribute. 
the type of the attribute is list of bools
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h
index d9d1b3da..b64b2bad 100644
--- a/inc/external/acl/acl_op_compiler.h
+++ b/inc/external/acl/acl_op_compiler.h
@@ -86,9 +86,9 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
-  const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
-  int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
-  aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
+  const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
+  int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
+  aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
 
 /**
 * @ingroup AscendCL
diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h
index 3784d8c6..a93374b0 100644
--- a/inc/external/acl/acl_prof.h
+++ b/inc/external/acl/acl_prof.h
@@ -40,13 +40,20 @@ typedef enum {
 ACL_AICORE_MEMORY_BANDWIDTH = 2,
 ACL_AICORE_L0B_AND_WIDTH = 3,
 ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
+ ACL_AICORE_MEMORY_UB = 5,
 ACL_AICORE_NONE = 0xFF
 } aclprofAicoreMetrics;
 
+typedef enum {
+  ACL_STEP_START = 0,  // step start
+  ACL_STEP_END = 1     // step end
+} aclprofStepTag;
+
 typedef struct aclprofConfig aclprofConfig;
 typedef struct aclprofStopConfig aclprofStopConfig;
 typedef struct aclprofAicoreEvents aclprofAicoreEvents;
 typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;
+typedef struct aclprofStepInfo aclprofStepInfo;
 
 /**
 * @ingroup AscendCL
@@ -322,6 +329,36 @@ ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opI
 */
 ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);
 
+/**
+ * @ingroup AscendCL
+ * @brief get the timestamp of a step start or step end
+ *
+ * @param stepInfo [IN] pointer to stepInfo data
+ * @param tag [IN] start or end flag
+ * @param stream [IN] stream info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclprofGetStepTimestamp(aclprofStepInfo *stepInfo, aclprofStepTag tag, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create pointer to aclprofStepInfo data
+ *
+ * @retval aclprofStepInfo pointer
+ */
+ACL_FUNC_VISIBILITY aclprofStepInfo *aclprofCreateStepInfo();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy aclprofStepInfo pointer
+ *
+ * @retval void
+ */
+ACL_FUNC_VISIBILITY void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h
index 5ee70724..50dbc34d 100644
--- a/inc/external/acl/acl_rt.h
+++ b/inc/external/acl/acl_rt.h
@@ -44,6 +44,12 @@ typedef enum aclrtEventStatus {
 ACL_EVENT_STATUS_RESERVED = 2,
 } aclrtEventStatus;
 
+typedef enum aclrtEventWaitStatus {
+  ACL_EVENT_WAIT_STATUS_COMPLETE = 0,
+  ACL_EVENT_WAIT_STATUS_NOT_READY = 1,
+  ACL_EVENT_WAIT_STATUS_RESERVED = 0xffff,
+} aclrtEventWaitStatus;
+
 typedef enum aclrtCallbackBlockType {
 ACL_CALLBACK_NO_BLOCK,
 ACL_CALLBACK_BLOCK,
@@ -501,6 +507,18 @@ ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus
 
 /**
 * @ingroup AscendCL
 * @brief Queries
an event's wait-status
+ *
+ * @param event [IN] event to query
+ * @param status [OUT] event wait-status
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtQueryEventWaitStatus(aclrtEvent event, aclrtEventWaitStatus *status);
+
/**
 * @ingroup AscendCL
 * @brief Block Host Running, wait event to be complete
 *
 * @param event [IN] event to wait
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h
index dcaa3936..5418ebd3 100644
--- a/inc/external/acl/ops/acl_dvpp.h
+++ b/inc/external/acl/ops/acl_dvpp.h
@@ -158,6 +158,20 @@ enum acldvppJpegFormat {
 ACL_JPEG_CSS_UNKNOWN = 1000
 };
 
+enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0 };
+
+enum aclvdecChannelDescParamType { ACL_VDEC_CSC_MATRIX_UINT32 = 0 };
+
+// Csc Matrix can be used both for acldvppChannelDescParamType and aclvdecChannelDescParamType
+enum acldvppCscMatrix {
+  ACL_DVPP_CSC_MATRIX_BT601_WIDE = 0,
+  ACL_DVPP_CSC_MATRIX_BT601_NARROW,
+  ACL_DVPP_CSC_MATRIX_BT709_WIDE,
+  ACL_DVPP_CSC_MATRIX_BT709_NARROW,
+  ACL_DVPP_CSC_MATRIX_BT2020_WIDE,
+  ACL_DVPP_CSC_MATRIX_BT2020_NARROW
+};
+
 /**
 * @ingroup AscendCL
 * @brief alloc device memory for dvpp.
@@ -1910,9 +1924,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc
 * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
 */
 ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
-  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
-  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
-  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
+  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
+  acldvppResizeConfig *resizeConfig, aclrtStream stream);
 
 /**
 * @ingroup AscendCL
@@ -2557,10 +2571,93 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
 * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
 */
 ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
-  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
-  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
-  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
+  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
+  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+/**
+ * @ingroup AscendCL
+ * @brief set param for dvpp channel desc
+ *
+ * @par Function
+ * set attribute in dvpp channelDesc for specified type
+ *
+ * @param channelDesc [OUT] the channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length of param
+ * @param param [IN] pointer to param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppGetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescParam(acldvppChannelDesc *channelDesc,
+                                                        acldvppChannelDescParamType paramType, size_t length,
+                                                        const void *param);
+
+/**
+ * @ingroup AscendCL
+ * @brief get param of dvpp channel desc
+ *
+ * @par Function
+ * get attribute value in dvpp channelDesc for specified type
+ *
+ * @param channelDesc [IN] the channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length allocated for output param
+ * @param paramRetSize [OUT] mem length of output param
+ * @param param [OUT] pointer to output param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppSetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppGetChannelDescParam(const acldvppChannelDesc *channelDesc,
+                                                        acldvppChannelDescParamType paramType, size_t length,
+                                                        size_t *paramRetSize, void *param);
+/**
+ * @ingroup AscendCL
+ * @brief set param for vdec channel desc
+ *
+ * @par Function
+ * set attribute in channelDesc for specified type
+ *
+ * @param channelDesc [OUT] the vdec channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length of param
+ * @param param [IN] pointer to param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecGetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescParam(aclvdecChannelDesc *channelDesc,
+                                                        aclvdecChannelDescParamType paramType, size_t length,
+                                                        const void *param);
+/**
+ * @ingroup AscendCL
+ * @brief get param of vdec channel desc
+ *
+ * @par Function
+ * get attribute value in channelDesc for specified type
+ *
+ * @param channelDesc [IN] the vdec channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length allocated for output param
+ * @param paramRetSize [OUT] mem length of output param
+ * @param param [OUT] pointer to output param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecSetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecGetChannelDescParam(const aclvdecChannelDesc *channelDesc,
+                                                        aclvdecChannelDescParamType paramType, size_t length,
+                                                        size_t *paramRetSize, void *param);
 #ifdef __cplusplus
 }
 #endif
diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h
index 04e059a1..729685a9 100644
--- a/inc/external/ge/ge_ir_build.h
+++ b/inc/external/ge/ge_ir_build.h
@@ -1,18 +1,18 @@
 /**
-* Copyright 2020 Huawei Technologies Co., Ltd
-
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-
-* http://www.apache.org/licenses/LICENSE-2.0
-
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef INC_EXTERNAL_GE_IR_BUILD_H_
 #define INC_EXTERNAL_GE_IR_BUILD_H_
diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h
index 8261adc4..c24b5374 100644
--- a/inc/external/hccl/hccl.h
+++ b/inc/external/hccl/hccl.h
@@ -145,6 +145,33 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
 extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
 
 /**
+ * @brief Send operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param count An integer(u64) identifying the number of elements to be sent.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param destRank An integer identifying the destination rank.
+ * @param comm A pointer identifying the communication resource.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, uint32_t destRank, HcclComm comm,
+                           aclrtStream stream);
+/**
+ * @brief Receive operator.
+ *
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param count An integer(u64) identifying the number of elements to be received.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param srcRank An integer identifying the source rank.
+ * @param comm A pointer identifying the communication resource.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, uint32_t srcRank, HcclComm comm,
+                           aclrtStream stream);
+
+/**
 * @brief Destroy HCCL comm
 *
 * @param comm A pointer identifying the communication resource targeting
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index fcca561c..ce7c82ac 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -50,14 +50,30 @@ class GE_FUNC_VISIBILITY GeExecutor {
 public:
 GeExecutor();
 ~GeExecutor() = default;
- ge::Status Initialize();
- ge::Status Finalize();
- ge::Status UnloadModel(uint32_t modelId);
+ Status Initialize();
+ Status Finalize();
+
+ ///
+ /// @ingroup ge
+ /// @brief Initialize global execute environment.
+ /// @param [in] options: environment variables.
+ /// @return init result
+ ///
+ static Status Initialize(const std::map<std::string, std::string> &options);
+
+ ///
+ /// @ingroup ge
+ /// @brief Finalize global execute environment.
+ /// @return execute result
+ ///
+ static Status FinalizeEx();
+
+ Status UnloadModel(uint32_t modelId);
 
 // Get input and output descriptor
- ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
-                             std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false);
+ Status GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc, std::vector<TensorDesc> &output_desc,
+                         bool new_model_desc = false);
 
 ///
 /// @ingroup ge
@@ -68,7 +84,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario
 /// @return execute result
 ///
- ge::Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size);
+ Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size);
 
 ///
 /// @ingroup ge
@@ -80,8 +96,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario
 /// @return execute result
 ///
- ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
-                                uint64_t image_width);
+ Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
+                            uint64_t image_width);
 
 ///
 /// @ingroup ge
@@ -93,8 +109,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] dynamic_dims: array of dynamic dimensions
 /// @return execute result
 ///
- ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
-                           const std::vector<uint64_t> &dynamic_dims);
+ Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
+                       const std::vector<uint64_t> &dynamic_dims);
 
 ///
 /// @ingroup ge
@@ -104,8 +120,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] cur_dynamic_dims: current dynamic dims
 /// @return execute result
 ///
- ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
-                              std::vector<uint64_t> &cur_dynamic_dims);
+ Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
+                          std::vector<uint64_t> &cur_dynamic_dims);
 
 ///
 /// @ingroup ge
@@ -115,8 +131,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] dynamic_type
 /// @return execute result
 ///
- ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
-                                int32_t &dynamic_type);
+ Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
 
 ///
 /// @ingroup ge
@@ -125,7 +140,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] batch_info
 /// @return execute result
 ///
- ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
+ Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
 
 ///
 /// @ingroup ge
@@ -134,9 +149,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] user_designate_shape_order
 /// @return execute result
 ///
- ge::Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
+ Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
 
- ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);
+ Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);
 
 ///
 /// @ingroup ge
@@ -148,22 +163,22 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp
 /// @return execute result
 ///
- ge::Status SetDynamicAippData(uint32_t model_id, void
*dynamic_input_addr, uint64_t length,
-                               const std::vector<kAippDynamicBatchPara> &aippBatchPara,
-                               const kAippDynamicPara &aippParms);
+ Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
+                           const std::vector<kAippDynamicBatchPara> &aipp_batch_para,
+                           const kAippDynamicPara &aippParms);
 
- ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
+ Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
 
- ge::Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name,
-                      std::string &attr_value);
+ Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name,
+                  std::string &attr_value);
 
- ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
+ Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
 
- ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
+ Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
 
- ge::Status CommandHandle(const ge::Command &command);
+ Status CommandHandle(const Command &command);
 
- ge::Status SetDump(const DumpConfig &dump_config);
+ Status SetDump(const DumpConfig &dump_config);
 
 ///
 /// @ingroup ge
@@ -173,7 +188,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @return SUCCESS
 /// @return FAILED
 ///
- ge::Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size);
+ Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size);
 
 ///
 /// @ingroup ge
@@ -182,7 +197,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] ModelData &model_data: Offline model memory data
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status LoadDataFromFile(const std::string &path, ge::ModelData &model_data);
+ Status LoadDataFromFile(const std::string &path, ModelData &model_data);
 
 ///
 /// @ingroup ge
@@ -195,8 +210,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] uint32_t &model_id: Corresponding identification after model loading
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status LoadModelFromData(uint32_t &model_id, const ge::ModelData &model_data, void *dev_ptr, size_t mem_size,
-                              void *weight_ptr, size_t weight_size);
+ Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size,
+                          void *weight_ptr, size_t weight_size);
 
 ///
 /// @ingroup ge
@@ -207,9 +222,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] input_queue_ids: input queue ids created by user.
 /// @param [in] output_queue_ids: output queue ids created by user.
 /// @return: 0 for success / others for fail
 ///
- ge::Status LoadModelWithQ(uint32_t &model_id, const ge::ModelData &model_data,
-                           const std::vector<uint32_t> &input_queue_ids,
-                           const std::vector<uint32_t> &output_queue_ids);
+ Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids,
+                       const std::vector<uint32_t> &output_queue_ids);
 
 ///
 /// @ingroup ge
@@ -221,8 +235,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] domi::OutputData *output_data: Model output data
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data,
-                      ge::RunModelData &output_data, bool async_mode = false);
+ Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data,
+                  bool async_mode = false);
 
 ///
 /// @ingroup ge
@@ -236,9 +250,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
-                      const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
-                      std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
+ Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data,
+                  const std::vector<GeTensorDesc> &input_desc, RunModelData &run_output_data,
+                  std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
 
 ///
 /// @ingroup ge
@@ -248,7 +262,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] size_t &weight_size Weight memory space size
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size);
+ Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size);
 
 ///
 /// @ingroup ge
@@ -259,39 +273,39 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] size_t &weight_size Weight memory space size
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size);
+ Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size);
 
- static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream,
-                                SingleOp **single_op);
+ static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream,
+                            SingleOp **single_op);
 
- static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream,
-                                  SingleOp **single_op, const uint64_t model_id);
+ static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream,
+                              SingleOp **single_op, const uint64_t model_id);
 
- static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
-                                std::vector<DataBuffer> &outputs);
+ static Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
+                            std::vector<DataBuffer> &outputs);
 
- static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
-                                       DynamicSingleOp **single_op);
+ static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream,
+                                   DynamicSingleOp **single_op);
 
- static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
-                                         DynamicSingleOp
**single_op, const uint64_t model_id); + static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream, + DynamicSingleOp **single_op, const uint64_t model_id); - static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, - const std::vector &inputs, std::vector &output_desc, - std::vector &outputs); + static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, + const std::vector &inputs, std::vector &output_desc, + std::vector &outputs); - static ge::Status ReleaseSingleOpResource(void *stream); + static Status ReleaseSingleOpResource(void *stream); - static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + static Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); - ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); - ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); - ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, - std::vector &output_dims); - ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); + Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); + Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); + Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, + std::vector &output_dims); + Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); private: - static bool isInit_; + static std::atomic_bool is_inited_; }; } // namespace ge diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h index 4530bff7..abc4783d 100644 --- a/inc/framework/ge_runtime/task_info.h +++ b/inc/framework/ge_runtime/task_info.h @@ -50,10 +50,18 @@ enum TaskInfoType { class TaskInfo { public: virtual ~TaskInfo() {} - uint32_t stream_id() const { return stream_id_; } - TaskInfoType type() const { return type_; } - std::string op_name() const { return op_name_; } - bool dump_flag() const { return dump_flag_; } + uint32_t stream_id() const { + return stream_id_; + } + TaskInfoType type() const { + return type_; + } + std::string op_name() const { + return op_name_; + } + bool dump_flag() const { + return dump_flag_; + } protected: TaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, bool dump_flag) @@ -84,15 +92,33 @@ class CceTaskInfo : public TaskInfo { is_flowtable_(is_flowtable) {} ~CceTaskInfo() override {} - cce::ccOpContext cc_context() const { return ctx_; } - std::string stub_func() const { return stub_func_; } - uint32_t block_dim() const { return block_dim_; } - const std::vector &args() const { return args_; } - uint32_t args_size() const { return args_size_; } - const std::vector &sm_desc() const { return sm_desc_; } - const std::vector &flow_table() const { return flow_table_; } - const std::vector &args_offset() const { return args_offset_; } - bool is_flowtable() const { return is_flowtable_; } + cce::ccOpContext cc_context() const { + return ctx_; + } + std::string stub_func() const { + return stub_func_; + } + uint32_t block_dim() const { + return block_dim_; + } + const std::vector &args() const { + return args_; + } + uint32_t args_size() const { + return args_size_; + } + const std::vector &sm_desc() const { + return sm_desc_; + } + const std::vector &flow_table() const { + return 
flow_table_; + } + const std::vector &args_offset() const { + return args_offset_; + } + bool is_flowtable() const { + return is_flowtable_; + } private: cce::ccOpContext ctx_; @@ -126,17 +152,39 @@ class TbeTaskInfo : public TaskInfo { workspace_addrs_(workspace_addrs) {} ~TbeTaskInfo() override {} - const std::string &stub_func() const { return stub_func_; } - uint32_t block_dim() const { return block_dim_; } - const std::vector &args() const { return args_; } - uint32_t args_size() const { return args_size_; } - const std::vector &sm_desc() const { return sm_desc_; } - void *binary() const { return binary_; } - uint32_t binary_size() const { return binary_size_; } - const std::vector &meta_data() const { return meta_data_; } - const std::vector &input_data_addrs() const { return input_data_addrs_; } - const std::vector &output_data_addrs() const { return output_data_addrs_; } - const std::vector &workspace_addrs() const { return workspace_addrs_; } + const std::string &stub_func() const { + return stub_func_; + } + uint32_t block_dim() const { + return block_dim_; + } + const std::vector &args() const { + return args_; + } + uint32_t args_size() const { + return args_size_; + } + const std::vector &sm_desc() const { + return sm_desc_; + } + void *binary() const { + return binary_; + } + uint32_t binary_size() const { + return binary_size_; + } + const std::vector &meta_data() const { + return meta_data_; + } + const std::vector &input_data_addrs() const { + return input_data_addrs_; + } + const std::vector &output_data_addrs() const { + return output_data_addrs_; + } + const std::vector &workspace_addrs() const { + return workspace_addrs_; + } void SetBinary(void *binary, uint32_t binary_size) { binary_ = binary; @@ -171,12 +219,24 @@ class AicpuTaskInfo : public TaskInfo { output_data_addrs_(output_data_addrs) {} ~AicpuTaskInfo() override {} - const std::string &so_name() const { return so_name_; } - const std::string &kernel_name() const { return kernel_name_; } - const std::string &node_def() const { return node_def_; } - const std::vector &input_data_addrs() const { return input_data_addrs_; } - const std::vector &output_data_addrs() const { return output_data_addrs_; } - const std::string &ext_info() const { return ext_info_; } + const std::string &so_name() const { + return so_name_; + } + const std::string &kernel_name() const { + return kernel_name_; + } + const std::string &node_def() const { + return node_def_; + } + const std::vector &input_data_addrs() const { + return input_data_addrs_; + } + const std::vector &output_data_addrs() const { + return output_data_addrs_; + } + const std::string &ext_info() const { + return ext_info_; + } private: std::string so_name_; @@ -192,7 +252,9 @@ class LabelSetTaskInfo : public TaskInfo { LabelSetTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SET, false), label_id_(label_id) {} ~LabelSetTaskInfo() override {} - uint32_t label_id() const { return label_id_; } + uint32_t label_id() const { + return label_id_; + } private: uint32_t label_id_; @@ -203,7 +265,9 @@ class LabelGotoTaskInfo : public TaskInfo { LabelGotoTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_GOTO, false), label_id_(label_id) {} ~LabelGotoTaskInfo() override {} - uint32_t label_id() const { return label_id_; } + uint32_t label_id() const { + return label_id_; + } private: uint32_t label_id_; @@ -218,9 +282,15 @@ 
class LabelSwitchTaskInfo : public TaskInfo { label_list_(label_list), cond_(cond) {} ~LabelSwitchTaskInfo() override {} - uint32_t label_size() const { return label_size_; } - const std::vector &label_list() const { return label_list_; } - void *cond() const { return cond_; } + uint32_t label_size() const { + return label_size_; + } + const std::vector &label_list() const { + return label_list_; + } + void *cond() const { + return cond_; + } private: uint32_t label_size_; @@ -230,7 +300,9 @@ class LabelSwitchTaskInfo : public TaskInfo { class EventTaskInfo : public TaskInfo { public: - uint32_t event_id() const { return event_id_; } + uint32_t event_id() const { + return event_id_; + } protected: EventTaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, uint32_t event_id) @@ -271,14 +343,13 @@ class FusionEndTaskInfo : public TaskInfo { class HcclTaskInfo : public TaskInfo { public: HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, - void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, + void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num, const std::vector &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), hccl_type_(hccl_type), input_data_addr_(input_data_addr), output_data_addr_(output_data_addr), - workspace_addr_(workspace_addr), workspace_size_(workspace_size), hccl_stream_num_(hccl_stream_num), private_def_(private_def), @@ -290,25 +361,47 @@ class HcclTaskInfo : public TaskInfo { group_(group) {} ~HcclTaskInfo() override {} - const std::string &hccl_type() const { return hccl_type_; } - void *input_data_addr() const { return input_data_addr_; } - void *output_data_addr() const { return output_data_addr_; } - void *workspace_addr() const { return workspace_addr_; } - int64_t workspace_size() const { return workspace_size_; } - int64_t hccl_stream_num() const { return hccl_stream_num_; } - const std::vector &private_def() const { return private_def_; } - void *ops_kernel_store() const { return ops_kernel_store_; } - int32_t count() const { return count_; } - int64_t root_id() const { return root_id_; } - int64_t op_type() const { return op_type_; } - int64_t data_type() const { return data_type_; } - const std::string &group() const { return group_; } + const std::string &hccl_type() const { + return hccl_type_; + } + void *input_data_addr() const { + return input_data_addr_; + } + void *output_data_addr() const { + return output_data_addr_; + } + int64_t workspace_size() const { + return workspace_size_; + } + int64_t hccl_stream_num() const { + return hccl_stream_num_; + } + const std::vector &private_def() const { + return private_def_; + } + void *ops_kernel_store() const { + return ops_kernel_store_; + } + int32_t count() const { + return count_; + } + int64_t root_id() const { + return root_id_; + } + int64_t op_type() const { + return op_type_; + } + int64_t data_type() const { + return data_type_; + } + const std::string &group() const { + return group_; + } private: std::string hccl_type_; void *input_data_addr_; void *output_data_addr_; - void *workspace_addr_; int64_t workspace_size_; int64_t hccl_stream_num_; std::vector private_def_; @@ -329,9 +422,15 @@ class ProfilerTraceTaskInfo : public TaskInfo { flat_(flat) {} ~ProfilerTraceTaskInfo() override {} - uint64_t 
log_id() const { return log_id_; } - bool notify() const { return notify_; } - uint32_t flat() const { return flat_; } + uint64_t log_id() const { + return log_id_; + } + bool notify() const { + return notify_; + } + uint32_t flat() const { + return flat_; + } private: uint64_t log_id_; @@ -351,11 +450,21 @@ class MemcpyAsyncTaskInfo : public TaskInfo { kind_(kind) {} ~MemcpyAsyncTaskInfo() override {} - void *dst() const { return dst_; } - uint64_t dst_max() const { return dst_max_; } - void *src() const { return src_; } - uint64_t count() const { return count_; } - uint32_t kind() const { return kind_; } + void *dst() const { + return dst_; + } + uint64_t dst_max() const { + return dst_max_; + } + void *src() const { + return src_; + } + uint64_t count() const { + return count_; + } + uint32_t kind() const { + return kind_; + } private: void *dst_; @@ -377,11 +486,21 @@ class StreamSwitchTaskInfo : public TaskInfo { data_type_(data_type) {} ~StreamSwitchTaskInfo() override {} - int64_t true_stream_id() const { return true_stream_id_; } - void *input_addr() const { return input_addr_; } - void *value_addr() const { return value_addr_; } - int64_t cond() const { return cond_; } - int64_t data_type() const { return data_type_; } + int64_t true_stream_id() const { + return true_stream_id_; + } + void *input_addr() const { + return input_addr_; + } + void *value_addr() const { + return value_addr_; + } + int64_t cond() const { + return cond_; + } + int64_t data_type() const { + return data_type_; + } private: int64_t true_stream_id_; @@ -397,7 +516,9 @@ class StreamActiveTaskInfo : public TaskInfo { : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_ACTIVE, false), active_stream_id_(active_stream_id) {} ~StreamActiveTaskInfo() override {} - uint32_t active_stream_id() const { return active_stream_id_; } + uint32_t active_stream_id() const { + return active_stream_id_; + } private: uint32_t active_stream_id_; diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index fd35b546..450c893e 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -35,7 +35,7 @@ namespace ge { * @li values:A `Tensor`. Must have the same type as `sorted_x`. \n *@par Attributes: -*@li out_type:An optional `DType` from: `int32, int64`. +*out_type:An optional `DType` from: `int32, int64`. Defaults to `int32`. \n *@par Outputs: @@ -504,7 +504,7 @@ REG_OP(Constant) *x: A tensor. \n *@par Outputs: -*y: A tensor. \n +*y: A copy of input tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Snapshot. @@ -684,7 +684,9 @@ REG_OP(ExpandDims) *@par Inputs: *@li x: Original tensor. -*@li axis: List of ints. \n + +*@par Attributes: +*@li axes: List of ints indicating the dimensions to be inserted. \n *@par Outputs: *y: Reshape tensor with same data as input. \n @@ -755,10 +757,10 @@ REG_OP(Squeeze) *@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n *@par Inputs: -*x: A tensor. \n +*x: A Tensor of type float32, float16, int8, int16, uint16, uint8, int32, int64, uint32, uint64, bool, double. \n *@par Outputs: -*y: A tensor. The rank of input tensor. \n +*y: A tensor. The rank of input tensor. Type is int32. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Rank. 
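
The HcclSend/HcclRecv pair added to inc/external/hccl/hccl.h above is meant to be called as a matched pair on the two ranks involved. The following is a minimal usage sketch, not part of the patch: it assumes the communicator and stream already exist (created elsewhere, e.g. with HcclCommInitClusterInfo and aclrtCreateStream, neither of which this patch touches), that devBuf is device memory holding count float32 elements, and that HCCL_DATA_TYPE_FP32 is the matching HcclDataType enumerator.

    #include "acl/acl_rt.h"
    #include "hccl/hccl.h"

    // Hedged sketch: rank 0 sends `count` float32 elements to rank 1.
    // Both calls are asynchronous on `stream`, so the receiver must
    // synchronize the stream before reading devBuf.
    void SendRecvSketch(HcclComm comm, aclrtStream stream, void *devBuf,
                        uint64_t count, uint32_t myRank) {
      if (myRank == 0) {
        (void)HcclSend(devBuf, count, HCCL_DATA_TYPE_FP32, 1U /* destRank */, comm, stream);
      } else if (myRank == 1) {
        (void)HcclRecv(devBuf, count, HCCL_DATA_TYPE_FP32, 0U /* srcRank */, comm, stream);
      }
      (void)aclrtSynchronizeStream(stream);
    }

The send and the receive must agree on count and dataType; a mismatched pair is the most common error with point-to-point APIs of this shape.
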
@@ -848,7 +850,6 @@ REG_OP(PlaceHolder)
 *x: A tensor. \n
 
 *@par Attributes:
-*@li dtype: data type of tensor.
 *@li shape: tensor shape. \n
 
 *@par Outputs:
@@ -867,13 +868,13 @@ REG_OP(PlaceholderWithDefault)
 *@brief Reads and returns the value of the input variable tensor. \n
 
 *@par Inputs:
-*x: A tensor. \n
+*x: A tensor must have numeric type. \n
 
 *@par Attributes:
 *dtype: An optional int32 or int64. The output data type. Defaults to int32. \n
 
 *@par Outputs:
-*y: A tensor. \n
+*y: A tensor must have numeric type. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ReadVariableOp.
@@ -1134,10 +1135,10 @@
 This is an M-length vector.
 This is an R-length vector
 
 *@par Attributes:
-*@li normalize: boolean (if true, edit distances are normalized by length of truth). \n
+*normalize: boolean (if true, edit distances are normalized by length of truth). \n
 
 *@par Outputs:
-*@li output: A dense float tensor with rank R - 1. \n
+*output: A dense float tensor with rank R - 1. \n
 
 *@par Third-party framework compatibility
 * Compatible with TensorFlow EditDistance operator.
@@ -1154,18 +1155,17 @@ REG_OP(EditDistance)
 .OP_END_FACTORY_REG(EditDistance)
 
 /**
-* @brief sort_v2.
+* @brief sort the input tensor without returning the indices.
 
 * @par Inputs:
-* @li x: An ND tensor of type float16.
+* x: An ND tensor of type float16.
 
 * @par Attributes:
-
 * @li axis: An optional int. The dimension to sort along. This value defaults to -1.
 * @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False.
 
 * @par Outputs:
-* @li y: An ND tensor of type float16.
+* y: An ND tensor of type float16.
 
 * @attention Constraints:
 * @li Axis should select the last dim.
@@ -1206,7 +1206,7 @@ REG_OP(Expand)
 *@brief Returns a tensor containing the indices of all non-zero elements of input. \n
 
 *@par Inputs:
-*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
+*x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
 
 *@par Attributes:
 * transpose: the output tensor will be transposed if true. \n
@@ -1230,15 +1230,15 @@ REG_OP(NonZero)
 
 * @par Inputs:
 * One inputs, including:
-* @li x: A Tensor. Must be one of the following types:
+* x: A Tensor. Must be one of the following types:
 * float16, float32, int32, int8 ,uint8. \n
 
 * @par Attributes:
-* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n
+* shape: A required listInt to specify the shape that the input tensor expanded to. \n
 
 * @par Outputs:
-* @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n
+* y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n
 
 * @par Third-party framework compatibility
 * Compatible with the ONNX operator Expand.
@@ -1249,6 +1249,38 @@ REG_OP(ExpandD)
 .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
 .REQUIRED_ATTR(shape, ListInt)
 .OP_END_FACTORY_REG(ExpandD)
+
+/**
+*@brief Finds unique elements in a 1D tensor. \n
+
+*@par Inputs:
+*x: 1D tensor. Must be one of the following types:
+* float16, float32, double, int64, int32, int16, uint16, int8 ,uint8. \n
+
+*@par Attributes:
+*@li return_inverse: Whether to also return the indices for where elements in the original
+* input ended up in the returned unique list.
+*@li return_counts: Whether to also return the counts for each unique element.
+
+*@par Outputs:
+*@li y1: The output list of unique scalar elements. Has the same type as "x".
+*@li y2: Representing the indices for where elements in the original input map to in the output.
+*@li y3: Representing the number of occurrences for each unique value or tensor. \n

+* @par Third-party framework compatibility
+* Compatible with the torch operator _unique2.
+*/
+
+REG_OP(UniqueWithCountsAndSorting)
+    .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
+    .OUTPUT(y1, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
+    .OUTPUT(y2, TensorType({ DT_INT32, DT_INT64 }))
+    .OUTPUT(y3, TensorType({ DT_INT32, DT_INT64 }))
+    .ATTR(return_inverse, Bool, false)
+    .ATTR(return_counts, Bool, false)
+    .OP_END_FACTORY_REG(UniqueWithCountsAndSorting)
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h
index e5bd3534..cd993599 100644
--- a/third_party/fwkacllib/inc/ops/control_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h
@@ -96,7 +96,7 @@ REG_OP(RefMerge)
 * Otherwise, the data is forwarded to "output_false" . \n
 
 *@par Inputs:
- *@li data: The tensor to be forwarded. \n
+ *@li data: The tensor to be forwarded.
 * Must be one of the following types: float16, float32, float64,
 * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
 *@li pred: A boolean scalar. The output port that will receive data . \n
diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h
index e907b828..bbc610ff 100644
--- a/third_party/fwkacllib/inc/ops/ctc_ops.h
+++ b/third_party/fwkacllib/inc/ops/ctc_ops.h
@@ -74,7 +74,7 @@ REG_OP(CTCLoss)
 *@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n
 
 *@par Attributes:
-*@li merge_repeated: If True, merge repeated classes in output. \n
+* merge_repeated: If True, merge repeated classes in output. \n
 
 *@par Outputs:
 *@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`,
@@ -108,6 +108,8 @@ REG_OP(CTCGreedyDecoder)
 
 *@par Attributes:
 *@li merge_repeated: If True, merge repeated classes in output. \n
+*@li beam_width: A scalar >= 0 (beam search beam width).
+*@li top_paths: A scalar >= 0, <= beam_width (controls output size).
 
 *@par Outputs:
 *@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j,
@@ -162,7 +164,7 @@ REG_OP(CTCBeamSearchDecoder)
 * Compatible with Pytorch CTCLoss operator.
 
 *@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*The length of Label should be in [4, 1000].
 */
 REG_OP(CTCLossV2)
 .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE}))
@@ -203,7 +205,7 @@ REG_OP(CTCLossV2)
 * Compatible with Pytorch CTCLoss operator.
 
 *@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*The limit of Label's length is 1K.
 */
 REG_OP(CTCLossV2Grad)
 .INPUT(grad_out, TensorType({DT_FLOAT, DT_DOUBLE}))
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h
index 6021f4e3..32454d27 100644
--- a/third_party/fwkacllib/inc/ops/data_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h
@@ -1201,6 +1201,8 @@ REG_OP(TensorArraySize)
 *@brief A queue implementation that dequeues elements in a random order. \n
 
 *@par Attributes:
+*@li component_types: A list of fully-defined TensorType objects with
+the same length as shapes, or None.
 *@li shapes: (Optional.)
A list of fully-defined TensorShape objects with the same length as dtypes, or None. *@li capacity: An integer. The upper bound on the number of elements that may @@ -1281,6 +1283,7 @@ The length of this attr must be either 0 or the same as the length of elements are not constrained, and only one element may be dequeued at a time. *@li container: An optional string. Defaults to "". If non-empty, this queue is placed in the given container. Otherwise, a default container is used. +*@li capacity:An integer. The upper bound on the number of elements that may be stored in this queue. *@li shared_name: An optional string. Defaults to "". If non-empty, this queue will be shared under the given name across multiple sessions. \n @@ -1435,7 +1438,7 @@ REG_OP(OrderedMapClear) *@par Inputs: *Including: -* @li resource: A Tensor of type DT_RESOURCE. +* resource: A Tensor of type DT_RESOURCE. *@par Outputs: *handle: A Tensor of type DT_STRING ref. \n @@ -1526,7 +1529,7 @@ REG_OP(OrderedMapPeek) *@par Inputs: *Including: -* @li indices: A Tensor of type DT_INT32. \n +* indices: A Tensor of type DT_INT32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". @@ -2332,6 +2335,40 @@ REG_OP(CacheAllIndexToLocal) .OP_END_FACTORY_REG(CacheAllIndexToLocal) /** +*@brief LRUCacheV2, aicore LRUCache. +*@par Inputs: +*index_list: exchange index list +*data: host data +*cache: gm cache +*tag: cache's tag +*is_last_call: if is last call write all cache to data +*@par Outputs: +*data: output data +*cache: gm cache +*tag: cache's tag +*index_offset_list: index_offset_list +*not_in_cache_index_list: output not in cache's index_list +*not_in_cache_number: scalar +*@par Attributes: +*pre_route_count: types of all outputs +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LRUCacheV2) + .INPUT(index_list, TensorType::BasicType()) + .INPUT(data, TensorType::BasicType()) + .INPUT(cache, TensorType::BasicType()) + .INPUT(tag, TensorType::BasicType()) + .INPUT(is_last_call, TensorType::BasicType()) + .OUTPUT(data, TensorType::BasicType()) + .OUTPUT(cache, TensorType::BasicType()) + .OUTPUT(tag, TensorType::BasicType()) + .OUTPUT(index_offset_list, TensorType::BasicType()) + .OUTPUT(not_in_cache_index_list, TensorType::BasicType()) + .OUTPUT(not_in_cache_number, TensorType::BasicType()) + .REQUIRED_ATTR(pre_route_count, Int) + .OP_END_FACTORY_REG(LRUCacheV2) + +/** *@brief DynamicGetNext, dynamic get next data *@par Inputs: *x: the iterator, all types are available diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index f61e2939..b4299026 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -624,9 +624,9 @@ REG_OP(Log1p) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... 
*DN<=1000000,n<=8 @@ -2066,9 +2066,9 @@ REG_OP(FloorDiv) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 @@ -2200,9 +2200,9 @@ REG_OP(Tan) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 @@ -2467,11 +2467,11 @@ REG_OP(Eltwise) *@par Inputs: *One inputs, including: - * @li input_x: A tensor. Must be one of the following types: + * input_x: A tensor. Must be one of the following types: * float16, float32. \n *@par Outputs: - *y: A Tensor with the same type and shape of input_x's. \n + *output_y: A Tensor with the same type and shape of input_x's. \n *@par Third-party framework compatibility *Compatible with the Pytorch operator Erfinv. \n @@ -3154,13 +3154,13 @@ REG_OP(FusedMulAddNL2loss) *@brief Tests whether the input exceeds a threshold. \n *@par Inputs: -*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n +* x: A Tensor with any format. Must be one of the following types: float16, float32. \n *@par Attributes: -*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n +* threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n *@par Outputs: -*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. +* y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. *@par Third-party framework compatibility * Compatible with the Caffe operator Threshold. */ @@ -3175,7 +3175,7 @@ REG_OP(FusedMulAddNL2loss) *@brief Returns the index number corresponding to the maximum value entered. \n *@par Inputs: -*@li x: A tensor. Must be one of the following types: float16, float32. \n +*x: A tensor. Must be one of the following types: float16, float32. \n *@par Attributes: *@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000 @@ -3203,12 +3203,11 @@ REG_OP(ArgMaxWithK) *@brief Multiply tensor with scale. \n *@par Inputs: -*Five inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. \n +*One input, including: +*x: A Tensor. Must be one of the following types:int32,int16, float16, float32. *@par Outputs: -*@li y: A Tensor. Has the same type and shape as "x1". \n +*y: A Tensor. Has the same type and shape as "x1". 
\n *@par Third-party framework compatibility: * Compatible with the Pytorch operator muls. @@ -3223,12 +3222,11 @@ REG_OP(Muls) *@brief Fill tensor with scale. \n *@par Inputs: -*Five inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. \n +*One input, including: +*x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. *@par Outputs: -*@li y: A Tensor. Has the same type and shape as "x1". \n +*y: A Tensor. Has the same type and shape as "x1". \n *@par Third-party framework compatibility: * Compatible with the Pytorch operator fills. @@ -3378,7 +3376,7 @@ REG_OP(TensorMove) *@par Inputs: *One inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n *@par Outputs: *output_x: A Tensor. Has the same type as "x". \n @@ -3397,7 +3395,7 @@ REG_OP(TensorRedirect) * multiply the result by the scalar value and add it to tensor x1 * @par Inputs: -* Three inputs, including: +* Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: * float16, float32. * @li x1: A mutable input Tensor of the same type as x1. @@ -3406,7 +3404,7 @@ REG_OP(TensorRedirect) * float16, float32, int32. \n * @par Outputs: -* @li y: A mutable Tensor. Has the same type as "x1". \n +* y: A mutable Tensor. Has the same type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcdiv. @@ -3420,12 +3418,12 @@ REG_OP(Addcdiv) .OP_END_FACTORY_REG(Addcdiv) /** -* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, -* multiply the result by the scalar value and add it to tensor input_data +* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, +* multiply the result by the scalar value and add it to tensor input_data * @par Inputs: -* Three inputs, including: +* Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: * float16, float32, int8, int32, uint8. * @li x1: A mutable input Tensor of the same type as x1. @@ -3433,7 +3431,7 @@ REG_OP(Addcdiv) * @li value: A tensor which includes only one element of the same type as x1. \n * @par Outputs: -* @li y: A mutable output Tensor. Has the same type as "x1". \n +* y: A mutable output Tensor. Has the same type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcmul. @@ -3455,7 +3453,7 @@ REG_OP(Addcmul) * @li alpha: A scalar tensor of type float16, float32. \n * @par Outputs: -* @li y: An ND tensor tensor with the same shape and type as "x1". \n +* y: An ND tensor tensor with the same shape and type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Axpy. @@ -3468,25 +3466,6 @@ REG_OP(AxpyV2) .OP_END_FACTORY_REG(AxpyV2) /** -* @brief Computes the result of x1 - x2. - -* @par Inputs: -* @li x1: An ND tensor of type float16, float, int32. -* @li x2: An ND tensor of type float16, float, int32. \n - -* @par Outputs: -* @li y: An ND tensor tensor with the same type as "x1". \n - -* @par Third-party framework compatibility -* Compatible with the Pytorch operator Sub. 
-*/ -REG_OP(PtSub) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OP_END_FACTORY_REG(PtSub) - -/** * @brief Add the partial values of two tensors in format NC1HWC0. * @par Inputs: @@ -3502,7 +3481,7 @@ REG_OP(PtSub) * the difference between C1 and offset in "x1" and "x2". \n * @par Outputs: -* @li y: A Tensor of the same type as "x1", and the same shape as "x1", +* y: A Tensor of the same type as "x1", and the same shape as "x1", * except for the C1 value. Record the result after adding. \n */ REG_OP(StrideAdd) @@ -3523,7 +3502,7 @@ REG_OP(StrideAdd) * @li input_y: A Tensor. the second tensor. \n * @par Outputs: -* @li output_z: A Tensor. Bool type, compare result of the two inputs. \n +*output_z: A Tensor. Bool type, compare result of the two inputs. \n * @par Third-party framework compatibility * Compatible with the Pytorch equal operator. \n @@ -3535,21 +3514,21 @@ REG_OP(TensorEqual) .OP_END_FACTORY_REG(TensorEqual) /** - * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). - * All inputs and outputs must have the same data type. This operator supports multidirectional + * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). + * All inputs and outputs must have the same data type. This operator supports multidirectional * (i.e., Numpy-style) broadcasting - * - * @par inputs + * + * @par Inputs: * one input including: - * @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 - * - * @par output + * x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 + * + * @par Outputs: * one output including: - * @li y:A Tensor of the same type as x - * + * y:A Tensor of the same type as x + * */ REG_OP(MaxN) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) .OP_END_FACTORY_REG(MaxN) @@ -3634,16 +3613,16 @@ REG_OP(DataCompare) *which Hardmax will be performed.The output tensor has the same shape and contains the Hardmax values of the *corresponding input. * -*@par inputs +*@par Inputs: *one input including: -*@li x: input A Tensor.Must be one of the following types:float32,float16 +*x: input A Tensor.Must be one of the following types:float32,float16 * *@par Attributes: -*@li axis:A required int attribute that decides which dimension will be used to cal the hard_max +*axis:A required int attribute that decides which dimension will be used to cal the hard_max * -*@par output: +*@par Outputs: *one output including: -*@li y:A Tensor of the same type as x +*y:A Tensor of the same type as x * */ REG_OP(HardMax) @@ -3661,7 +3640,7 @@ REG_OP(HardMax) * @li input_y: A Tensor. the second tensor must be 1d. \n * @par Outputs: -* @li output: A Tensor. Result of the two inputs, must be 1d. \n +* output: A Tensor. Result of the two inputs, must be 1d. \n * @par Third-party framework compatibility * Compatible with the Pytorch dot operator. 
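*
*@par Example:
* An editorial sketch of the Dot semantics described above (assuming both
* inputs are 1-D tensors of equal length n; illustrative only, not part of
* the original header):
*   // float acc = 0.0f;
*   // for (int64_t i = 0; i < n; ++i) acc += input_x[i] * input_y[i];
*   // output[0] = acc;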
\n
@@ -3671,7 +3650,7 @@ REG_OP(Dot)
  .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
  .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
  .OP_END_FACTORY_REG(Dot)
-
+
/**
*@brief Returns a new tensor with boolean elements representing \n
*if each element of input is “close” to the corresponding element of other \n
@@ -3719,7 +3698,7 @@ REG_OP(IsClose)
*
*@attention Constraints:
*@li indices: only support int32,and shape same to "updates"
-*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li y:A Tensor, the type and shape is same to "var" \n
*@par Third-party framework compatibility
@@ -3754,7 +3733,7 @@ REG_OP(ArgMaxGrad)
*@attention Constraints:
*@li indices: only support int32,and shape same to "updates"
-*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li y:A Tensor, the type and shape is same to "var" \n
*@par Third-party framework compatibility
@@ -3805,15 +3784,15 @@ REG_OP(AddMatMatElements)
*@par Inputs:
*Two inputs, including:
-* @li input_x1: A tensor. Must be the following types:
-* float32. \n
+* @li input_x1: A tensor. Must be one of the following types: float32.
+* @li input_x2: A tensor. Must be one of the following types: float32. \n
-*@par Inputs:
-*@li input_x2: A tensor. Must of the following types:
-* float32. \n
+* @par Attributes:
+* @li dim: The type is Int and the default value is 1.
+* @li eps: The type is Float and the default value is 1e-8. \n
*@par Outputs:
-*@li output_y: A Tensor with the same type of input_x's. \n
+* output_y: A Tensor with the same type as "input_x1". \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator CosineSimilarity. \n
@@ -3826,6 +3805,45 @@ REG_OP(CosineSimilarity)
  .ATTR(eps, Float, 1e-8)
  .OP_END_FACTORY_REG(CosineSimilarity)

+/**
+*@brief Computes the Adam optimizer update. \n
+
+*@par Inputs:
+*Eleven inputs, including:
+* @li var: A Tensor. Support float16/float32.\n
+* @li m: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li v: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li lr: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li beta1: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li beta2: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li epsilon: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li grad: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+
+*@par Outputs:
+*Three outputs, including:
+* @li var: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li m: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li v: A Tensor.
Datatype and shape are same as exp_avg.\n +*/ +REG_OP(ApplyAdamV2) + .INPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(lr, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(beta1, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(beta2, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(epsilon, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(grad, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OP_END_FACTORY_REG(ApplyAdamV2) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index b09ac058..7cfe39c4 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -163,9 +163,6 @@ REG_OP(Case) * if it is not a scalar, non-empty means True and empty means False. *@li body: A subgraph takes 'input' and returns a another list of tensors . \n - *@par Attributes: - *parallel_iterations: An optional int, default as 10 . \n - *@par Outputs: *output: The output tensors returned by "body". Has the same type as "input" . \n diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 6909345a..2327e76e 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -28,7 +28,7 @@ namespace ge { *@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . \n *@par Inputs: -*@li contents:A Tensor of type string. 0-D. The GIF-encoded image. \n +*contents:A Tensor of type string. 0-D. The GIF-encoded image. \n *@par Outputs: *image:A Tensor of type uint8. \n @@ -128,8 +128,8 @@ crops from the input image tensor and resizes them using bilinear sampling or nearest neighbor sampling to a common output size specified by crop_size . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: -*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, +*Input x must be a 4-D tensor. Inputs include: +*@li x:A Tensor. Must be one of the following types:uint8, uint16, int8, int16, int32, int64, float16, float, double. A 4-D tensor of shape [batch, image_height, image_width, depth]. The format must be NHWC. *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. @@ -266,8 +266,9 @@ depth] containing the original image size. Both image_height and image_width need to be positive . \n *@par Attributes: -method: A string specifying the interpolation method. Only 'bilinear' is -supported for now . \n +*@li method: A string specifying the interpolation method. Only 'bilinear' is +supported for now . +*@li T: output of type \n *@par Outputs: *y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format @@ -585,9 +586,11 @@ REG_OP(ResizeNearestNeighborV2GradD) channels], The image tensor that was resized . \n *@par Attributes: -*align_corners: An optional bool. Defaults to False. If true, the centers of +*@li align_corners: An optional bool. Defaults to False. If true, the centers of the 4 corner pixels of the input and grad tensors are aligned. 
Defaults to -false . \n +false . +*@li half_pixel_centers: indicates if the offset coordinates are normalized. Defaults +to false . \n *@par Outputs: *y: A Tensor. Has the same type as original_image . \n @@ -617,9 +620,10 @@ REG_OP(ResizeBilinearV2Grad) size for the images . \n *@par Attributes: -*align_corners: If true, the centers of the 4 corner pixels of the input and +* @li align_corners: If true, the centers of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. -Defaults to false . \n +Defaults to false . +* @li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: *y: 4-D with shape [batch, new_height, new_width, channels] . \n @@ -684,6 +688,9 @@ be non-negative. In the case of 0, the cropped area does not need to overlap any of the bounding boxes supplied . *@li aspect_ratio_range: The cropped area of the image must have an aspect ratio = width / height within this range. +*@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The +cropped area of the image must contain a fraction of the supplied image +within this range. *@li max_attempts: Number of attempts at generating a cropped region of the image of the specified constraints. After max_attempts failures, return the entire image. @@ -740,6 +747,9 @@ generator is seeded by the given seed. Otherwise, it is seeded by a random seed. *@li seed2: A second seed to avoid seed collision. *@li aspect_ratio_range: The cropped area of the image must have an aspect ratio = width / height within this range. +*@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The +cropped area of the image must contain a fraction of the supplied image +within this range. *@li max_attempts: Number of attempts at generating a cropped region of the image of the specified constraints. After max_attempts failures, return the entire image. @@ -787,9 +797,10 @@ REG_OP(SampleDistortedBoundingBoxExt2) The new size for the images . \n *@par Attributes: -*align_corners: If true, the centers of the 4 corner pixels of the input and +*@li align_corners: If true, the centers of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. Defaults to false . \n +*@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: *y: 4-D with shape [batch, new_height, new_width, channels] . \n @@ -999,10 +1010,6 @@ deciding whether boxes overlap too. *@li score_threshold: A 0-D float tensor representing the threshold for deciding when to remove boxes based on score . \n -*@par Attributes: -*pad_to_max_output_size: If true, the output selected_indices is padded -to be of length max_output_size. Defaults to false . \n - *@par Outputs: *selected_indices: A 1-D integer tensor of shape [M] representing the selected indices from the boxes tensor, where M <= max_output_size . \n @@ -1094,8 +1101,8 @@ REG_OP(EncodePng) *contents: 0-D. PNG-decoded image . *@par Attributes: -*channels: graph channels \n -*dtype: type of image +*@li channels: graph channels \n +*@li dtype: type of image *@par Outputs: *image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] @@ -1116,10 +1123,10 @@ REG_OP(DecodePng) *@brief Bmp-decode an image. \n *@par Inputs: -*@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n +*contents: A Tensor of type string. 0-D. The BMP-encoded image. \n *@par Attributes: -*@li channels: Decode the desired number of color channels of the image. 
\n +*channels: Decode the desired number of color channels of the image. \n *@par Outputs: *image: A Tensor dtype of uint8. @@ -1253,6 +1260,7 @@ REG_OP(KeepRatioResizeBilinear) No default value. *@li align_corners: An optional bool. If "true", the centers of the corner pixels of the input and output tensors are aligned. Defaults to "false" . \n +*@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: *y: A Tensor with the same type and format as input "images" . \n @@ -1381,6 +1389,7 @@ REG_OP(NonMaxSuppressionV5) *@li scale: A `Tensor` of type `float32`. *@li translation: A `Tensor` of type `float32` . \n +*@par Attributes: *@li kernel_type: type is string, default lanczos3 *@li antialias: type is bool, default true \n @@ -1411,6 +1420,7 @@ REG_OP(ScaleAndTranslate) *@li scale: A `Tensor` of type `float32`. *@li translation: A `Tensor` of type `float32` . \n +*@par Attributes: *@li kernel_type: type is string, default lanczos3 *@li antialias: type is bool, default true @@ -1460,9 +1470,10 @@ if they fall beyond [0, 1]. If false, do not do clipping and output the box coordinates as it is. If not specified, defaults to true . \n *@par Outputs: -*nmsed_boxes:type is float -*nmsed_scores:type is float -*nmsed_classes:type is float \n +*@li nmsed_boxes:type is float +*@li nmsed_scores:type is float +*@li nmsed_classes:type is float +*@li valid_detections:type is INT32 \n *@par Third-party framework compatibility * Compatible with tensorflow CombinedNonMaxSuppression operator. @@ -1508,6 +1519,9 @@ REG_OP(IMGWarp) *@par Outputs: *map_img: A Tensor after resize. \n + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(Remap) .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) @@ -1524,7 +1538,7 @@ and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bott *@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point. *@par Outputs: -*remap_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n +*warp_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n */ REG_OP(IMGWarpResize) .INPUT(img, TensorType({DT_FLOAT32})) @@ -1559,6 +1573,39 @@ REG_OP(SpatialTransformerD) .OP_END_FACTORY_REG(SpatialTransformerD) /** +*@brief Function spatial transformer . \n + +*@par Inputs: +*@li x: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64. +*@li theta: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64, + auxiliary coefficients . \n + +*@par Attributes: +*@li output_size: A tuple output size. +*@li default_theta: A tuple default theta +*@li use_default_theta: List use default theta + +*@par Outputs: +*y: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64, + should be same shape and type as x. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(SpatialTransformer) + .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16, + DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) + .OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8, + DT_UINT16,DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16, + DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) + .ATTR(output_size, ListInt, {-1, -1}) + .ATTR(default_theta, ListFloat, {}) + .ATTR(align_corners, Bool, false) + .ATTR(use_default_theta, ListInt, {}) + .OP_END_FACTORY_REG(SpatialTransformer) + +/** * @brief Resize the input tensor. \n currently, only support resize image tensor using nearest neighbor and linear interpolation. @@ -1623,7 +1670,7 @@ REG_OP(Resize) *@brief Function parse image from string to int. \n *@par Inputs: -*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n +* contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n *@par Attributes: *@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. @@ -1668,7 +1715,7 @@ REG_OP(DenseImageWarp) *@par Inputs: *One inputs, including: -* @li x: A tensor. Must be one of the following types: +* x: A tensor. Must be one of the following types: * float16, float32. \n *@par Attributes: @@ -1713,7 +1760,7 @@ REG_OP(ResizeD) *@par Inputs: *One inputs, including: -* @li grads: A tensor. Must be one of the following types: +* grads: A tensor. Must be one of the following types: * float16, float32. \n *@par Attributes: @@ -1762,8 +1809,8 @@ REG_OP(ResizeGradD) *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n *@par Outputs: -*grad_image: Returns 4-D with the same shape and dtype as `image`. -*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n +*@li grad_image: Returns 4-D with the same shape and dtype as `image`. +*@li grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n */ REG_OP(DenseImageWarpGrad) .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -1817,12 +1864,12 @@ REG_OP(GridSampler2D) *@li assist: Assist matrix, a 4-D tensor of type float16. *@par Attributes: -*@li align_corners: An optional bool. If "true", the centers of the corner +*align_corners: An optional bool. If "true", the centers of the corner pixels of the input and output tensors are aligned. Defaults to "false" . *@par Outputs: -*diff: Returns 4-D Tensor with the same shape and dtype as `grid`. -*position: Returns 4-D Tensor with the same shape as `grid`. +*@li diff: Returns 4-D Tensor with the same shape and dtype as `grid`. +*@li position: Returns 4-D Tensor with the same shape as `grid`. */ REG_OP(GridUnnormal) .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1840,10 +1887,13 @@ REG_OP(GridUnnormal) *@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`. *@par Attributes: -*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . +*padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . *@par Outputs: *y: Returns 4-D Tensor with the same dtype as `x`. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/
REG_OP(ImageUnfold)
  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1936,5 +1986,204 @@ REG_OP(GridSampler3DGrad)
  .ATTR(align_corners, Bool, false)
  .OP_END_FACTORY_REG(GridSampler3DGrad)

+/**
+*@brief Upsample the 3-D data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: An optional listInt. Defaults to none.
+ contains 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size'
+ should match the number of spatial dimensions of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, contains 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should match the number of spatial dimensions of input 'x'. One of 'scales' and
+ 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest3d)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .ATTR(output_size, ListInt, {})
+  .ATTR(scales, ListFloat, {})
+  .OP_END_FACTORY_REG(UpsampleNearest3d)
+
+/**
+*@brief Upsample the 3-D data with the trilinear interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: An optional listInt. Defaults to none.
+ contains 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' should
+ match the number of spatial dimensions of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, contains 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should match the number of spatial dimensions of input 'x'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+*@li align_corners: An optional bool. Defaults to false.
+ If true, the input and output tensors are aligned by the center points of their corner pixels, preserving the
+ values at the corner pixels. If false, the input and output tensors are aligned by the corner points of their
+ corner pixels, and the interpolation use edge value padding for out of boundary values. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleTrilinear3d)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .ATTR(output_size, ListInt, {})
+  .ATTR(scales, ListFloat, {})
+  .ATTR(align_corners, Bool, false)
+  .OP_END_FACTORY_REG(UpsampleTrilinear3d)
+
+/**
+*@brief Upsample the 3-D gradient data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
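+*
+*@par Example:
+* An editorial sketch of the output_size/scales relationship shared by the
+* upsample ops above (inferred from the size checks listed below; illustrative
+* only, not part of the original header):
+*   // for each spatial dim i in {D, H, W}:
+*   //   out_dim[i] = output_size.empty() ? floor(in_dim[i] * scales[i])
+*   //                                    : output_size[i];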
+
+*@par Attributes:
+*@li input_size: A required listInt.
+ contains 5 elements: [min_batch, channels, depth, height, width]. Must:
+ input_size[0] == grad_output_tensor_size[0]
+ input_size[1] == grad_output_tensor_size[1]. \n
+*@li output_size: An optional listInt. Defaults to none.
+ contains 3 elements: depth, height, width. The number of elements of 'output_size' should
+ match the number of spatial dimensions of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
+ grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
+ grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
+ grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, contains 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should match the number of spatial dimensions of input 'grad_output'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input grad_output, shape depends on the attribute 'input_size'. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(UpsampleNearest3dGrad)
+  .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .REQUIRED_ATTR(input_size, ListInt)
+  .ATTR(output_size, ListInt, {})
+  .ATTR(scales, ListFloat, {})
+  .OP_END_FACTORY_REG(UpsampleNearest3dGrad)
+
+/**
+*@brief Upsample the 3-D gradient data with the trilinear interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li input_size: A required listInt.
+ contains 5 elements: [min_batch, channels, depth, height, width]. Must:
+ input_size[0] == grad_output_tensor_size[0]
+ input_size[1] == grad_output_tensor_size[1]. \n
+*@li output_size: An optional listInt. Defaults to none.
+ contains 3 elements: depth, height, width. The number of elements of 'output_size' should
+ match the number of spatial dimensions of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
+ grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
+ grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
+ grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, contains 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should match the number of spatial dimensions of input 'grad_output'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A Tensor whose shape depends on 'input_size' and output_size/scales. Must be one of the following
+ types: float16, float32, float64. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(UpsampleTrilinear3dGrad)
+  .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .REQUIRED_ATTR(input_size, ListInt)
+  .ATTR(output_size, ListInt, {})
+  .ATTR(scales, ListFloat, {})
+  .ATTR(align_corners, Bool, false)
+  .OP_END_FACTORY_REG(UpsampleTrilinear3dGrad)
+
+
+/**
+*@brief Upsample the 1-D data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*x: A 3-D input tensor [N, C, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: A required listInt containing output_width.
+*@li scales: An optional listFloat containing scale_width. Defaults to empty. \n
+
+*@par Outputs:
+*y: A 3-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest1d)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .REQUIRED_ATTR(output_size, ListInt)
+  .ATTR(scales, ListFloat, {})
+  .OP_END_FACTORY_REG(UpsampleNearest1d)
+
+/**
+*@brief Upsample the 1-D gradient data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*grad_output: A 3-D input tensor [N, C, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: A required listInt containing output_width.
+*@li scales: An optional listFloat containing scale_width. Defaults to empty.
+*@li input_size: A required listInt containing the shape of the original input, [min_batch, channels, width]. \n
+
+*@par Outputs:
+*y: A 3-D tensor. Has the same type as input grad_output, shape depends on the attribute 'input_size'. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest1dGrad)
+  .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .REQUIRED_ATTR(input_size, ListInt)
+  .REQUIRED_ATTR(output_size, ListInt)
+  .ATTR(scales, ListFloat, {})
+  .OP_END_FACTORY_REG(UpsampleNearest1dGrad)
} // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h
index 69c77bf6..f6cc8694 100644
--- a/third_party/fwkacllib/inc/ops/linalg_ops.h
+++ b/third_party/fwkacllib/inc/ops/linalg_ops.h
@@ -347,6 +347,9 @@ REG_OP(SelfAdjointEig)
  .OP_END_FACTORY_REG(SelfAdjointEig)

/**
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+
*@brief Computes the sign and the log of the absolute value of the determinant
of one or more square matrices . \n
@@ -382,9 +385,10 @@ REG_OP(Slogdet)
*x:Tensor of shape [..., M, N]. Let P be the minimum of M and N . \n
*@par Attributes:
-*compute_uv:If True then left and right singular vectors will be computed and
+*@li compute_uv: If True then left and right singular vectors will be computed and
returned in u and v, respectively. Otherwise, only the singular values will
-be computed, which can be significantly faster . \n
+be computed, which can be significantly faster .
+*@li full_matrices: If true, compute full-sized u and v; otherwise, compute only the leading P singular vectors. \n
*@par Outputs:
*@li sigma:Singular values. Shape is [..., P]. The values are sorted in
@@ -427,6 +431,9 @@ denotes the lower triangular factor `L` with unit diagonal.
*@li p: upper triangular part denotes the upper triangular factor `U`.Permutation of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n +*@par Attributes: +*output_idx_type: An optional DType from: int32, int64. + *@par Third-party framework compatibility * Compatible with TensorFlow Lu operator. */ @@ -467,6 +474,12 @@ left-hand side . \n *@par Outputs: y: Tensor of shape `[..., M, K]` containing the solutions \n +*@par Attributes: +*partial_pivoting: Whether to perform partial pivoting. `True` by default. +Partial pivoting makes the procedure more stable, but slower. Partial +pivoting is unnecessary in some cases, including diagonally dominant and +symmetric positive definite matrices + *@par Third-party framework compatibility * Compatible with TensorFlow TridiagonalSolve operator. */ diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h index a1b622e9..0aa94e73 100644 --- a/third_party/fwkacllib/inc/ops/list_ops.h +++ b/third_party/fwkacllib/inc/ops/list_ops.h @@ -35,10 +35,10 @@ namespace ge { *@li max_num_elements: The maximum number of elements. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li handle: An empty tensor list . \n +*handle: An empty tensor list . \n *@par Third-party framework compatibility. *Compatible with tensorflow EmptyTensorList operator. @@ -59,10 +59,10 @@ and the other elements of the given list in `input_handle`. \n *@li tensor: The tensor to put on the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle:A list with the elements of old list followed by tensor. \n +*output_handle:A list with the elements of old list followed by tensor. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListPushBack operator. @@ -86,7 +86,7 @@ list with all but that element. \n *@li element_shape: A shape compatible with that of elements in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: *@li output_handle:A list with the elements of the old list followed by tensor. @@ -110,10 +110,10 @@ REG_OP(TensorListPopBack) *@brief The number of tensors in the input tensor list. \n *@par Inputs: -*@li input_handle: The input list. \n +*input_handle: The input list. \n *@par Outputs: -*@li length:The number of tensors in the list. \n +*length:The number of tensors in the list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListLength operator. @@ -127,13 +127,13 @@ REG_OP(TensorListLength) *@brief The shape of elements in the input tensor list. \n *@par Inputs: -*@li input_handle: The input list. \n +*input_handle: The input list. \n *@par Attributes: -*@li shape_type: The type of shape in the list. \n +*shape_type: The type of shape in the list. \n *@par Outputs: -*@li element_shape:A shape compatible with that of elements in the list. \n +*element_shape:A shape compatible with that of elements in the list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListElementShape operator. @@ -156,7 +156,7 @@ REG_OP(TensorListElementShape) *@li shape_type: The type of shape in the list. \n *@par Outputs: -*@li handle: An output tensor list . \n +*handle: An output tensor list . 
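*
*@par Example:
* An editorial usage sketch for the TensorList ops above (TensorFlow-style
* pseudo-code; names and shapes are illustrative, not part of the original
* header):
*   // handle = TensorListReserve(element_shape=[2], num_elements=4)
*   // handle = TensorListSetItem(handle, index=0, item=t)   // t: float[2]
*   // item   = TensorListGetItem(handle, index=0, element_shape=[2])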
\n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListReserve operator. @@ -178,10 +178,10 @@ REG_OP(TensorListReserve) *@li element_shape: A shape compatible with that of elements in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li item: An output tensor value of index position . \n +*item: An output tensor value of index position . \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListGetItem operator. @@ -206,10 +206,10 @@ REG_OP(TensorListGetItem) *@li item: The element to be assigned to that position. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: An output tensor list . \n +*output_handle: An output tensor list . \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListSetItem operator. @@ -233,10 +233,10 @@ REG_OP(TensorListSetItem) *@li tensor: The tensor push into tensor list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handles: The output tensor lists. \n +*output_handles: The output tensor lists. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListPushBackBatch operator. @@ -263,7 +263,7 @@ REG_OP(TensorListPushBackBatch) *@li num_elements: The number of elements in the list. \n *@par Outputs: -*@li tensor: The tensor of list. \n +*tensor: The tensor of list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListStack operator. @@ -293,7 +293,7 @@ the leading dim of input_handle.element_shape or the element_shape input arg is not already set. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: *@li tensor: The concated result. @@ -324,10 +324,10 @@ REG_OP(TensorListConcatV2) *@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: The list. \n +*output_handle: The list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListSplit operator. @@ -351,10 +351,10 @@ REG_OP(TensorListSplit) *@li element_shape: The shape of elements in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: An output tensor list . \n +*output_handle: An output tensor list . \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListFromTensor operator. @@ -377,7 +377,7 @@ REG_OP(TensorListFromTensor) *@li size: size of the output list. \n *@par Outputs: -*@li output_handle: The output tensor list. \n +*output_handle: The output tensor list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListResize operator. @@ -397,10 +397,10 @@ REG_OP(TensorListResize) *@li element_shape: The shape of elements in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li values: The tensor. \n +*values: The tensor. 
\n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListGather operator. @@ -429,10 +429,10 @@ the largest index in indices. If -1, the list is just large enough to include the largest index in indices. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: The TensorList. \n +*output_handle: The TensorList. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListScatterV2 operator. @@ -458,10 +458,10 @@ REG_OP(TensorListScatterV2) *@li indices: The indices used to index into the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: The TensorList. \n +*output_handle: The TensorList. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListScatterIntoExistingList operator. @@ -485,10 +485,10 @@ REG_OP(TensorListScatterIntoExistingList) *@li input_b: The input tensor list B. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output: The output list. \n +*output: The output list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListConcatLists operator. diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index 5d928e5a..b1fc254f 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -77,8 +77,8 @@ REG_OP(LookupTableInsert) *handle: A Tensor of type resource. Handle to the table . \n *@par Attributes: -*@li Tkeys: A DType. -*@li Tvalues: A DType . \n +*@li Tkeys: A DType of keys. +*@li Tvalues: A DType of values. *@par Outputs: *@li keys: A Tensor of type Tkeys. diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 319bcf70..6eb418d8 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -227,10 +227,10 @@ REG_OP(Bucketize) *@par Inputs: *One inputs, including: -* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n +*input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n *@par Outputs: -*y: A tensor with the same type and shape of input_x \n +*output_y: A tensor with the same type and shape of input_x \n *@par Third-party framework compatibility *Compatible with the Pytorch operator Trunc. \n @@ -298,7 +298,7 @@ REG_OP(SparseSegmentMean) *@par Inputs: *The input grad must have be type float or double. Inputs include: -*@li grad: A Tensor. Must be one of the following types: float, double. +*@li x: A Tensor. Must be one of the following types: float, double. gradient propagated to the SparseSegmentMean op. *@li indices: A Tensor. Must be one of the following types: int32, int64. indices passed to the corresponding SparseSegmentMean op. @@ -365,6 +365,7 @@ REG_OP(InitData) component of an element of this dataset. *@li output_shapes: A nested structure of TensorShape objects corresponding to each component of an element of this dataset. +*@li output_num:output of nums. *@li channel_name: A string. Default "" . \n *@par Outputs: @@ -538,11 +539,11 @@ REG_OP(NextAfter) *@par Inputs: *One inputs, including: -* @li input_x: A tensor. 
Must be one of the following types:
+* input_x: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Attributes:
-*@li p: An optional float.Defaults to 2. \n
+*p: An optional float. Defaults to 2. \n
*@par Outputs:
*y: A Tensor with the same type and shape of input_x's. \n
@@ -560,10 +561,10 @@ REG_OP(Pdist)
*@brief Compute element-wise finiteness, return a boolean tensor.
*@par Inputs:
- *x:A Tensor.
+ *x:A Tensor of type float16, float32, double.
*@par Outputs:
- *y:A Tensor. Has the same shape as x.
+ *y:A Tensor. Has the same shape as x. Returns which elements of x are finite.
*@par Third-party framework compatibility.
*Compatible with tensorflow IsFinite operator.
@@ -577,10 +578,10 @@ REG_OP(IsFinite)
*@brief Compute element-wise infiniteness, return a boolean tensor.
*@par Inputs:
- *x:A Tensor.
+ *x:A Tensor of type float16, float32, double.
*@par Outputs:
- *y:A Tensor. Has the same shape as x.
+ *y:A Tensor. Has the same shape as x. Returns which elements of x are infinite.
*@par Third-party framework compatibility.
*Compatible with tensorflow IsInf operator.
@@ -594,7 +595,11 @@ REG_OP(IsInf)
*@brief Computes the complex absolute value of a tensor.
*@par Inputs:
- *x:A Tensor.
+ *x: A tensor of complex numbers. This operation returns a tensor of type
+ float or double that is the absolute value of each element in x .
+
+* @par Attributes:
+* Tout: the type of the output.
*@par Outputs:
*y:A tensor of type `float` or `double` that is the absolute value of each element in `x`.
@@ -612,10 +617,10 @@ REG_OP(ComplexAbs)
*@brief Returns which elements of x are NaN.
*@par Inputs:
- *x:A Tensor.
+ *x:A Tensor of type float16, float32, double.
*@par Outputs:
- *y:A Tensor. Has the same shape as x.
+ *y:A Tensor. Has the same shape as x. Returns which elements of x are NaN.
*@par Third-party framework compatibility.
*Compatible with tensorflow IsNan operator.
@@ -629,7 +634,10 @@ REG_OP(IsNan)
*@brief Returns the real part of a complex number.
*@par Inputs:
- *input:A Tensor.
+ *input:A Tensor. Must have numeric type.
+
+ *@par Attributes:
+ *Tout: Type of outputs. \n
*@par Outputs:
*output:A Tensor. Has the same shape as input.
@@ -670,7 +678,8 @@ REG_OP(Conj)
*@li weight: A Tensor dtype of float32 . \n
*@par Attributes:
-*reduction: An optional attribute. Defaults to "mean" . \n
+*@li reduction: An optional attribute. Defaults to "mean" .
+*@li ignore_index: An optional attribute. Defaults to -100 . \n
*@par Outputs:
*@li y: A Tensor dtype of float32.
@@ -700,7 +709,8 @@ REG_OP(NLLLoss)
*@li total_weight:A Tensor dtype of float32 . \n
*@par Attributes:
-*reduction: An optional attribute. Defaults to "mean" . \n
+*@li reduction: An optional attribute. Defaults to "mean" .
+*@li ignore_index: An optional attribute. Defaults to -100 . \n
*@par Outputs:
*x_grad: A Tensor. Must be the following type: float32 . \n
@@ -720,24 +730,24 @@ REG_OP(NLLLossGrad)
  .OP_END_FACTORY_REG(NLLLossGrad)

/**
-*@brief The ifmr . \n
+*@brief IFMR (Input Feature Map Reconstruction). \n
*@par Inputs:
-*@li data:A Tensor of feature map
-*@li data_min:A Tensor of min value of feature map.
-*@li data_max:A Tensor of max value of feature map.
-*@li cumsum:A Tensor of cumsum bin of data . \n
+*@li data: A Tensor of feature map.
+*@li data_min: A Tensor of min value of feature map.
+*@li data_max: A Tensor of max value of feature map.
+*@li cumsum: A Tensor of cumsum bin of data . \n
*@par Attributes:
-*min_percentile: min init percentile.
-*max_percentile: max init percentile.
-*search_range: search range.
-*search_step: step size of searching.
-*with_offset: whether using offset . \n
+*@li min_percentile: min init percentile.
+*@li max_percentile: max init percentile.
+*@li search_range: search range.
+*@li search_step: step size of searching.
+*@li with_offset: whether using offset . \n
*@par Outputs:
-*scale: optimal scale.
-*offset: optimal offset . \n
+*@li scale: optimal scale.
+*@li offset: optimal offset . \n
*@par Third-party framework compatibility
*Compatible with mindspore
@@ -758,16 +768,16 @@ REG_OP(IFMR)
  .OP_END_FACTORY_REG(IFMR)

/**
-*@brief weights adaptive range quantization. \n
+*@brief Weights Adaptive Range Quantization. \n
*@par Inputs:
-*@li w:A Tensor of weights. \n
-*@li w_min:A Tensor of weights reduce_min. \n
-*@li w_max:A Tensor of weights reduce_max. \n
+*@li w: A Tensor of weights. \n
+*@li w_min: A Tensor of weights reduce_min. \n
+*@li w_max: A Tensor of weights reduce_max. \n
*@par Attributes:
-*num_bits: the bits num used for quantize.
-*offset_flag: whether using offset. \n
+*@li num_bits: the number of bits used for quantization.
+*@li offset_flag: whether using offset. \n
*@par Outputs:
*y: fake quantized weights. \n
@@ -789,22 +799,22 @@ REG_OP(WtsARQ)
  .OP_END_FACTORY_REG(WtsARQ)

/**
-*@brief The acts_ulq. \n
+*@brief Activations Universal Linear Quantization. \n
*@par Inputs:
-*@li x:A Tensor of feature map
-*@li clamp _min:A Tensor of min clamp value of feature map.
-*@li clamp _max:A Tensor of max clamp value of feature map.
+*@li x: A Tensor of feature map.
+*@li clamp_min: A Tensor of min clamp value of feature map.
+*@li clamp_max: A Tensor of max clamp value of feature map.
*@par Attributes:
-*fixed_min: fix min to zero.
-*num_bits: quant bits. \n
+*@li fixed_min: fix min to zero.
+*@li num_bits: quant bits. \n
*@par Outputs:
-*y: output fake quant feature map.
-*clamp_min_mask: where x > clamp_min
-*clamp_min_mask: where x < clamp_max
-*x_clamped_loss: clamp loss. \n
+*@li y: output fake quant feature map.
+*@li clamp_min_mask: where x > clamp_min.
+*@li clamp_max_mask: where x < clamp_max.
+*@li x_clamped_loss: clamp loss. \n
*@par Third-party framework compatibility
*Compatible with mindspore
@@ -826,12 +836,12 @@ REG_OP(ActsULQ)
  .OP_END_FACTORY_REG(ActsULQ)

/**
-*@brief The acts_ulq_input_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization. \n
*@par Inputs:
-*@li y_grad: A Tensor of gradient
-*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed'
-*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed'
+*@li y_grad: A Tensor of gradient.
+*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
+*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@par Outputs:
*x_grapd: The gradient of inpust. \n
@@ -851,10 +861,10 @@ REG_OP(ActsULQInputGrad)
  .OP_END_FACTORY_REG(ActsULQInputGrad)

/**
-*@brief The act_ulq_clamp_max_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization clamp max. \n
*@par Inputs:
-*@li y_grad: A Tensor of gradient
+*@li y_grad: A Tensor of gradient.
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. \n
@@ -876,10 +886,10 @@ REG_OP(ActULQClampMaxGrad)
  .OP_END_FACTORY_REG(ActULQClampMaxGrad)

/**
-*@brief The act_ulq_clamp_min_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization clamp min.
\n
*@par Inputs:
-*@li y_grad: A Tensor of gradient
+*@li y_grad: A Tensor of gradient.
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. \n
@@ -904,7 +914,7 @@ REG_OP(ActULQClampMinGrad)
* @brief Computes Lp norm.
* @par Inputs:
-* @li x: An ND tensor of type float16, float32. \n
+* x: An ND tensor of type float16, float32. \n
*
* @par Attributes:
* @li p: Int, "inf" or "-inf", default value is 2.
* @li epsilon: Float, default is 1e-12. \n
* @par Outputs:
-* @li y: An ND tensor of type float16, float32. The shape of y is depending
+* y: An ND tensor of type float16, float32. The shape of y depends
* on axes and keepdim. \n
* @par Third-party framework compatibility
@@ -932,11 +942,13 @@ REG_OP(LpNorm)
* @brief get complex.
* @par Inputs:
-* @li real: An ND tensor of type float32. double
-* @li imag: An ND tensor of type float32. double \n
+* @li real: An ND tensor of type float32 or double, representing the real part of a complex number.
+* @li imag: An ND tensor of type float32 or double, representing the imaginary part of a complex number. \n
*
+* @par Attributes:
+* Tout: the type of the output.
* @par Outputs:
-* @li out: An ND tensor of type complex64, complex128 \n
+* out: An ND tensor of type complex64, complex128 \n
*/
REG_OP(Complex)
  .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE}))
@@ -949,10 +961,13 @@ REG_OP(Complex)
* @brief deal complex.
* @par Inputs:
-* @li input: An ND tensor of type complex64, complex128 \n
-*
+* input: An ND tensor of type complex64, complex128 \n
+
+* @par Attributes:
+* Tout: the type of the output.
+
* @par Outputs:
-* @li output: An ND tensor of type float32. double \n
+* output: An ND tensor of type float32 or double \n
*/
REG_OP(Imag)
  .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
@@ -988,7 +1003,7 @@ REG_OP(Angle)
* float16, float32. \n
*@par Attributes:
-* @li reduction: Specifies the reduction to apply to the output:
+* reduction: Specifies the reduction to apply to the output:
* 'none' | 'mean' | 'sum'. Default: 'mean'. \n
*@par Outputs:
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index b317be37..81c6a29e 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -61,21 +61,28 @@ REG_OP(MatMul)
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n
*@par Inputs:
-*Two inputs, including:
-* @li x1: A matrix Tensor. 2D. Must be one of the following types: float16,
-* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
-* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16,
-* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
-* @li bias: A 1D Tensor. Must be one of the following types: float16,
-* float32, int32. Has format [ND, NHWC] . \n
+*Four inputs, including:
+* @li x1: A matrix Tensor. 2D. Must be one of the following types: float32,
  float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ].
+* @li x2: A matrix Tensor. 2D. Must be one of the following types: float32,
  float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ].
+* @li bias: A 1D Tensor. Must be one of the following types: float32,
  float16, int32. Has format [ND, NHWC].
+* @li offset_w: An optional 1D Tensor for quantized inference. Type is int8.
  Reserved. \n
*@par Attributes:
-*@li transpose_x1: A bool.
If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to + [M, K]. +* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to +[K, N]. +* @li offset_x: An optional integer for quantized MatMulV2. +* The negative offset added to the input x1 for int8 type. Ensure offset_x + within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n +*y: The result matrix Tensor. 2D. Must be one of the following types: float32, + float16, int32. Has format [ND, NHWC, FRACTAL_NZ]. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -95,19 +102,27 @@ REG_OP(MatMulV2) *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n *@par Inputs: -*Two inputs, including: +*Five inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. * @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. * @li compress_index: A compress index matrix of type int8. -* @li bias: A 1D Tensor. Must be one of the following types: int32, float16. +* @li bias: An optional Tensor. 1D. Must be one of the following types: int32, + float16. +* @li offset_w: An optional matrix Tensor. 2D. Must be one of the following + types: int8. \n *@par Attributes: -*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to + [M, K]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to + [K, N]. +*@li offset_x: An optional integer for quantized MatMulV2Compress. +*The negative offset added to the input x1 for int8 type. Ensure offset_x + within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* int32. \n +*y: The result matrix Tensor. 2D. Must be one of the following types: int32, +* float16. \n */ REG_OP(MatMulV2Compress) @@ -488,13 +503,13 @@ REG_OP(ScatterElements) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor . \n +*@li var: An ND Tensor . *Must be one of the following types: float16, float32, int32, int8, uint8 *@li indices: An ND Tensor of type int32 or int64 -*@li updates: An Tensor. format:NCHW, NHWC . \n +*@li updates: An Tensor. format:NCHW, NHWC . *Must be one of the following types: float16, float32, int32, int8, uint8 @@ -517,6 +532,61 @@ REG_OP(ScatterAdd) .OP_END_FACTORY_REG(ScatterAdd) /** +*@brief Use a scalar to modify the tensor. \n + +*@par Inputs: +*inputs, including: +*@li index: An ND Tensor . \n + +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Attributes: +* dim : the axis along which to index . +* value : the source element(s) to scatter . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "index" . \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator ScatterScalar. 
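+*
+*@par Example:
+* An editorial sketch assuming PyTorch-style scatter semantics with a scalar
+* source (illustrative only, not part of the original header); for dim == 0 on
+* a 2-D tensor:
+*   // for (int64_t i = 0; i < index_rows; ++i)
+*   //   for (int64_t j = 0; j < index_cols; ++j)
+*   //     y[index[i][j]][j] = value;  // dim == 0 selects the row to write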
+*/
+REG_OP(ScatterScalar)
+    .INPUT(index, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .REQUIRED_ATTR(dim, Int)
+    .REQUIRED_ATTR(value, Float)
+    .OP_END_FACTORY_REG(ScatterScalar)
+
+/**
+*@brief Uses a source tensor to modify the input tensor. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li index: An ND Tensor. \n
+
+*Must be one of the following types: float16, float32, int32, int8, uint8
+
+*@li src: An ND Tensor. \n
+
+*Must be one of the following types: float16, float32, int32, int8, uint8
+
+*@par Attributes:
+* dim: The axis along which to index. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type and format as input "index". \n
+
+*@par Third-party framework compatibility
+* Compatible with the Pytorch operator ScatterTensor.
+*/
+REG_OP(ScatterTensor)
+    .INPUT(index, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .REQUIRED_ATTR(dim, Int)
+    .OP_END_FACTORY_REG(ScatterTensor)
+
+/**
*@brief Divides a variable reference by sparse updates . \n

*@par Inputs:
@@ -530,7 +600,7 @@ REG_OP(ScatterAdd)
*Must be one of the following types: float16, float, int32, int8, uint8

*@par Attributes:
-*@li use_locking: An optional bool. Defaults to "False". If "True",
+*use_locking: An optional bool. Defaults to "False". If "True",
* the operation will be protected by a lock . \n

*@par Outputs:
@@ -752,10 +822,12 @@ REG_OP(DiagPart)
*@par Attributes:
*@li num_output: Reserved.
-*@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false".
+*@li transpose: A bool, specifying whether to transpose the input "w", either "true" or "false". Defaults to "false".
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1.
* The product of the subsequent dimensions starting from the first or the second dimension is "K".
-*@li offset_x: Reserved . \n
+*@li offset_x: An optional integer for quantized FullyConnection.
+*The negative offset added to the input image for int8 type. Ensure offset_x is within the
+*effective range of int8 [-128, 127]. Defaults to "0". \n

*@par Outputs:
*y: The result tensor of type float16, int32, float32 . \n
@@ -779,27 +851,34 @@ REG_OP(FullyConnection)
    .OP_END_FACTORY_REG(FullyConnection)

/**
-*@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n
+*@brief Also known as a "fully-connected-compress" layer, computes an inner
+product with a set of learned weights, and (optionally) adds biases. \n

*@par Inputs:
-* Four inputs, including:
+* Five inputs, including:
*@li x: A Tensor of type uint8, int8.
-*@li w: A weight matrix of type int8, int8.
-*@li w: A compress index matrix of type int8, int8.
-*@li b: A Tensor of type float16, int32, int32.
-*@li offset_w: A Tensor of type int8.i
+*@li w: A weight matrix of type int8.
+*@li compress_index: A compress index matrix of type int8.
+*@li b: A Tensor of type int32.
+*@li offset_w: A Tensor of type int8.

*@par Attributes:
-*@li num_output: Reserved.
-*@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false".
-*@li axis: Reserved.
-*@li offset_x: Reserved . \n
+*@li num_output: An int, specifying the number of outputs.
+*@li transpose: A bool, specifying whether to transpose the input "w", either "true"
+ or "false". Defaults to "false".
+*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K"
+starts from. Defaults to "1".
+* The product of the subsequent dimensions starting from the first or the
+second dimension is "K".
+*@li offset_x: An optional integer for quantized FullyConnectionCompress.
+*The negative offset added to the input image for int8 type. Ensure offset_x is
+within the effective range of int8 [-128, 127]. Defaults to "0". \n

*@par Outputs:
-*y: The result tensor of type int32 . \n
+*y: The result tensor of type int32. \n

*@par Third-party framework compatibility
-* Compatible with the Caffe operator InnerProduct . \n
+* Compatible with the Caffe operator InnerProduct. \n

*@par Quantization supported or not
* Yes
@@ -925,13 +1004,13 @@ REG_OP(ScatterMin)

*@par Inputs:
* Three inputs, including:
-*@li var: An ND Tensor . \n
+*@li var: An ND Tensor.

*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An NCHW, NHWC, or ND Tensor . \n

*Must be one of the following types: int32 or int64
-*@li updates: An NCHW, NHWC, or ND Tensor . \n
+*@li updates: An NCHW, NHWC, or ND Tensor.

*Must be one of the following types: float16, float, int32, int8, uint8
@@ -958,13 +1037,13 @@ REG_OP(ScatterMax)

*@par Inputs:
* Three inputs, including:
-*@li var: An ND Tensor . \n
+*@li var: An ND Tensor.

*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor . \n

*Must be one of the following types: int32 or int64
-*@li updates: An ND Tensor . \n
+*@li updates: An ND Tensor.

*Must be one of the following types: float16, float, int32, int8, uint8
@@ -1113,14 +1192,46 @@ REG_OP(IndexAdd)
    .OP_END_FACTORY_REG(IndexAdd)

/**
+* @brief Replaces the values of "x1" at the positions specified by "indices"
+* with the values in "x2".
+
+* @par Inputs:
+* Three inputs, including:
+* @li x1: A Tensor. Must be one of the following types:
+* float16, float32, int32, int8, uint8.
+* @li x2: A Tensor of the same type as "x1".
+* @li indices: A Tensor of indices, of type int32.
+
+* @par Attributes:
+* accumulate: An optional int. If 1, the values are accumulated (added)
+* instead of overwritten. Defaults to 0.
+
+* @par Outputs:
+* y: A Tensor. Has the same type as "x1".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator index_put.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(IndexPut)
+    .INPUT(x1, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(x2, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(indices, TensorType({DT_INT64, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .ATTR(accumulate, Int, 0)
+    .OP_END_FACTORY_REG(IndexPut)
+
+/**
*@brief: Returns the upper triangular part of a matrix (2-D tensor) or a batch of matrices \n

*@par Inputs:
-* Two inputs, including:
-*@li x: A Tensor. Must be one of the following types:
-* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
-* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
-*@li diagonal:(int, optional) – the diagonal to consider。\n
+*x: A Tensor. Must be one of the following types:
+*float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+*qint8, quint8, qint32, uint16, complex128, uint32, uint64.
\n + +*@par Attributes: +*diagonal: An optional attribute indicates the diagonal to consider. \n *@par Outputs: *y: A Tensor. Has the same type as "x" . \n @@ -1138,11 +1249,12 @@ REG_OP(Triu) *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n *@par Inputs: -* Two inputs, including: -*@li x: A Tensor. Must be one of the following types: -* float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. -*@li diagonal:(int, optional) – the diagonal to consider。\n +*x: A Tensor. Must be one of the following types: +*float16, float32, double, int32, uint8, int16, int8, complex64, int64, +*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n + +*@par Attributes: +*diagonal: An optional attribute indicates the diagonal to consider. \n *@par Outputs: *y: A Tensor. Has the same type as "x" . \n @@ -1213,6 +1325,30 @@ REG_OP(Eye) .ATTR(dtype, Int, 0) .OP_END_FACTORY_REG(Eye) +/** +*@brief: Fill diagonal of at least 2 dimension tensors with value . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float32, int32, int64 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Attributes: +*fill_value:The value to fill in +*wrap: An optional bool. Defaults to "False". If "True", Use recursive fill. \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator FillDiagonal. +*/ +REG_OP(FillDiagonal) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT64})) + .REQUIRED_ATTR(fill_value, Float) + .ATTR(wrap, Bool, false) + .OP_END_FACTORY_REG(FillDiagonal) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 98473c65..a55cebe2 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -195,7 +195,7 @@ REG_OP(DepthwiseConv2DBackpropInput) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(filter, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) - .OUTPUT(input_grad, TensorType({DT_FLOAT16})) + .OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(strides, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .REQUIRED_ATTR(pads, ListInt) @@ -255,7 +255,7 @@ REG_OP(DepthwiseConv2DBackpropInput) REG_OP(DepthwiseConv2DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) - .OUTPUT(input_grad, TensorType({DT_FLOAT16})) + .OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -367,19 +367,19 @@ REG_OP(BiasAddGrad) * Gradients with respect to the output of the convolution. 
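+*
+* As a shape sanity check, input_size, out_backprop and the attributes must
+* satisfy the usual convolution relation (shown for H; W is analogous):
+*     H_out = floor((H_in + pad_top + pad_bottom - dilation_h * (filter_h - 1) - 1) / stride_h) + 1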
*\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | out_bckprop | filter | y - ------------|-------------|---------|-------- - | Data Type | float16 | float16 | float16 - | |-------------|---------|-------- - | | float32 | float32 | float32 - | |-------------|---------|-------- - | | float64 | float64 | float64 - ------------|-------------|---------|-------- - | Format | NCHW | NCHW | NCHW - | | NHWC | HWCN | NHWC -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | out_bckprop | filter | y\n + ------------|-------------|---------|--------\n + | Data Type | float16 | float16 | float16\n + | |-------------|---------|--------\n + | | float32 | float32 | float32\n + | |-------------|---------|--------\n + | | float64 | float64 | float64\n + ------------|-------------|---------|--------\n + | Format | NCHW | NCHW | NCHW\n + | | NHWC | HWCN | NHWC\n + *\n * For float32 and float64 type, the actual calculation on the chip is based on * float16. *\n @@ -398,36 +398,37 @@ REG_OP(BiasAddGrad) * "NHWC". Specify the data format of the input and output data. *\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | input_size | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | out_backprop | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | y(fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | input_size | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | out_backprop | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | y(fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + *\n -@endverbatim * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * @@ -496,7 +497,7 @@ REG_OP(Conv2DBackpropInput) REG_OP(Conv2DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -508,7 
+509,7 @@ REG_OP(Conv2DBackpropInputD) /** *@brief Computes the Deconvolution with respect to the input. *@par Inputs: - * Three inputs: + * Two required inputs: * @li x: A Tensor of type float16 or int8. 4D with shape * [batch, out_channels, out_height, out_width]. Gradients with respect * to the output of the convolution. @@ -520,16 +521,16 @@ REG_OP(Conv2DBackpropInputD) * Type is int8. Reserved.\n *\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | |---------|---------|---------|-------- - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | filter | bias | y\n + ------------|---------|---------|---------|--------\n + | Data Type | float16 | float16 | float16 | float16\n + | |---------|---------|---------|--------\n + | | int8 | int8 | int32 | int32\n + ------------|---------|---------|---------|--------\n + | Format | NCHW | NCHW | ND | NCHW\n + *\n * For int8, a dequant or requant operator must be followed. *\n * @@ -550,35 +551,35 @@ REG_OP(Conv2DBackpropInputD) * within the effective range of int8 [-128, 127]. Defaults to "0". *\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | x (out_backprop) | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | Filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | y (fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | x (out_backprop) | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | Filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | y (fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | Offset_x | | [-128, 127]\n + *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * @@ -628,19 +629,19 @@ REG_OP(Deconvolution) * convolution. 
*\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | out_backprop | y - ------------|---------|--------------|--------- - | Data Type | float16 | float16 | float16 - | |---------|--------------|--------- - | | float32 | float32 | float32 - | |---------|--------------|--------- - | | float64 | float64 | float64 - |-----------|---------|--------------|--------- - | Format | NCHW | NCHW | NCHW - | | NHWC | NHWC | HWCN -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | out_backprop | y\n + ------------|---------|--------------|---------\n + | Data Type | float16 | float16 | float16\n + | |---------|--------------|---------\n + | | float32 | float32 | float32\n + | |---------|--------------|---------\n + | | float64 | float64 | float64\n + |-----------|---------|--------------|---------\n + | Format | NCHW | NCHW | NCHW\n + | | NHWC | NHWC | HWCN\n + *\n * For float32 and float64 type of x and outbackprop, the actual calculation on the chip * is based on float16. *\n @@ -658,39 +659,34 @@ REG_OP(Deconvolution) * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. *\n -*\n -* The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | x(fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter Size | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | out_backprop | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | y | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -@endverbatim - * In Ascend910, out_backprop's H and W not support 1 when - * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 *\n - * + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | x(fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Filter Size | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | out_backprop | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | y | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + *\n *@par Outputs: * y: A Tensor. Has the same type as x, has the same format as filter_size. 
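+*
+* A minimal construction sketch (a hypothetical illustration; it assumes the
+* input/attribute setters that the GE IR factory generates from this
+* registration, not an officially documented example):
+*     ge::op::Conv2DBackpropFilter op("conv2d_bp_filter");
+*     op.set_input_x(x);                      // forward-pass feature map
+*     op.set_input_filter_size(filter_size);  // shape of the filter gradient
+*     op.set_input_out_backprop(dy);          // gradient w.r.t. the conv output
+*     op.set_attr_strides({1, 2, 2, 1});      // NHWC; N and C strides must be 1
+*     op.set_attr_pads({0, 0, 0, 0});         // top, bottom, left, right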
*\n @@ -780,16 +776,16 @@ REG_OP(Conv2DBackpropFilterD) *\n *\n * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | | float32 | float32 | float32 | float32 - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | | NHWC -@endverbatim +*\n +*\n +| Tensor | x | filter | bias | y |\n +| :-------: | :-----: | :-----: | :-----: | :-----: |\n +| Data Type | float16 | float16 | float16 | float16 |\n +| | float32 | float32 | float32 | float32 |\n +| | int8 | int8 | int32 | int32 |\n +| Format | NCHW | NCHW | ND | NCHW |\n +| | NHWC | HWCN | | NHWC |\n +*\n * For float32 type, the actual calculation on the chip is based on * float16. *\n @@ -813,35 +809,30 @@ REG_OP(Conv2DBackpropFilterD) *\n *\n * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | Input Image Size | H | [1, 100000] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter Size | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim +*\n +*\n +| Name | Field | Scope |\n +| :--------------: | :------: | :---------: |\n +| Input Image Size | H | [1, 100000] |\n +| | W | [1, 4096] |\n +| Filter Size | H | [1, 255] |\n +| | W | [1, 255] |\n +| Stride | H | [1, 63] |\n +| | W | [1, 63] |\n +| Padding | Top | [0, 255] |\n +| | Bottom | [0, 255] |\n +| | Left | [0, 255] |\n +| | Right | [0, 255] |\n +| Dilation | H | [1, 255] |\n +| | W | [1, 255] |\n +| Offset_x | - | [-128, 127] |\n +*\n * The W dimension of the input image supports cases exceeding 4096, but it may * cause compilation errors. *\n * *@par Outputs: -*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the +* y: A 4D Tensor of output feature map. Has the same type as "x". With the * format "NHWC", the data is stored in the order of: [batch, out_height, * out_width, out_channels]. *\n @@ -956,16 +947,15 @@ REG_OP(Conv2DCompress) *\n *\n * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | offsets | bias | y - ------------|---------|---------|---------|----------|-------- - | Data Type | float16 | float16 | float16 | float16 | float16 - | |---------|---------|---------|----------|-------- - | | float32 | float32 | float32 | float32 | float32 - ------------|---------|---------|---------|----------|-------- - | Format | NCHW | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | NHWC | | NHWC -@endverbatim +*\n +*\n +| Tensor | x | filter | offsets | bias | y |\n +| :-------: | :-----: | :-----: | :-----: | :-----: | :-----: |\n +| Data Type | float16 | float16 | float16 | float16 | float16 |\n +| | float32 | float32 | float32 | float32 | float32 |\n +| Format | NCHW | NCHW | NCHW | ND | NCHW |\n +| | NHWC | HWCN | NCHW | | NHWC |\n +*\n * For float32 type, the actual convolution calculation part on the chip is * based on float16. 
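+*
+* Conceptually, each output position p0 samples the input at offset-shifted
+* locations (the standard DCNv2 formulation; the offsets dp_k and modulation
+* scalars m_k are taken from the "offsets" input, p_k enumerates the kernel grid):
+*     y(p0) = sum_k w_k * x(p0 + p_k + dp_k) * m_k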
*\n @@ -992,19 +982,18 @@ REG_OP(Conv2DCompress) *\n *\n * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - --------------------|--------|---------------------------- - | Input Image Size | H | [1, 100000 / filter_height] - | | W | [1, 4096 / filter_width] - --------------------|--------|---------------------------- - | Filter Size | H | [1, 63] - | | W | [1, 63] -@endverbatim +*\n +*\n +| Name | Field | Scope |\n +| :--------------: | :------: | :-------------------------: |\n +| Input Image Size | H | [1, 100000 / filter_height] |\n +| | W | [1, 4096 / filter_width] |\n +| Filter Size | H | [1, 63] |\n +| | W | [1, 63] |\n *\n * *@par Outputs: -*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the +* y: A 4D Tensor of output feature map. Has the same type as "x". With the * format "NHWC", the data is stored in the order of: [batch, out_height, * out_width, out_channels]. *\n @@ -1042,41 +1031,38 @@ REG_OP(DeformableConv2D) /** *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. - *@par Inputs: + +*@par Inputs: * @li x: A 5D tensor. Must be one of the following types: float16, * (Currently does not support int8). The format of x is NCDHW or NDHWC. * @li filter: A 5D tensor of the same type as "x". * (Currently does not support int8). - * The format is NCDHW, NDHWC or DHWCN . \n - -*@par Optional input: - * @li bias: An optional 1D tensor of the same type as "x". - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n + * The format is NCDHW, NDHWC or DHWCN. + * @li bias: Optional. An 1D tensor of the same type as "x". + * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n -*@par Required Attributes: - * @li strides: A list of 5 integers. Specifies the stride of the sliding window +*@par Attributes: + * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window * for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A list of 6 integers. + * @li pads: Required. A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, - * tail, top, bottom, left and right . \n - -*@par Attributes: - * @li groups: Number of blocked connections from input channels to output + * tail, top, bottom, left and right. + * @li dilations: Optional. A list of 5 integers. Specifies the dilation factor for each + * dimension of "x". + * @li groups: Optional. Number of blocked connections from input channels to output * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li dilations: A list of 5 integers. Specifies the dilation factor for each - * dimension of "x". * The N, C and D dimensions must be 1. Has the same format as "x". - * @li offset_x: An optional int. Input offset, used for quantized inference. - * Defaults to 0. Reserved . \n + * @li offset_x: Optional. An int. Input offset, used for quantized inference. + * Defaults to 0. Reserved. \n *@par Outputs: - *y: A Tensor. Has the same type and data format as "x". \n + * y: A Tensor. Has the same type and data format as "x". \n *@attention Constraints: - *The image size after padding is greater than the filter size . \n + * The image size after padding is greater than the filter size. 
\n *@par Third-party framework compatibility * @li Compatible with the TensorFlow operator conv3d. @@ -1085,9 +1071,9 @@ REG_OP(DeformableConv2D) REG_OP(Conv3D) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(filter, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) @@ -1099,8 +1085,8 @@ REG_OP(Conv3D) /** *@brief Computes the gradients of convolution 3d with respect to the input. + *@par Inputs: - * Three inputs: * @li input_size: A Tensor of type int32, int64. An integer vector representing * the shape of input, where input is a 5-D tensor * [batch, depth, height, width, channels] or @@ -1110,28 +1096,25 @@ REG_OP(Conv3D) * @li out_backprop: A Tensor. Must have the same type as filter. * 5-D with shape [batch, depth, out_height, out_width, out_channels] * or [batch, out_channels, depth, out_height, out_width]. Gradients with - * respect to the output of the convolution . \n + * respect to the output of the convolution. \n -*@par Required Attributes: - * @li strides: A list of 5 integers. Specifies the stride of the sliding window +*@par Attributes: + * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window * for each dimension of "out_backprop". * The N and C dimensions must be 1. Has the same format as "out_backprop". - * @li pads: A list of 6 integers. + * @li pads: Required. A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, - * tail, top, bottom, left and right . \n - -*@par Attributes: - * Three attributes: - * @li groups: Number of blocked connections from input channels to output - * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". - * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * tail, top, bottom, left and right. + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of the input. * The N, C and D dimensions must be 1. Has the same format as "out_backprop". + * @li groups: Optional. Number of blocked connections from input channels to output + * channels. + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. \n *@par Outputs: - * y: A Tensor. Has the same type as filter,and has same format as "input_size" + * y: A Tensor. Has the same type as filter,and has same format as "input_size". \n *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_input @@ -1150,45 +1133,44 @@ REG_OP(Conv3DBackpropInput) /** *@brief Computes the gradients of convolution 3d with respect to the input. + *@par Inputs: - * Two inputs: * @li filter: A Tensor whose type is float16. The format of filter is NCDHW, * NDHWC or DHWCN. * @li out_backprop: A Tensor. Must have the same type as filter. The format is - * NDHWC or NCDHW. \n + * NDHWC or NCDHW. \n -*@par Required Attributes: - * @li strides: A list of 5 integers. Specifies the stride of the sliding window +*@par Attributes: + * @li input_size: Required. A tuple/list of type int32, int64. 
An integer vector + * representing the shape of input, where input is a 5-D tensor + * [batch, depth, height, width, channels] or + * [batch, channels, depth, height, width]. + * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window * for each dimension of "out_backprop". * The N and C dimensions must be 1. Has the same format as "out_backprop". - * @li pads: A list of 6 integers. Supports only padding along the D, H and W + * @li pads: Required. A list of 6 integers. Supports only padding along the D, H and W * dimensions in sequence of head, tail, top, bottom, left and right. - * @li input_size: A tuple/list of type int32, int64. An integer vector - * representing the shape of input, where input is a 5-D tensor - * [batch, depth, height, width, channels] or - * [batch, channels, depth, height, width] . \n - -*@par Attributes: - * Three attributes: - * @li groups: Number of blocked connections from input channels to output - * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". - * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "out_backprop". + * @li groups: Optional. Number of blocked connections from input channels to output + * channels. + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. \n + *@par Outputs: - * y: A Tensor. Has the same type and data format as "out_backprop". + * y: A Tensor. Has the same type and data format as "out_backprop". \n + *@par Third-party framework compatibility - * Compatible with Tensorflow's conv3d_backprop_input + * Compatible with Tensorflow's conv3d_backprop_input. \n *@par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead. + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead. */ REG_OP(Conv3DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -1242,8 +1224,8 @@ REG_OP(LSTM) /** *@brief Computes the gradients of convolution3D with respect to the filter + *@par Inputs: - * Three inputs: * @li x: A Tensor. Must be one of the following types: float16, float32. * Currently does not support double. * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] @@ -1258,26 +1240,23 @@ REG_OP(LSTM) * or [batch, out_channels, out_depth, out_height, out_width]. * Gradients with respect to the output of the convolution. \n -*@par Required Attributes: - * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding +*@par Attributes: + * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding * window for each dimension of "x". The N and C dimensions must be 1. * Has the same format as "x". - * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] - * pads on feature map . \n - -*@par Attributes: - * Three attributes: - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * @li pads: Required. 
A tuple/list of 6 integers, [front, back, top, bottom, left, right] + * pads on feature map. + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li groups: Number of blocked connections from input channels to output + * @li groups: Optional. Number of blocked connections from input channels to output * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". - * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. \n *@par Outputs: - * y: A Tensor that has the same type as "x" - * and the format is NDHWC, NCDHW or DHWCN. + * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW or DHWCN. \n + *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_filter */ @@ -1295,8 +1274,8 @@ REG_OP(Conv3DBackpropFilter) /** *@brief Computes the gradients of convolution with respect to the filter. + *@par Inputs: - * Two inputs: * @li x: A Tensor of type float16. * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] * or [batch, in_channels, in_depth, in_height, in_width]. @@ -1305,37 +1284,34 @@ REG_OP(Conv3DBackpropFilter) * or [batch, out_channels, out_depth, out_height, out_width]. * Gradients with respect to the output of the convolution. \n -*@par Required Attributes: - * @li filter_size: A tuple/list of type integers. An integer vector +*@par Attributes: + * @li filter_size: Required. A tuple/list of type integers. An integer vector * representing the tensor shape of filter, where filter is a 5-D tensor * [filter_depth, filter_height, filter_width, in_channels, out_channels], * [out_channels, filter_depth, filter_height, filter_width, in_channels] * or [out_channels, in_channels, filter_depth, filter_height, filter_width]. - * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding + * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding * window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] - * pads on feature map. \n - -*@par Attributes: - * Three attributes: - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right] + * pads on feature map. + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li groups: Number of blocked connections from input channels to output + * @li groups: Optional. Number of blocked connections from input channels to output * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". - * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li data_format: Optional. An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. \n *@par Outputs: - * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. + * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. 
\n + *@par Third-party framework compatibility - * Compatible with Tensorflow's conv3d_backprop_filter + * Compatible with Tensorflow's conv3d_backprop_filter. \n + *@par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead. + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead. */ - - REG_OP(Conv3DBackpropFilterD) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) @@ -1350,37 +1326,32 @@ REG_OP(Conv3DBackpropFilterD) /** *@brief Computes the transpose of convolution 3d with respect to the input. + *@par Inputs: - * Three inputs: * @li input_size: A Tensor of type int32. An integer vector representing the * shape of input. * @li x: A Tensor of type float16, currently does not support int8. The format * is NDHWC or NCDHW. * @li filter: A Tensor of type float16, currently does not support int8. * The format is NDHWC, NCDHW or DHWCN. + * @li bias: Optional. An optional 1D tensor of the same type as "x". Reserved. + * @li offset_w: Optional. An optional 1D tensor for quantized deconvolution. Reserved. \n -*@par Optional input: - * Two optional inputs - * @li bias: An optional 1D tensor of the same type as "x". Reserved. - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n - -*@par Required Attributes: - * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding +*@par Attributes: + * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding * window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A tuple/list of 6 integers - -*@par Attributes: - * Five attributes: - * @li groups: Number of blocked connections from input channels to output - * channels. - * @li dilations: A tuple/list of 5 integers, + * @li pads: Required. A tuple/list of 6 integers. + * @li dilations: Optional. A tuple/list of 5 integers, * The dilation factor for each dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li data_format: An optional string from: "NDHWC", "NCDHW". + * @li groups: Optional. Number of blocked connections from input channels to output + * channels. + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li output_padding: The size will be added in the output shape. - * @li offset_x: Input offset_x value. Reserved. + * @li output_padding: Optional. The size will be added in the output shape. + * @li offset_x: Optional. Input offset_x value. Reserved. \n + *@par Outputs: * y: A Tensor. Has the same type and format as "x". */ @@ -1388,9 +1359,9 @@ REG_OP(Conv3DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(filter, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) @@ -1402,46 +1373,44 @@ REG_OP(Conv3DTranspose) /** *@brief Computes the transpose of convolution 3d with respect to the input. + *@par Inputs: * @li x: A Tensor of type float16, currently does not support int8. * The format is NDHWC or NCDHW. 
* @li filter: A Tensor of type float16, currently does not support int8. * The format is NDHWC, NCDHW or DHWCN. + * @li bias: Optional. An 1D tensor of the same type as "x". Reserved. + * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n -*@par Optional inputs: - * @li bias: An optional 1D tensor of the same type as "x". Reserved. - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n - -*@par Required Attributes: - * @li input_size: A tuple/list of type int32. - * An integer vector representing the shape of input - * @li strides: A tuple/list of 5 integers. +*@par Attributes: + * @li input_size: Required. A tuple/list of type int32. + * An integer vector representing the shape of input. + * @li strides: Required. A tuple/list of 5 integers. * Specifies the stride of the sliding window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A tuple/list of 6 integers . \n - -*@par Attributes: - * Five attributes: - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * @li pads: Required. A tuple/list of 6 integers. + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li groups: Number of blocked connections from input channels to output + * @li groups: Optional. Number of blocked connections from input channels to output * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". + * @li data_format: Optional. An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li output_padding: The size will be added in the output shape. - * @li offset_x: Input offset_x value. Reserved. + * @li output_padding: Optional. The size will be added in the output shape. + * @li offset_x: Optional. Input offset_x value. Reserved. \n + *@par Outputs: - * y: A Tensor. Has the same type and format as "x". + * y: A Tensor. Has the same type and format as "x". \n + *@par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. */ REG_OP(Conv3DTransposeD) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(filter, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -1469,17 +1438,17 @@ REG_OP(Conv3DTransposeD) * @li offset_w: An optional 1D tensor for quantized inference. Reserved. 
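+*
+* For reference, the recovered output size follows the usual transposed
+* convolution relation (shown for H; W is analogous), and input_size must be
+* consistent with it:
+*     H_out = stride_h * (H_in - 1) + output_padding_h + dilation_h * (filter_h - 1) + 1 - pad_top - pad_bottom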
*\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | |---------|---------|---------|-------- - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | | NHWC -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | filter | bias | y\n + ------------|---------|---------|---------|--------\n + | Data Type | float16 | float16 | float16 | float16\n + | |---------|---------|---------|--------\n + | | int8 | int8 | int32 | int32\n + ------------|---------|---------|---------|--------\n + | Format | NCHW | NCHW | ND | NCHW\n + | | NHWC | HWCN | | NHWC\n + *\n * For int8, a dequant or requant operator must be followed. *\n * @@ -1504,38 +1473,38 @@ REG_OP(Conv3DTransposeD) * within the effective range of int8 [-128, 127]. Defaults to "0". *\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | input_size | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | x (out_backprop) | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | y (fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | input_size | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | x (out_backprop) | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | y (fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | Offset_x | | [-128, 127]\n + *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * @@ -1557,9 +1526,9 @@ REG_OP(Conv2DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) 
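+    // Per the dtype table above, the bias dtype pairs with the output path:
+    // float16 bias for the float16 path, int32 bias for the int8 path, and
+    // the float32 entries added in this change extend the float32 path.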
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -1604,9 +1573,9 @@ REG_OP(Conv2DTranspose) REG_OP(Conv2DTransposeD) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -1623,14 +1592,12 @@ REG_OP(Conv2DTransposeD) * Two inputs: * @li x: A Tensor of type float16,float32 * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. -*@par Required Attributes: +*@par Attributes: * @li strides: A tuple/list of 4 integers.The stride of the sliding window for * height and width for H/W dimension. * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension * of the input. * @li ksize: A tuple/list of 2 integers.kernel size. -*@par Attributes: - * Four attributes: * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension * of input. Defaults to [1, 1, 1, 1] * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. @@ -1659,22 +1626,20 @@ REG_OP(DeformableOffsets) * @li grad: A Tensor of type float16,float32. gradients with respect to DeformableOffsets output * @li x: A Tensor of type float16,float32. * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. -*@par Required Attributes: +*@par Attributes: * @li strides: A tuple/list of 4 integers.The stride of the sliding window for * height and width for H/W dimension. * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension * of the input. * @li ksize: A tuple/list of 2 integers.kernel size. -*@par Attributes: - * Three attributes: * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension * of input. Defaults to [1, 1, 1, 1] * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. * @li deformable_groups: Specify the c-axis grouping number of input x. * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1. *@par Outputs: - * grad_x: A Tensor of type float16, float32. Gradients with respect to input_x - * grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets + * @li grad_x: A Tensor of type float16, float32. Gradients with respect to input_x + * @li grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets */ REG_OP(DeformableOffsetsGrad) .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1695,11 +1660,9 @@ REG_OP(DeformableOffsetsGrad) *@brief Computes the deformed dilation output with the expected input *@par Inputs: * One inputs: - * @li x: A Tensor of type int8, float16, float32 -*@par Required Attributes: - * @li dilations: A tuple/list of integers. + * x: A Tensor of type int8, float16, float32 *@par Attributes: - * Two attributes: + * @li dilations: A tuple/list of integers. * @li padding_value: default value filling in blank * @li pads: A tuple/list of integers. 
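+*
+* For illustration (a hypothetical 2 x 2 input, dilation 2 on H and W,
+* padding_value 0; each dilated dimension becomes dilation * (d - 1) + 1):
+*     [[1, 2],        [[1, 0, 2],
+*      [3, 4]]   ->    [0, 0, 0],
+*                      [3, 0, 4]]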
*@par Outputs:
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 5fa40ad6..bd14df77 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -153,6 +153,42 @@ REG_OP(Iou)
    .OP_END_FACTORY_REG(Iou)

/**
+*@brief Computes GIoU: first the smallest enclosing area of the two boxes and
+* the IoU are calculated, then the proportion of the enclosing area covered by
+* neither box is subtracted from the IoU. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
+* shape (N, 4). "N" indicates the number of bounding boxes, and the value
+* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
+*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
+* with shape (M, 4). "M" indicates the number of ground truth boxes, and
+* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. \n
+
+*@par Attributes:
+*@li trans: An optional bool, true for 'xywh', false for 'xyxy'.
+*@li is_cross: An optional bool, controlling whether the output shape is [M, N] or [1, N].
+*@li mode: Computation mode, a character string with the value range of ['iou', 'iof']. \n
+
+*@par Outputs:
+* overlap: A 2D Tensor of type float16 or float32 with shape [M, N] or [1, N],
+* specifying the IoU or IoF ratio. \n
+
+*@attention Constraints:
+* Only computation of float16 data is supported. To avoid overflow, the input
+* length and width are scaled by 0.2 internally.
+*/
+REG_OP(GIoU)
+    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(trans, Bool, false)
+    .ATTR(is_cross, Bool, true)
+    .ATTR(mode, String, "iou")
+    .OP_END_FACTORY_REG(GIoU)
+
+/**
*@brief Performs the backpropagation of ROIAlign for training scenarios . \n

*@par Inputs:
@@ -417,7 +453,7 @@ REG_OP(PSROIPooling)
*@brief Returns detection result . \n

*@par Inputs:
-* Four inputs, including:
+* Five inputs, including:
*@li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
*@li bbox_delta: An NCHWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
*@li score: An NCHWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
@@ -459,7 +495,7 @@ REG_OP(FSRDetectionOutput)
*@brief Returns detection result . \n

*@par Inputs:
-* Four inputs, including:
+* Three inputs, including:
*@li bbox_delta: An ND tensor of type float16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
*@li score: An ND tensor of type float16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput.
*@li anchors: An ND tensor of type float16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
@@ -474,7 +510,6 @@ REG_OP(FSRDetectionOutput)
*@li code_type: An optional int32, specifying the code type. Defaults to 1 (only 2 is supported). The corner is 1, center_size is 2, corner_size is 3.
*@li keep_top_k: An optional int32, specifying the topk value after nms. Defaults to -1.
*@li confidence_threshold: An optional float32, specifying the topk filter threshold.
Only consider detections with confidence greater than the threshold -*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output". *@par Outputs: *@li out_boxnum: A tensor of type int32, specifying the number of output boxes. *@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box. @@ -989,26 +1024,26 @@ REG_OP(SPP) * feature map . \n *@attention Constraints: -*@li For the feature map input: -(1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50. -(2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60. -(3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70. -(4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70. -(5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80. -(6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80. -(7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80. -(8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70. -(9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70. -(10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70. -(11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70. -(12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70. -(13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70. -(14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70. -(15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70. -(16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50. -(17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40. -(18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40. -(19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40. +* For the feature map input: +*@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50. +*@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60. +*@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50. +*@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40. +*@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40. +*@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40. *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. 
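+*
+* For example, with pooled_h = pooled_w = 4, the feature map size is capped
+* at 70, while pushing pooled_h = pooled_w to 20 tightens the cap to 40.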
*/
@@ -1222,9 +1257,7 @@ REG_OP(RpnProposalsD)
* @li box_filter: bool, mark of box_filter. Defaults to "true"
* @li core_max_num: int, max number of core. Defaults to "8"
*@par Outputs:
-* @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
-* @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
-* @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
+*sorted_box: A Tensor. Must be float16. N-D with shape [N, 1].
*/
REG_OP(RpnProposalPostProcessing)
  .INPUT(sorted_proposal, TensorType({DT_FLOAT16}))
@@ -1382,7 +1415,7 @@ REG_OP(BatchMultiClassNonMaxSuppression)
* @li shape_hw: A 1D Tensor of type int32 . \n

* @par Attributes:
-* @li reversed_box: An optional bool, specifying the last two dims is "4,num" or
+* reversed_box: An optional bool, specifying whether the last two dims are "4,num" or
* "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false" . \n

* @par Outputs:
@@ -1429,9 +1462,9 @@ REG_OP(NormalizeBBox)
* @li anchors: A Tensor. Must be int32.
*
*@par Attributes:
-* @li scales: optional, listfloat, .
+* @li scales: optional, listfloat.
* @li decode_clip: optional, float, threshold of decode process.
-* @li reversed_boxes: optional, bool,.
+* @li reversed_boxes: optional, bool.
*
*@par Outputs:
* y: A Tensor. Must have the same type as box_predictions.
@@ -1446,16 +1479,16 @@ REG_OP(DecodeBboxV2)
  .OP_END_FACTORY_REG(DecodeBboxV2)

/**
-*@brief Computes sort function.
+*@brief Sorts the input tensor and returns the sorted values and their indices.
*
*@par Inputs:
*Inputs include:
-* x: A Tensor. Dtype support: flaot16, flaot, int16, int8,
+* x: A Tensor. Dtype support: float16, float, int16, int8,
                 uint8, int32, int64.
-*
+
*@par Attributes:
-* @li axis: optional, int.
-* @li descending: optional,bool.
+* @li axis: An optional attribute indicating the sorting axis.
+* @li descending: An optional attribute indicating whether to sort in descending order.
*
*@par Outputs:
* @li y1: A Tensor. Must have the same type as x.
@@ -1568,16 +1601,18 @@ deciding when to remove boxes based on score . \n
the last dim representing (batch_id,class_id,index_id) . \n

*@par Attributes:
-*center_point_box:Integer indicate the format of the box data.
+*@li center_point_box: Integer indicating the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
(i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models.
1 - the box data is supplied as [x_center, y_center, width, height].
Mostly used for Pytorch models. \n
+*@li max_boxes_size: An optional attribute integer representing the real maximum
+*number of boxes to be selected by non max suppression . \n

*@par Outputs:
-*@li selected_indices: A 2-D integer tensor of shape [M] representing the
+*selected_indices: A 2-D integer tensor of shape [M] representing the
selected indices from the boxes tensor, where M <= max_output_size. \n

*@attention Constraints:
@@ -1603,7 +1638,7 @@ REG_OP(NonMaxSuppressionV7)
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n

*@par Inputs:
-* Three inputs, including:
+* Two inputs, including:
*@li features: A 5HD Tensor list of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1".
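Note: a minimal usage sketch for the new GIoU prototype registered above, assuming the generated ge::op::GIoU class and the usual GE IR graph-build flow (ge::Graph, ge::op::Data); the header name "all_ops.h", shapes, and node names are illustrative, not part of this patch:

    #include <vector>
    #include "graph/graph.h"
    #include "all_ops.h"  // generated operator classes, assumed to include ge::op::GIoU

    ge::Graph BuildGIoUGraph() {
      // (N, 4) boxes and (M, 4) ground-truth boxes, per the GIoU doc above; N = M = 128 here.
      ge::TensorDesc box_desc(ge::Shape({128, 4}), ge::FORMAT_ND, ge::DT_FLOAT16);
      auto bboxes = ge::op::Data("bboxes_data").set_attr_index(0);
      bboxes.update_input_desc_x(box_desc);
      bboxes.update_output_desc_y(box_desc);
      auto gtboxes = ge::op::Data("gtboxes_data").set_attr_index(1);
      gtboxes.update_input_desc_x(box_desc);
      gtboxes.update_output_desc_y(box_desc);
      auto giou = ge::op::GIoU("giou")
                      .set_input_bboxes(bboxes)
                      .set_input_gtboxes(gtboxes)
                      .set_attr_trans(false)    // boxes given as [x1, y1, x2, y2] ('xyxy')
                      .set_attr_is_cross(true)  // overlap shape [M, N]
                      .set_attr_mode("iou");
      ge::Graph graph("giou_graph");
      graph.SetInputs(std::vector<ge::Operator>{bboxes, gtboxes})
           .SetOutputs(std::vector<ge::Operator>{giou});
      return graph;
    }

The same pattern applies to the other prototypes in this file; only the input/attr setter names change with the REG_OP signature.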
@@ -1760,7 +1795,7 @@ REG_OP(AnchorResponseFlags)
* "N" indicates the number of ROIs. \n

*@par Attributes:
-*@li performance_mode: select performance mode, "high_precision" or "high_performance".
+*performance_mode: select performance mode, "high_precision" or "high_performance".
* select "high_precision" when input type is float32, the output tensor precision
* will be smaller than 0.0001, select "high_performance" when input type is float32,
* the ops will be best performance, but precision will be only smaller than 0.005.
@@ -1795,12 +1830,12 @@ REG_OP(YoloBoxesEncode)
*@li num_gts: A Tensor. Support int32. real k. shape (1, )

*@par Attributes:
-*@li output_dim: float. IOU threshold for positive bboxes.
-*@li group_size: float. minimum iou for a bbox to be considered as a positive bbox
-*@li spatial_scale: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt.
+*@li pos_iou_thr: float. IoU threshold for positive bboxes.
+*@li min_pos_iou: float. minimum IoU for a bbox to be considered as a positive bbox.
+*@li gt_max_assign_all: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt.

*@par Outputs:
-*@li assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
+* assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
*/
REG_OP(GridAssignPositive)
  .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 }))
@@ -1816,6 +1851,40 @@ REG_OP(GridAssignPositive)
  .REQUIRED_ATTR(min_pos_iou, Float)
  .REQUIRED_ATTR(gt_max_assign_all, Bool)
  .OP_END_FACTORY_REG(GridAssignPositive)
+
+/**
+*@brief Computes the gradient of GIoU with respect to both sets of boxes . \n
+
+*@par Inputs:
+*@li dy: gradient increment data, a 1D Tensor of type float16 or float32 with
+* shape (N,).
+*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
+* shape (4, N). "N" indicates the number of bounding boxes, and the value
+* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
+*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
+* with shape (4, M). "M" indicates the number of ground truth boxes, and
+* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n
+
+*@par Attributes:
+*@li trans: An optional attr, true for 'xywh', false for 'xyxy', only support true now.
+*@li is_cross: An optional attr, if false M equals N, only support false now.
+*@li mode: An optional attr, a character string with the value range of ['iou', 'iof'],
+* only support 'iou' now. \n
+
+*@par Outputs:
+*@li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N].
+*@li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M].
+*/
+REG_OP(GIoUGrad)
+  .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .ATTR(trans, Bool, false)
+  .ATTR(is_cross, Bool, true)
+  .ATTR(mode, String, "iou")
+  .OP_END_FACTORY_REG(GIoUGrad)
} // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index b44c0780..9ce7abfd 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -54,15 +54,16 @@ REG_OP(LogSoftmaxGrad)
*@par Inputs:
*Two inputs, including:
* @li features: A Tensor. Must be one of the following types: half, float32, double.
-* A "batch_size * num_classes" matrix.
+*A "batch_size * num_classes" matrix. * @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'. -* batch_size vector with values in [0, num_classes). -* This is the label for the given minibatch entry. +*batch_size vector with values in [0, num_classes). +*This is the label for the given minibatch entry. \n *@par Outputs: -*loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". -*backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n +*@li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". +*@li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). +Has the same type as "features" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SparseSoftmaxCrossEntropyWithLogits. @@ -84,8 +85,8 @@ REG_OP(SparseSoftmaxCrossEntropyWithLogits) * @li labels: A Tensor of the same type as "features". A "batch_size * num_classes" matrix . \n *@par Outputs: -*loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". -*backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n +* @li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". +* @li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SoftmaxCrossEntropyWithLogits. @@ -127,12 +128,13 @@ REG_OP(SoftmaxGrad) *@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n *@par Inputs: -* Two inputs, including: +* Three inputs, including: *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . +*@li dout:A multi-dimensional Tensor of float16 or float32,specifying the gradient transferred from the upper layer. \n *@par Outputs: -*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n +*gradient: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n *@par Third-party framework compatibility * Compatible with the scenario where "reduction" is set to "none"of PyTorch operator SigmoidCrossEntropyWithLogitsGrad. @@ -148,13 +150,12 @@ REG_OP(SigmoidCrossEntropyWithLogitsGrad) *@brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . \n *@par Inputs: -* Three inputs, including: +* Two inputs, including: *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. -*@li dout: A multi-dimensional Tensor of float16 or float32, specifying the gradient transferred from the upper layer . \n +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. \n *@par Outputs: -*gradient: Return gradient. Has the same dimensions and type as "predict" . \n +*loss: Return loss. Has the same dimensions and type as "predict" . 
\n *@par Third-party framework compatibility * Compatible with the scenario where "reduction" is set to "none"of PyTorch operator SigmoidCrossEntropyWithLogits. @@ -572,7 +573,7 @@ REG_OP(LayerNorm) *@par Inputs: *One input, including: -* @li x: A Tensor. Must be one of the following types: float16, float32 . \n +* x: A Tensor. Must be one of the following types: float16, float32 . \n *@par Attributes: * @li p: Specify L_p norm, the type is float. @@ -581,7 +582,7 @@ REG_OP(LayerNorm) *@par Outputs: *One outputs, including: -* @li y: shape and dtype of output, should be same shape and type as input. +* y: shape and dtype of output, should be same shape and type as input. */ REG_OP(Renorm) .INPUT(x, TensorType::BasicType()) @@ -811,7 +812,7 @@ REG_OP(LayerNormBetaGammaBackpropV2) * shape of "keep_prob" should be (1,) or [1,]. * Has the same type as "x" . \n -*@par Output: +*@par Outputs: *y: A mutable Tensor. Has the same type as "x". */ REG_OP(DropOutDoMask) @@ -839,7 +840,7 @@ REG_OP(DropOutDoMask) * shape of "keep_prob" should be (1,) or [1,]. * Has the same type as "x" . \n -*@par Output: +*@par Outputs: *y: A mutable Tensor. Has the same type as "x". *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1010,7 +1011,7 @@ REG_OP(LRNGrad) *@li grads: A Tensor. Has the same type as acts. *@par Attributes: - *@li blank_label: An optional attribute. Defaults to 0. + *blank_label: An optional attribute. Defaults to 0. *@par Third-party framework compatibility * Compatible with TensorFlow RNNTLoss operator. @@ -1198,13 +1199,11 @@ REG_OP(INInferV2D) * @li epsilon: An attribute of type Float. \n * @par Outputs: -*Three outputs, including: +* Three outputs, including: * @li y: A Tensor. Has the same type as "x". \n * @li mean: A Tensor. Has the same type as "x". \n * @li variance: A Tensor. Has the same type as "x". \n -* @par Third-party framework compatibility -* Can be used by onnx InstanceNormalization */ REG_OP(InstanceNorm) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1218,24 +1217,22 @@ REG_OP(InstanceNorm) .OP_END_FACTORY_REG(InstanceNorm) /** -*@brief InstanceNormGrad operator interface implementation. +* @brief InstanceNormGrad operator interface implementation. -*@par Inputs: -*Five inputs, including: +* @par Inputs: +* Five inputs, including: * @li dy: A Tensor. Must be one of the following types: float16, float32. * @li x: A Tensor. Must be one of the following types: float16, float32. * @li variance: A Tensor. Must be one of the following types: float16, float32. * @li mean: A Tensor. Must be one of the following types: float16, float32. * @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n -*@par Outputs: -*Three outputs, including: +* @par Outputs: +* Three outputs, including: * @li pd_x: A Tensor. Must be one of the following types: float16, float32. * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(InstanceNormGrad) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -1249,58 +1246,6 @@ REG_OP(InstanceNormGrad) .OP_END_FACTORY_REG(InstanceNormGrad) /** -*@brief InstanceNormXBackprop operator interface implementation. - -*@par Inputs: -*Five inputs, including: -* @li dy: A Tensor. Must be one of the following types: float16, float32. -* @li x: A Tensor. Must be one of the following types: float16, float32. 
-* @li variance: A Tensor. Must be one of the following types: float16, float32. -* @li mean: A Tensor. Must be one of the following types: float16, float32. -* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n - -*@par Outputs: -*Two outputs, including: -* @li pd_x: A Tensor. Must be one of the following types: float16, float32. -* @li res_for_gamma: A Tensor. Must be one of the following types: float32. - -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. -*/ -REG_OP(InstanceNormXBackprop) - .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) - .OP_END_FACTORY_REG(InstanceNormXBackprop) - -/** -*@brief InstanceNormBetaGammaBackprop operator interface implementation. - -*@par Inputs: -*Two inputs, including: -* @li dy: A Tensor. Must be one of the following types: float16, float32. -* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n - -*@par Outputs: -*Two outputs, including: -* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. -* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. - -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. -*/ -REG_OP(InstanceNormBetaGammaBackprop) - .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(res_for_gamma, TensorType({DT_FLOAT})) - .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) - .OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop) - -/** * @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n * @par Inputs: @@ -1340,10 +1285,10 @@ REG_OP(KlDivLossGrad) * @li label: A Tensor. Has the same type as "grads". Required. \n * @par Attributes: -* @li reduction: An optional attribute of type String. Defaults to "mean". \n +* reduction: An optional attribute of type String. Defaults to "mean". \n * @par Outputs: -* @li y: A Tensor. Has the same type as "x". \n +* y: A Tensor. Has the same type as "x". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator L1LossGrad. @@ -1368,7 +1313,7 @@ REG_OP(L1LossGrad) * @li reduction: An optional string.Defaults to "mean". \n * @par Outputs: -* @li y: An ND tensor tensor with the same shape and type as "predict". \n +* y: An ND tensor tensor with the same shape and type as "predict". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator LpLoss. @@ -1390,10 +1335,10 @@ REG_OP(LpLoss) * @li dout: An ND tensor of type float16, float32. \n * @par Attributes: -* @li reduction: An optional string.Defaults to "mean". \n +* reduction: An optional string.Defaults to "mean". \n * @par Outputs: -* @li y: An ND tensor tensor with the same shape and type as "predict". \n +* y: An ND tensor tensor with the same shape and type as "predict". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator MseLossGrad. 
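Note: the reduction semantics documented above can be read as follows. This is a host-side reference sketch of what MseLossGrad computes for reduction == "mean" (not the device kernel, and the helper name is illustrative); for "sum" the division by N is dropped, and for "none" the upstream gradient is elementwise:

    #include <cstddef>
    #include <vector>

    // d(loss)/d(predict) for MSE with mean reduction:
    //   grad[i] = 2 * (predict[i] - label[i]) / N * dout
    std::vector<float> MseLossGradRef(const std::vector<float> &predict,
                                      const std::vector<float> &label,
                                      float dout) {  // upstream gradient (a scalar for "mean")
      const float n = static_cast<float>(predict.size());
      std::vector<float> grad(predict.size());
      for (std::size_t i = 0; i < predict.size(); ++i) {
        grad[i] = 2.0f * (predict[i] - label[i]) / n * dout;
      }
      return grad;
    }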
@@ -1414,10 +1359,10 @@ REG_OP(MseLossGrad)
* @li label: An ND Tensor of dtype float16 or float32.\n
*
* @par Attributes:
-* @li reduction:An optional str from sum, none, mean, Defaults to "mean".\n
+* reduction: An optional str from sum, none, mean. Defaults to "mean".\n
*
* @par Outputs:
-* @li y: when reduction=sum/mean, y is scale. when reduction=none, y has
+* y: when reduction=sum/mean, y is a scalar. when reduction=none, y has
* same type and shape as "predict".\n
*/
REG_OP(MseLoss)
@@ -1445,7 +1390,7 @@ REG_OP(MseLoss)
* Must be one of the following: "none", "mean", "sum". \n

* @par Outputs:
-* @li gradient: A Tensor. Has the same type as "predict". \n
+* gradient: A Tensor. Has the same type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SmoothL1LossBackward.
@@ -1480,7 +1425,7 @@ REG_OP(SmoothL1LossGradV2)
* the output,'sum': the output will be summed. Default: 'mean'. \n

* @par Outputs:
-* @li loss: Indicates the loss between the predictive value and target value.
+* loss: Indicates the loss between the predictive value and target value.
* Has the same dimensions as "predict". \n

* @par Third-party framework compatibility
@@ -1498,12 +1443,12 @@ REG_OP(SmoothL1LossV2)
* @brief Computes Centralization. result = x - mean(x, axes)

* @par Inputs:
-* @li x: An ND tensor of type float16, float32.
+* x: An ND tensor of type float16, float32.
* @par Attributes:
-* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
+* axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
* Must be in the range [-rank(x), rank(x)).
* @par Outputs:
-* @li y: A Tensor. Has the same type as "x". \n
+* y: A Tensor. Has the same type as "x". \n

* @par Third-party framework compatibility
* custom operator \n
@@ -1521,7 +1466,7 @@ REG_OP(Centralization)

*@par Inputs:
*One inputs, including:
-* @li x: A tensor . Must be one of the following types:
+* x: A tensor. Must be one of the following types:
* float16, float32, int32, uint32, int8, uint8. \n

*@par Attributes:
@@ -1546,14 +1491,14 @@ REG_OP(Roll)
logistic loss between input_x and input_y (containing 1 or -1). \n

*@par Inputs:
- *One inputs, including:
+ *Two inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_y: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
- *@li lambd: An optional string.Defaults to "mean". \n
+ *reduction: An optional string. Defaults to "mean". \n

*@par Outputs:
*output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n
@@ -1580,10 +1525,10 @@ REG_OP(SoftMarginLoss)
* @li pos_weight: An optional ND tensor of type float16, float32. \n

* @par Attributes:
-* @li reduction: An optional string.Defaults to "mean". \n
+* reduction: An optional string. Defaults to "mean". \n

* @par Outputs:
-* @li gradient: An ND tensor tensor with the same shape and type as "predict". \n
+* gradient: An ND tensor with the same shape and type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad.
@@ -1603,24 +1548,14 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2)

* @par Inputs:
* Two inputs, including:
- * @li input_x: A tensor. Must be one of the following types:
- * float16, float32. \n
- *
- * @par Inputs:
- * @li target: A tensor. Must be one of the following types:
- * float16, float32. \n
+ * @li input_x: A tensor. Must be one of the following types: float16, float32.
+ * @li target: A tensor. Must be one of the following types: float16, float32. \n

* @par Attributes:
* four Attributes, including:
- * @li log_input: An optional bool. Defaults to "True" \n
- *
- * @par Attributes:
- * @li full: An optional bool. Defaults to "False" \n
- *
- * @par Attributes:
- * @li eps: An optional float. Defaults to "1e-8" \n
- *
- * @par Attributes:
+ * @li log_input: An optional bool. Defaults to "True"
+ * @li full: An optional bool. Defaults to "False"
+ * @li eps: An optional float. Defaults to "1e-8"
* @li reduction: An optional string. Defaults to "mean" \n

* @par Outputs:
@@ -1641,14 +1576,14 @@ REG_OP(PoissonNllLoss)
/**
*@brief rnn_gen_mask
* @par Inputs:
- * @li seq_length: A ND Tensor of type int32. Recoed the current length of each batch.\n
+ * seq_length: A ND Tensor of type int32. Records the current length of each batch.\n
*
* @par Attributes:
* @li num_step: A required int.\n
* @li hidden_size: A required int. \n
*
*
- * @par Output:
+ * @par Outputs:
* y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
*
*/
@@ -1666,18 +1601,16 @@ REG_OP(RnnGenMask)
* @par Inputs:
* Two inputs, including:
* @li x: A tensor. Must be one of the following types:
-* float16, float32. \n
-*
-* @par Inputs:
+* float16, float32.
* @li target: A tensor. Must be the following types:
* int32. \n

* @par Attributes:
-* @li reduction: An optional string. Defaults to "mean" \n
+* reduction: An optional string. Defaults to "mean" \n

* @par Outputs:
-* y: A Tensor has same element type as input x. \n
-* is_target: A Tensor has same element type as input target. \n
+* @li y: A Tensor has same element type as input x. \n
+* @li is_target: A Tensor has same element type as input target. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator MultiLabelMarginLoss. \n
diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h
index 49fd02fa..5b1a4dd0 100644
--- a/third_party/fwkacllib/inc/ops/nn_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_ops.h
@@ -106,16 +106,16 @@ REG_OP(FusedBatchNormV2)
  .OP_END_FACTORY_REG(FusedBatchNormV2)

/**
- * @brief: Large amount of data sort.First operator of TopK.
+ * @brief Large amount of data sort. First operator of TopK.
* @par Inputs:
* two input, including:
* @li input_data: A Tensor. Data to be sorted. Support float16
* @li input_index: A Tensor. Range(0, 2048). Datatype and format is same as input_data.
* @par Attributes:
- * @li k_num: Int.Number to be sorted.
+ * k_num: Int. Number to be sorted.
* @par Outputs:
- * 1 output, including:
- * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+ * One output, including:
+ * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
*/
REG_OP(SegmentSort)
  .INPUT(input_data, TensorType({DT_FLOAT16}))
@@ -127,13 +127,13 @@ REG_OP(SegmentSort)
/**
* @brief: Large amount of data sort.Second operator of TopK.
* @par Inputs:
- * two input, including:
- * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+ * One input, including:
+ * input_proposal: A Tensor. Proposal sorted for each channel. Support float16
* @par Attributes:
- * @li k_num: Int.Number to be sorted.
+ * k_num: Int. Number to be sorted.
* @par Outputs:
- * 1 output, including:
- * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+ * One output, including:
+ * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
*/
REG_OP(MultiMerge)
  .INPUT(input_proposal, TensorType({DT_FLOAT16}))
@@ -142,14 +142,14 @@ REG_OP(MultiMerge)
  .OP_END_FACTORY_REG(MultiMerge)

/**
- * @brief: Large amount of data sort.Third operator of TopK.
+ * @brief Large amount of data sort. Third operator of TopK.
* @par Inputs:
- * two input, including:
- * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+ * One input, including:
+ * input_proposal: A Tensor. Proposal sorted for each channel. Support float16
* @par Attributes:
- * @li k_num: Int.Number to be sorted.
+ * k_num: Int. Number to be sorted.
* @par Outputs:
- * 2 output, including:
+ * Two outputs, including:
* @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted.
* @li output_index: A Tensor. int32. Data index.
*/
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index 80a21333..72363d18 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -29,7 +29,7 @@ namespace ge {
/**
*@brief Performs pooling on the input.
*@par Inputs:
-*@li x: An NCHW tensor of type float16, float32, int8.
+* x: An NCHW tensor of type float16, float32, int8.
*@par Attributes:
*@li mode: An optional int32, specifying the pooling algorithm, either "0" (max pooling) or "1" (avg pooling). Defaults to "0".
*@li global_pooling: An optional bool. Defaults to "false".
@@ -50,6 +50,7 @@ namespace ge {
*dilation[2]: An optional int32, specifying the left dilation. Defaults to "1".
*dilation[3]: An optional int32, specifying the right dilation. Defaults to "1".
*@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0".
+*@li data_format: An optional string, specifying the data format of the input and output data. Defaults to "NCHW".
*@par Outputs:
*y: An NCHW tensor of type float16, float32, int32.
*@attention Constraints:
@@ -204,7 +205,7 @@ REG_OP(AvgPool3D)
*y: The average pooled output tensor . \n

*@attention Constraints:
-*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+*"ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3D.
@@ -281,10 +282,10 @@ REG_OP(AvgPool3DGrad)
* @li data_format: A string, format of input data . \n

* @par Outputs:
-* @output: The average pooled output tensor . \n
+* output: The average pooled output tensor . \n

* @attention Constraints:
-* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+* "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3DGradD.
@@ -431,6 +432,47 @@ REG_OP(MaxPool3D)
  .OP_END_FACTORY_REG(MaxPool3D)

/**
+* @brief Performs max pooling3d and outputs both the max values and their indices.
+*
+* @par Inputs:
+* One input:
+* x: A 6D tensor. Supported type: float16. Format as NDC1HWC0.
+* @par Attributes:
+* @li ksize: A required list of int32 values,
+* specifying the size of the window for each dimension of the input tensor.
+* No default value.
+* @li strides: A required list of int32 values,
+* specifying the stride of the sliding window for each dimension of
+* the input tensor. No default value.
+* @li pads: A required 3*2-dimension-list of int32 values,
+* specifying the padding of the three spatial dimensions of the input; padded positions are filled with 0.
+* @li dilation: dilation of the kernel. Default value is {1, 1, 1, 1, 1}.
+* @li ceil_mode: Default value is false.
+* @li data_format: the format of the (Torch-style) input. Default value is "NCDHW".
+* @li argmax_type: determines the type of the output argmax. "bitmask" is the
+* default value, in which case the argmax is returned as an img2col bitmask.
+* "index_int32" and "index_int64" represent the Torch-style output indices.
+* @par Outputs:
+* y: A 6D tensor. The maxpool3d output (max values), format as NDoC1HoWoC0.
+* @par Outputs:
+* argmax: A 5D uint16 tensor. The indices output,
+* format as NC1HWC0; it actually represents N, Do, C1*ksize, Ho*Wo//16, 16.
+*/
+REG_OP(MaxPool3DWithArgmax)
+  .INPUT(x, TensorType::RealNumberType())
+  .OUTPUT(y, TensorType::RealNumberType())
+  .OUTPUT(argmax, TensorType::IndexNumberType())
+  .REQUIRED_ATTR(ksize, ListInt)
+  .REQUIRED_ATTR(strides, ListInt)
+  .REQUIRED_ATTR(pads, ListInt)
+  .ATTR(dilation, ListInt, {1, 1, 1, 1, 1})
+  .ATTR(ceil_mode, Bool, false)
+  .ATTR(data_format, String, "NCDHW")
+  .ATTR(argmax_type, String, "bitmask")
+  .OP_END_FACTORY_REG(MaxPool3DWithArgmax)
+
+/**
*@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n
* The output is of size H x W, for any input size.
@@ -522,8 +564,7 @@ REG_OP(MaxPool3DGradGrad)
* y: A mutable tensor. Has the same shape and type as "x1" . \n

* @attention Constraints:
-* @li Computing gradients of global pooling is not supported, which means
-* "ksize < x1".
+* @li ksize is limited by buffer with full tiling.
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

* @par Third-party framework compatibility
@@ -568,7 +609,7 @@ REG_OP(MaxPoolGrad)
* @li Other dimensions of ksize and strides is 1 . \n

* @par Outputs:
-* @li y: Has the same type and format as input "x1" . \n
+* y: Has the same type and format as input "x1" . \n

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator MaxPoolGradGrad.
@@ -588,7 +629,7 @@ REG_OP(MaxPoolGradGrad)
*@brief Performs max_pool_ext2 on the input . \n

*@par Inputs:
-* Two inputs:
+* Three inputs:
*@li x: An NC1HWC0 Tensor of type float16.
*@li strides: A required type of int32 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li ksize: A required type of int32 values, specifying the size of the window for each dimension of the input tensor. No default value.
@@ -635,7 +676,8 @@ REG_OP(MaxPoolV2)
*@li strides: A required list of int8, int16, int32, or int64 values,
* specifying the stride of the sliding window for each dimension of
* the input tensor. No default value.
-*@li padding: A required string. No default value . \n
+*@li padding: A required string. No default value .
+*@li Targmax: An optional int with default value 7 . \n

*@par Outputs:
*@li y: A Tensor. Has the same type and format as input "x".
@@ -645,7 +687,7 @@ REG_OP(MaxPoolV2)
* ksize[1] * ksize[2] <= 255.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1,
* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
-*@li "padding" is either "SAME" or "VALID" . \n
+*@li "padding" is either "SAME" or "VALID" .

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolWithArgmax.
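Note: continuing the graph-construction pattern shown earlier, a sketch of how the new MaxPool3DWithArgmax prototype above might be instantiated, assuming the generated ge::op::MaxPool3DWithArgmax class and an existing input node x (e.g. a ge::op::Data); the window, stride, and pad values are illustrative only:

    // 2x2x2 window, stride 2, zero padding; argmax returned as the default bitmask.
    auto pool = ge::op::MaxPool3DWithArgmax("max_pool3d_with_argmax")
                    .set_input_x(x)                      // 6D NDC1HWC0 float16 tensor
                    .set_attr_ksize({1, 1, 2, 2, 2})     // required
                    .set_attr_strides({1, 1, 2, 2, 2})   // required
                    .set_attr_pads({0, 0, 0, 0, 0, 0})   // required 3*2 padding list
                    .set_attr_ceil_mode(false)
                    .set_attr_data_format("NCDHW")
                    .set_attr_argmax_type("bitmask");    // or "index_int32" / "index_int64"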
@@ -710,14 +752,15 @@ REG_OP(MaxPoolGradWithArgmax) *@brief Performs transform mask to argmax . \n *@par Inputs: -* Two input: -*x: An NC1HWC0 Tensor of type float16. -*mask: An NC1HWC0 Tensor of type uint16 . \n +* Two inputs: +*@li x: An NC1HWC0 Tensor of type float16. +*@li mask: An NC1HWC0 Tensor of type uint16 . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. *@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li padding: A required string. No default value . \n +*@li padding: A required string. No default value . +*@li originshape:A required list of int8, int16, int32, or int64 values, No default value. \n *@par Outputs: *argmax: An NC1HWC0 Tensor of type int32 . \n @@ -754,7 +797,7 @@ REG_OP(Mask2Argmax) * @li strides: A required list, specifying the stride of the sliding window. * @li padding: A required string, window sliding mode. Either SAME or VALID. * @par Outputs: -* @li y:Result tensor. Supported type: float, double, int32, +* y:Result tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64 * @attention Constraints: @@ -767,7 +810,7 @@ REG_OP(Mask2Argmax) * (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16), else failed . \n * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax. +* Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax. */ REG_OP(MaxPoolGradGradWithArgmax) .INPUT(x, TensorType::RealNumberType()) @@ -931,11 +974,11 @@ REG_OP(AvgPoolV2GradD) .OP_END_FACTORY_REG(AvgPoolV2GradD) /** -*@brief :upsample the layer +*@brief upsample the layer, similar to the nearest-neighbor difference scaling algorithm. *@par Inputs: * one input, including: -*@li x: A tensor of type float16 or float32. +* x: A tensor of type float16 or float32. *@par Attributes: *@li scale: A optional float32, scale factor of x. Defaults to "1.0". *@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2". @@ -1419,7 +1462,7 @@ REG_OP(MaxPoolV3) * the floor function will be used. Default False \n * @par Outputs: -* y: A mutable tensor. Has the same shape and type as "x1" . \n +* out_grad: A mutable tensor. Has the same shape and type as "x1" . \n * @attention Constraints: * @li Computing gradients of global pooling is not supported, which means @@ -1447,8 +1490,8 @@ REG_OP(MaxPoolV3Grad) *@brief Performs Dilation2D on the input . \n *@par Inputs: -*x: A tensor of shape is 4d, format is support NHWC. -*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n +*@li x: A tensor of shape is 4d, format is support NHWC. +*@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n *@par Attributes: *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. @@ -1480,9 +1523,9 @@ REG_OP(Dilation2D) *@brief Performs Dilation2DBackpropFilter on the input. \n *@par Inputs: -*x: A tensor of shape is 4d, format is support NHWC. -*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. -*out_backprop: Has the same type and format as input x and the c dimension is same with x. 
\n +*@li x: A tensor of shape is 4d, format is support NHWC. +*@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +*@li out_backprop: Has the same type and format as input x and the c dimension is same with x. \n *@par Attributes *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. @@ -1519,9 +1562,9 @@ REG_OP(Dilation2DBackpropFilter) *@brief Performs Dilation2DBackpropInput on the input. \n *@par Inputs: -*x: A tensor of shape is 4d, format is support NHWC. -*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. -*out_backprop: Has the same type and format as input x and the c dimension is same with x. \n +*@li x: A tensor of shape is 4d, format is support NHWC. +*@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +*@li out_backprop: Has the same type and format as input x and the c dimension is same with x. \n *@par Attributes *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 75e91aee..9dd502cd 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -289,7 +289,8 @@ REG_OP(SparseApplyAdagradV2D) * Should be from a Variable(). *@li lr: A scalar. Has the same type as "var". *@li grad: A tensor for the gradient. Has the same type as "var". -* +*@li momentum: Momentum. Must be a scalar. + *@par Attributes: *@li use_nesterov: An optional bool. Defaults to "False". * If "True", the tensor passed to compute grad will be @@ -701,7 +702,7 @@ REG_OP(ApplyPowerSignD) /** *@brief Updates "var" as FOBOS algorithm with fixed learning rate. * prox_v = var - alpha * delta -* var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +* var = sign(prox_v)/(1+alpha * l2) * max{|prox_v|-alpha * l1,0} * *@attention Constraints: * the input tensors must have the same shape. @@ -2128,10 +2129,12 @@ REG_OP(FusedMulApplyMomentumExtern) * otherwise the behavior is undefined, but may exhibit less contention. * *@par Outputs: -* var: A mutable tensor. Has the same type as input "var". +* @li var: A mutable tensor. Has the same type as input "var". +* @li accum: A mutable tensor. Has the same type as input "accum". * *@attention Constraints: -* The input tensors must have the same shape. +* @li var: A mutable tensor. Has the same type as input "var". +* @li accum: A mutable tensor. Has the same type as input "accum". * *@par Third-party framework compatibility * Compatible with the TensorFlow operator ResourceApplyKerasMomentum. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index ca1c24eb..01ff77cb 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -28,8 +28,8 @@ namespace ge { *@brief Computes the for the gelu of "x" . \n *@par Inputs: -*Two inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32 +*One input, including: +*x: A Tensor. Must be one of the following types: float16, float32 *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -66,8 +66,8 @@ REG_OP(GeluGrad) *@brief Computes the for the fast_gelu of "x" . 
\n

*@par Inputs:
-*Two inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32
+*One input, including:
+*x: A Tensor. Must be one of the following types: float16, float32

*@par Outputs:
*y: A Tensor. Has the same type as "x".
@@ -83,7 +83,7 @@ REG_OP(FastGelu)
*@brief Computes the gradient for the fast_gelu of "x" . \n

*@par Inputs:
-*Three inputs, including:
+*Two inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32
* @li x: A Tensor of the same type as "dy" . \n
@@ -169,7 +169,7 @@ REG_OP(Relu)
* x: A Tensor of type RealNumberType . \n

* @par Outputs:
-* y: A Tensor of type RealNumberType . \n
+* y: A Tensor with the same type as x . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Relu6.
@@ -209,8 +209,12 @@ REG_OP(Relu6D)
* backprops = gradients * (features > 0) * (features < 6) . \n

* @par Inputs:
-* @li features: A Tensor of type RealNumberType.
-* @li gradients: A Tensor of type RealNumberType . \n
+* @li gradients: A Tensor of type RealNumberType. The backpropagated
+  gradients to the corresponding Relu6 operation.
+* @li features: A Tensor with the same type as gradients. The features passed
+  as input to the corresponding Relu6 operation, or its output;
+  using either one produces the same result. \n
+
* @par Outputs:
* backprops: A Tensor of type RealNumberType . \n
@@ -228,7 +232,7 @@ REG_OP(Relu6Grad)
*Applies the element-wise function:
* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha .
*@par Inputs:
-*One inputs, including:
+*Two inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32.
* @li activations: A tensor. Must be one of the following types:
@@ -238,7 +242,7 @@ REG_OP(Relu6Grad)
*y: A Tensor with the same type and shape of grads's.
*
*@par Attributes:
-*@li alpha: scalar parameter, default value = 1.0
+*alpha: scalar parameter, default value = 1.0
*/
REG_OP(EluGradV2)
  .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -539,13 +543,9 @@ REG_OP(Elu)
*x: A float16, float32, for the input data type . \n

*@par Attributes:
-*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
-
-*@par Attributes:
-*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
-
-*@par Attributes:
-*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
+*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
+*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
+*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n

*@par Outputs:
*y: A float16, float32, for the normalized result . \n
@@ -706,8 +706,8 @@ REG_OP(Mish)
* @li x: A Tensor. Must be one of the following types: float16, float32
* @li tanhx: A Tensor. shape, datatype and format is same as x
* @par Outputs:
- * 1 output, including:
- * @li x_grad: A Tensor. shape, datatype and format is same as x
+ * One output, including:
+ * x_grad: A Tensor. shape, datatype and format is same as x
*/

REG_OP(MishGrad)
@@ -721,20 +721,20 @@ REG_OP(MishGrad)
* @brief pytorch hardtanh_backward operator.
*
* @par Inputs:
- * 2 inputs, including:
+ * Two inputs, including:
* @li result, minimum tensor of the linear region range,
* datatype: float16/float32, format:ND/5HD.
* @li grad, maximum tensor of the linear region range, * datatype:float16/float32, format:ND/5HD. \n * @par Attributes: - * 2 attributes, including: + * Two attributes, including: * @li min_val, minimum value of the linear region range, datatype:float. * @li max_val, maximum value of the linear region range, datatype:float. \n * @par Outputs: - * 1 output, including: - * @li y, hardtanh_backward output tensor, datatype and format is same as + * One output, including: + * y, hardtanh_backward output tensor, datatype and format is same as * input result. \n * @attention Constraints: @@ -756,7 +756,7 @@ REG_OP(HardtanhGrad) * @par Inputs: * One inputs, including: -* @li x: A mutable Tensor. Must be one of the following types: +* x: A mutable Tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: @@ -765,7 +765,7 @@ REG_OP(HardtanhGrad) * @li threshold: An optional float. Defaults to "20.0" \n * @par Outputs: -* @li y: A mutable Tensor. Has the same type as "x" \n +* y: A mutable Tensor. Has the same type as "x" \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Softplus. @@ -792,7 +792,7 @@ REG_OP(SoftplusV2) * @li threshold: An optional float. Defaults to "20.0" \n * @par Outputs: -* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n +* output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n * @par Third-party framework compatibility * Compatible with the Pytorch operator SoftplusGrad. @@ -809,13 +809,16 @@ REG_OP(SoftplusV2Grad) * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. * - * @par inputs + * @par Inputs: * one input including: - * @li x: input A Tensor. Must be one of the following types: float32, float16 + * x: input A Tensor. Must be one of the following types: float32, float16 * - * @par output + * @par Attributes: + * alpha: An optional float. Defaults to 1.0. \n + + * @par Outputs: * one output including: - * @li y:A Tensor of the same type as x + * y:A Tensor of the same type as x * */ REG_OP(ThresholdedRelu) @@ -829,14 +832,14 @@ REG_OP(ThresholdedRelu) * @par Inputs: * One inputs, including: -* @li input_x: A tensor. Must be one of the following types: +* input_x: A tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: -* @li lambd: An optional float. Defaults to 0.5. \n +* lambd: An optional float. Defaults to 0.5. \n * @par Outputs: -* y: A Tensor with the same dtype and shape of input_x's. \n +* output_y: A Tensor with the same dtype and shape of input_x's. \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Hardshrink. \n @@ -863,7 +866,7 @@ REG_OP(HardShrink) *backprops: A Tensor with the same type and shape of features's. \n * *@par Attributes: -*@li lambd: An optional float.Defaults to 0.5. \n +*lambd: An optional float.Defaults to 0.5. \n * *@par Third-party framework compatibility *Compatible with the Pytorch operator Hardshrink_backward. \n @@ -880,7 +883,7 @@ REG_OP(HardShrink) * @par Inputs: * One inputs, including: -* @li input_x: A tensor. Must be one of the following types: +* input_x: A tensor. Must be one of the following types: * float16, float32, int32. \n * @par Attributes: @@ -905,11 +908,11 @@ REG_OP(HardSigmoid) * @par Inputs: * One inputs, including: -* @li input_x: A tensor. 
Must be one of the following types: +* input_x: A tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: -* @li lambd: An optional float. Defaults to 0.5. \n +* lambd: An optional float. Defaults to 0.5. \n * @par Outputs: * y: A Tensor with the same dtype and shape of input_x's. \n @@ -933,7 +936,7 @@ REG_OP(SoftShrink) * @li input_x: A tensor of the same dtype as "input_grad". \n * @par Attributes: -* @li lambd: An optional float. Defaults to 0.5. \n +* lambd: An optional float. Defaults to 0.5. \n * @par Outputs: * y: A Tensor of the same dtype and shape as "input_graxd". \n @@ -976,12 +979,12 @@ REG_OP(LogSigmoidGrad) *@par Inputs: *One inputs, including: -* @li x: A tensor. Must be one of the following types: +* x: A tensor. Must be one of the following types: * float16, float32. \n *@par Outputs: *One outputs, including: -* @li y: A tensor with the same type and shape of x's. \n +* y: A tensor with the same type and shape of x's. \n *@par Third-party framework compatibility *Compatible with the Pytorch operator LogSigmoid. \n @@ -1003,7 +1006,7 @@ REG_OP(LogSigmoid) *@par Outputs: *One outputs, including: -* @li y: A tensor with the same type and shape of x's. \n +* y: A tensor with the same type and shape of x's. \n * @par Attributes: * @li alpha: An optional float. Defaults to 0.16666666. \n diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 6854c866..9d0e7a62 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -33,8 +33,8 @@ namespace ge { *@li value: A 0D scalar. Specifies the value to fill the returned tensor. * Must be one of the following types: -* float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, bool, +* qint8, quint8, qint32, qint16, quint16, uint16, complex128, uint32, uint64, . * *@par Outputs: * y: A tensor. Has the same type as "value". @@ -46,8 +46,14 @@ namespace ge { */ REG_OP(Fill) .INPUT(dims, TensorType::IndexNumberType()) - .INPUT(value, TensorType::BasicType()) - .OUTPUT(y, TensorType::BasicType()) + .INPUT(value, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, + DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, + DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16, + DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, + DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, + DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16, + DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64})) .OP_END_FACTORY_REG(Fill) /** @@ -213,11 +219,11 @@ REG_OP(PadV2) *@brief Pads a tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n -*constant_values: A Tensor. Must have the same type as input. +*@li x: A Tensor. Must be one of the following types: float16, float32, int32 . \n +*@li constant_values: A Tensor. Must have the same type as input. *@par Attributes: -*paddings: An optional "vector>". Defaults to "{}". +*paddings: A required Attribute. * For each dimension D of input, paddings[D, 0] indicates how many * values to add before the contents of tensor in that dimension, * and paddings[D, 1] indicates how many values to add after the @@ -461,7 +467,7 @@ REG_OP(FillV2) * @li dims: An required listInt to specify the shape that the value to fill. 
* @par Outputs: -* @li y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. +* y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. * @par Third-party framework compatibility * Compatible with the ONNX operator ConstantOfShape. diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index b625180a..e578997c 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -54,27 +54,26 @@ REG_OP(StringToNumber) /** *@brief Convert serialized tensorflow.TensorProto prototype to Tensor. *@brief Parse an Example prototype. -*@par Input: -*serialized: A Tensor of type string. -*dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n +*@par Inputs: +*@li serialized: A Tensor of type string. +*@li dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n *@par Attributes: -*num_sparse: type int num of inputs sparse_indices , sparse_values, sparse_shapes -*out_type: output type -*sparse_keys: ListString -*sparse_types: types of sparse_values -*dense_keys: ListString -*dense_shapes: output of dense_defaults shape -*dense_types: output of dense_defaults type \n +*@li num_sparse: type int num of inputs sparse_indices , sparse_values, sparse_shapes +*@li sparse_keys: ListString +*@li sparse_types: types of sparse_values +*@li dense_keys: ListString +*@li Tdense: output of dense_defaults type +*@li dense_shapes: output of dense_defaults shape \n *@par Outputs: -*sparse_indices: A Tensor of type string. -*sparse_values: Has the same type as sparse_types. -*sparse_shapes: A Tensor of type int64 -*dense_values: Has the same type as dense_defaults. +*@li sparse_indices: A Tensor of type string. +*@li sparse_values: Has the same type as sparse_types. +*@li sparse_shapes: A Tensor of type int64 +*@li dense_values: Has the same type as dense_defaults. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. -**/ +*/ REG_OP(ParseSingleExample) .INPUT(serialized, TensorType({DT_STRING})) .DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) @@ -92,16 +91,16 @@ REG_OP(ParseSingleExample) /** *@brief Decodes raw file into tensor . \n -*@par Input: +*@par Inputs: *bytes: A Tensor of type string. *@par Attributes: -*little_endian: bool ture -*out_type: output type +*@li little_endian: bool ture +*@li out_type: output type *@par Outputs: *Output: A Tensor -**/ +*/ REG_OP(DecodeRaw) .INPUT(bytes, TensorType({DT_STRING})) .OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT, @@ -147,18 +146,20 @@ REG_OP(ParseTensor) *@par Inputs: *Inputs include: -*records: Each string is a record/row in the csv and all records should have the +*@li records: Each string is a record/row in the csv and all records should have the *same format. \n -*record_defaults: One tensor per column of the input record, with either a +*@li record_defaults: One tensor per column of the input record, with either a *scalar default value for that column or an empty vector if the column is *required. \n *@par Attributes: -*OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n -*field_delim: char delimiter to separate fields in a record. \n -*use_quote_delim: If false, treats double quotation marks as regular characters +*@li OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n +*@li field_delim: char delimiter to separate fields in a record. 
\n +*@li use_quote_delim: If false, treats double quotation marks as regular characters *inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n -*na_value: Additional string to recognize as NA/NaN. \n +*@li na_value: Additional string to recognize as NA/NaN. \n +*@li select_cols: Optional sorted list of column indices to select. If specified, +only this subset of columns will be parsed and returned. *@par Outputs: *output: A Tensor. Has the same type as x . \n @@ -186,25 +187,25 @@ REG_OP(DecodeCSV) /** *@brief Convert serialized tensorflow.TensorProto prototype to Tensor. *@brief Parse an Example prototype. -*@par Input: -*serialized: A Tensor of type string. \n -*name:A Tensor of type string. \n -*sparse_keys: Dynamic input tensor of string. \n -*dense_keys: Dynamic input tensor of string \n -*dense_defaults: Dynamic input tensor type as string, float, int64. \n +*@par Inputs: +*@li serialized: A Tensor of type string. \n +*@li name:A Tensor of type string. \n +*@li sparse_keys: Dynamic input tensor of string. \n +*@li dense_keys: Dynamic input tensor of string \n +*@li dense_defaults: Dynamic input tensor type as string, float, int64. \n *@par Attributes: -*Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n -*Ndense: Number of dense_keys \n -*sparse_types: types of sparse_values \n -*Tdense: Type of dense_defaults dense_defaults and dense_values \n -*dense_shapes: output of dense_defaults shape \n +*@li Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n +*@li Ndense: Number of dense_keys \n +*@li sparse_types: types of sparse_values \n +*@li Tdense: Type of dense_defaults dense_defaults and dense_values \n +*@li dense_shapes: output of dense_defaults shape \n *@par Outputs: -*sparse_indices: A Tensor of type string. \n -*sparse_values: Has the same type as sparse_types. \n -*sparse_shapes: A Tensor of type int64 \n -*dense_values: Has the same type as dense_defaults. \n +*@li sparse_indices: A Tensor of type string. \n +*@li sparse_values: Has the same type as sparse_types. \n +*@li sparse_shapes: A Tensor of type int64 \n +*@li dense_values: Has the same type as dense_defaults. \n *@par Third-party framework compatibility \n *@li compatible with tensorflow StringToNumber operator. \n */ @@ -228,37 +229,37 @@ REG_OP(ParseExample) /** *@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed *tensors. -*@par Input: -*serialized: A Tensor of type string. \n -*feature_list_dense_missing_assumed_empty:A Tensor of type string. \n -*context_sparse_keys: Dynamic input tensor of string. \n -*context_dense_keys: Dynamic input tensor of string \n -*feature_list_sparse_keys: Dynamic input tensor of string \n -*feature_list_dense_keys: Dynamic input tensor of string \n -*context_dense_defaults: Dynamic input tensor of string, float, int64 \n -*debug_name: A Tensor of type string. \n +*@par Inputs: +*@li serialized: A Tensor of type string. \n +*@li feature_list_dense_missing_assumed_empty:A Tensor of type string. \n +*@li context_sparse_keys: Dynamic input tensor of string. \n +*@li context_dense_keys: Dynamic input tensor of string \n +*@li feature_list_sparse_keys: Dynamic input tensor of string \n +*@li feature_list_dense_keys: Dynamic input tensor of string \n +*@li context_dense_defaults: Dynamic input tensor of string, float, int64 \n +*@li debug_name: A Tensor of type string. 
\n *@par Attributes: -*Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n -*Ncontext_dense: Number of context_dense_keys \n -*Nfeature_list_sparse: Number of feature_list_sparse_keys \n -*Nfeature_list_dense: Number of feature_list_dense_keys \n -*context_sparse_types: Types of context_sparse_values \n -*Tcontext_dense: Number of dense_keys \n -*feature_list_dense_types: Types of feature_list_dense_values \n -*context_dense_shapes: Shape of context_dense \n -*feature_list_sparse_types: Type of feature_list_sparse_values \n -*feature_list_dense_shapes: Shape of feature_list_dense \n +*@li Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n +*@li Ncontext_dense: Number of context_dense_keys \n +*@li Nfeature_list_sparse: Number of feature_list_sparse_keys \n +*@li Nfeature_list_dense: Number of feature_list_dense_keys \n +*@li context_sparse_types: Types of context_sparse_values \n +*@li Tcontext_dense: Number of dense_keys \n +*@li feature_list_dense_types: Types of feature_list_dense_values \n +*@li context_dense_shapes: Shape of context_dense \n +*@li feature_list_sparse_types: Type of feature_list_sparse_values \n +*@li feature_list_dense_shapes: Shape of feature_list_dense \n *@par Outputs: -*context_sparse_indices: Dynamic output tensor of type int64. \n -*context_sparse_values: Dynamic output tensor of type string, float, int64. \n -*context_sparse_shapes: Dynamic output tensor of type int64 \n -*context_dense_values: Dynamic output tensor of type string, float, int64. \n -*feature_list_sparse_indices: Dynamic output tensor of type int64. \n -*feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n -*feature_list_sparse_shapes: Dynamic output tensor of type int64 \n -*feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n +*@li context_sparse_indices: Dynamic output tensor of type int64. \n +*@li context_sparse_values: Dynamic output tensor of type string, float, int64. \n +*@li context_sparse_shapes: Dynamic output tensor of type int64 \n +*@li context_dense_values: Dynamic output tensor of type string, float, int64. \n +*@li feature_list_sparse_indices: Dynamic output tensor of type int64. \n +*@li feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n +*@li feature_list_sparse_shapes: Dynamic output tensor of type int64 \n +*@li feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n *@par Third-party framework compatibility \n *@li compatible with tensorflow StringToNumber operator. \n */ diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 69d5e67e..0636833c 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -63,10 +63,11 @@ REG_OP(Dequantize) /** *@brief Quantizes the input . \n *@par Inputs: -*x: shape and dtype of input_x. \n -*scales: shape and dtype of input_scales. \n -*zero_points: shape and dtype of input_zero_points \n +*@li x: shape and dtype of input_x. \n +*@li scales: shape and dtype of input_scales. \n +*@li zero_points: shape and dtype of input_zero_points \n *@par Attributes: +*@li dtype: required, type. *@li axis: the processed dim. \n *@par Outputs: *y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n @@ -91,7 +92,8 @@ REG_OP(Quantize) *@li offset: A required float16, specifying the offset. 
*@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". *@li round_mode: An optional string, specifying the float16 to int8 cast type. -* The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round" . \n +* The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . +*@li dst_type: An optional int32, specifying the output data type. Defaults to "DT_INT8" . \n *@par Outputs: *y: The quantized output tensor of type int8 and with format NC1HWC0 . \n diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 20484623..5af2dd74 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -37,13 +37,18 @@ namespace ge { *deprecated name. *@li indices: Indices in the outermost dimension of `params` of the values that should be *gathered. + +*@par Attributes: +*@li PARAMS_RAGGED_RANK: The ragged rank of the params_nested_splits. +*@li Tsplits: The type of output_nested_splits. *@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain *this number of `row_splits` tensors. This value should equal *`indices.shape.ndims + params.ragged_rank - 1` . \n *@par Outputs: -*y:A Returns The `nested_row_splits` tensors that define the row-partitioning for the -*returned RaggedTensor.The `flat_values` for the returned RaggedTensor . \n +*@li output_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the +*returned RaggedTensor. +*@li output_dense_values: The `flat_values` for the returned RaggedTensor. \n *@par Third-party framework compatibility * Compatible with tensorflow RaggedGather operator. diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index 020e3da4..ceaa64e4 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -61,7 +61,6 @@ REG_OP(RaggedTensorToSparse) *@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n *@par Inputs: -*Six inputs, including: *@li shape:A `Tensor`. Must be one of the following types: `int64`, `int32`. *@li values:A 1D tensor representing the values of the ragged tensor. *@li default_value:A `Tensor`. Must have the same type as `values`. @@ -78,7 +77,7 @@ The types of the row partition tensors. At present, these can be: is preceded by "FIRST_DIM_SIZE" . \n *@par Outputs: -*@li result: A `Tensor`. Has the same type as `values`. +*result: A `Tensor`. Has the same type as `values`. */ REG_OP(RaggedTensorToTensor) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index 258b0ca1..4376437f 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -35,7 +35,11 @@ namespace ge { *@li deltas: The deltas of each range . \n *@par Outputs: -*y:A Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor` . \n +*@li rt_dense_values: The `flat_values` for the returned `RaggedTensor`. +*@li rt_nested_splits: The `row_splits` for the returned `RaggedTensor`. \n + +*@par Attributes: +*Tsplits: The type of rt_nested_splits.
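Editor's aside: a standalone sketch of the row-splits layout that the RaggedRange outputs above describe, assuming vector inputs and positive deltas (illustration only, not the operator's implementation):

#include <cstddef>
#include <cstdint>
#include <vector>

// Builds rt_nested_splits/rt_dense_values for the ranges [starts[i], limits[i]).
void RaggedRangeSketch(const std::vector<int> &starts, const std::vector<int> &limits,
                       const std::vector<int> &deltas,
                       std::vector<int64_t> &rt_nested_splits,
                       std::vector<int> &rt_dense_values) {
  rt_nested_splits.assign(1, 0);
  rt_dense_values.clear();
  for (std::size_t i = 0; i < starts.size(); ++i) {
    for (int v = starts[i]; v < limits[i]; v += deltas[i]) rt_dense_values.push_back(v);
    rt_nested_splits.push_back(static_cast<int64_t>(rt_dense_values.size()));
  }
  // starts={0,5}, limits={3,7}, deltas={1,1} yields splits {0,3,5}, values {0,1,2,5,6}.
}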
*@attention Constraints: *The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index b65a68f1..66f9b65f 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -148,6 +148,32 @@ REG_OP(RandomGamma) .OP_END_FACTORY_REG(RandomGamma) /** +*@brief Returns a random permutation of integers from 0 to n-1. \n + +*@par Attributes: +*@li n: A required int. +*@li dtype: An optional type. Defaults to int64. +*@li layout: An optional int. Defaults to 0 . \n + +*@par Outputs: +*out: A required Tensor. Must be one of the following types: + float16, float32, int8, uint8, int16, int32, int64. \n + +*@attention Constraints: +*The implementation for Randperm on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li Compatible with Pytorch Randperm operator. +*/ +REG_OP(Randperm) .OUTPUT(out, TensorType({DT_INT64, DT_INT32, DT_INT16, DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) .REQUIRED_ATTR(n, Int) .ATTR(layout, Int, 0) .ATTR(dtype, Type, DT_INT64) .OP_END_FACTORY_REG(Randperm) + /** *@brief Outputs random values from the Poisson distribution(s) described by rate . \n *@par Inputs: @@ -157,11 +183,12 @@ REG_OP(RandomGamma) *@par Attributes: *@li dtype: An optional type from: half, float32, float64, int32, int64. Defaults to int64. -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0. A second seed to avoid seed collision. \n *@par Outputs: -*y: A Tensor of type dtype . \n +*y: A Tensor of the specified dtype: float16, float, double, int32 or int64. \n *@attention Constraints: *The implementation for RandomPoisson on Ascend uses AICPU, with bad performance. @@ -188,11 +215,13 @@ REG_OP(RandomPoisson) *x: A Tensor. The tensor to be shuffled . \n *@par Attributes: -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0. A second seed to avoid seed collision. \n *@par Outputs: -*y: A Tensor. Has the same type as x . \n +*y: A Tensor. Has the same type as x. Must be one of the following types: float16, float, +*double, int16, int32, int64, uint16, int8, uint8. \n *@attention Constraints: *The implementation for RandomShuffle on Ascend uses AICPU, with bad performance. @@ -220,11 +249,12 @@ REG_OP(RandomShuffle) *@par Attributes: *@li dtype: A type from: half, float16, float32, float64. The type of the output. -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0. A second seed to avoid seed collision. \n *@par Outputs: -*y: A Tensor of type dtype . \n +*y: A Tensor of type float32, float16 or double.
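Editor's aside: the seed/seed2 wording repeated in these hunks follows the usual TensorFlow convention; a standard-library sketch of that rule (the mixing of the two seeds is illustrative only, the actual kernel behavior is not specified by this patch):

#include <cstdint>
#include <random>

std::mt19937_64 MakeGenerator(uint64_t seed, uint64_t seed2) {
  if (seed != 0 || seed2 != 0) {
    return std::mt19937_64(seed ^ (seed2 << 1));  // deterministic: seeded by the given seeds
  }
  return std::mt19937_64(std::random_device{}());  // otherwise: seeded by a random seed
}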
\n *@attention Constraints: *The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance. @@ -241,6 +271,28 @@ REG_OP(RandomStandardNormal) .OP_END_FACTORY_REG(RandomStandardNormal) /** +*@brief Output random value from separate normal distribution. \n + +*@par Inputs: +*Inputs include: +*mean: The mean is a tensor with the mean of each output element’s normal distribution . +*std: The std is a tensor with the standard deviation of each output element’s normal distribution. \n +*@par Outputs: +*y: A Tensor of type dtype . \n + +*@attention Constraints: +*The implementation for Normal on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with Pytorch Normal operator. +*/ +REG_OP(Normal) + .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(std, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(Normal) + +/** *@brief Outputs random integers from a uniform distribution . \n *@par Inputs: @@ -250,8 +302,9 @@ REG_OP(RandomStandardNormal) * @li max: A Tensor. Must have the same type as minval. 0-D . \n *@par Attributes: -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n *@par Outputs: *y: A Tensor. Has the same type as min . \n @@ -280,8 +333,9 @@ REG_OP(RandomUniformInt) *@par Attributes: *@li dtype: A type from: half, float16, float32, float64. The type of the output. -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n *@par Outputs: *y: A Tensor of type dtype . \n @@ -308,11 +362,14 @@ REG_OP(RandomUniform) *shape: A Tensor. Must be one of the following types: int32, int64 . \n *@par Attributes: -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0.If either `seed` or `seed2` +are set to be non-zero, the random number generator is seeded by the given +seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n *@par Outputs: -*size: A Tensor of types: float16, float32, double . \n +*y: A Tensor of types: float16, float32, double . A tensor of the specified shape +filled with random truncated normal values. \n *@attention Constraints: *The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance. @@ -505,15 +562,15 @@ REG_OP(RandomChoiceWithMask) *@par Inputs: *Inputs including: -* @li x: A required Tensor. Must be one of the following types: - float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n +* x: A required Tensor. Must be one of the following types: + float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n *@par Attributes: -*@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . 
\n +* group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n *@par Outputs: -*y: A required Tensor. Has same type and shape as "x". Must be one of the following types: - float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n +* y: A required Tensor. Has the same type and shape as "x". Must be one of the following types: + float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n *@attention Constraints: *@li "group" must be greater than 0 and must evenly divide the channel dimension size. @@ -584,6 +641,50 @@ REG_OP(DropoutV2) .OUTPUT(seed, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(p, Float) .OP_END_FACTORY_REG(DropoutV2) + +/** +* @brief Draws random samples from a Bernoulli distribution parameterized by "p". \n + +* @par Inputs: +* @li x: An ND Tensor. Must be one of the following data types: + int8, uint8, int16, int32, int64, bool, float32, float64 . +* @li p: An ND Tensor. The probability of an element to be zeroed. + Must be one of the following data types: float32, float64. \n + +* @par Attributes: +* seed: An integer, the seed of the random generator. The default value -1 + uses the current timestamp; otherwise it should be a positive integer. + +* @par Outputs: +* y: A tensor with the same shape and type as "x". +*/ + +REG_OP(Bernoulli) .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) .INPUT(p, TensorType({ DT_FLOAT, DT_DOUBLE })) .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) .ATTR(seed, Int, -1) .OP_END_FACTORY_REG(Bernoulli) + +/** + * @brief Fills the input tensor with values drawn from the uniform distribution U(from, to). \n + + * @par Inputs: + * x: A Tensor. Must be one of the following types: float16, float, double. \n + + * @par Attributes: + * @li from: The lower bound of the uniform distribution. Defaults to 0.0. + * @li to: The upper bound of the uniform distribution. Defaults to 1.0. \n + + * @par Outputs: + * y: A Tensor with the same type as x. \n + */ REG_OP(Uniform) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .ATTR(from, Float, 0.0) .ATTR(to, Float, 1.0) .OP_END_FACTORY_REG(Uniform) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 97c7b8e1..1578ba59 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -576,7 +576,7 @@ REG_OP(ReduceAll) *@li axis: A mutable Tensor. The dimensions to reduce . \n *@par Attributes: -*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n +*keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n *@par Outputs: *y: A Tensor. Has the same type and format as input "x" . \n @@ -967,9 +967,9 @@ REG_OP(EuclideanNormD) Defaults to "0.00001" . \n *@par Outputs: -*y: A Tensor of type float16 or float32 for the normalized "x". -*batch_mean: A Tensor of type float32 for the result mean. -*batch_ variance: A Tensor of type float32 for the result variance . \n +*@li y: A Tensor of type float16 or float32 for the normalized "x". +*@li batch_mean: A Tensor of type float32 for the result mean. +*@li batch_variance: A Tensor of type float32 for the result variance .
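Editor's aside: the y/batch_mean/batch_variance outputs above are the usual instance-normalization quantities; in scalar form (illustration only, with epsilon as documented, not the operator's implementation):

#include <cmath>

// y = gamma * (x - mean) / sqrt(variance + epsilon) + beta, applied per instance.
float InstanceNormOne(float x, float mean, float variance, float gamma, float beta,
                      float epsilon) {
  return gamma * (x - mean) / std::sqrt(variance + epsilon) + beta;
}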
\n *@attention Constraints: *For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. @@ -987,7 +987,7 @@ REG_OP(INInferV2) .OP_END_FACTORY_REG(INInferV2) /** -*@brief Performs reduced instance normalization . \n +*@brief Performs reduce instance normalization. \n *@par Inputs: *x: A Tensor of type float16 or float32. \n @@ -1008,32 +1008,31 @@ REG_OP(INTrainingReduceV2) /** -*@brief Performs update instance normalization . \n +*@brief Performs update instance normalization. \n *@par Inputs: -* Seven inputs, including: (NC1HWC0supported) +* Seven inputs, including: *@li x: A Tensor of type float16 or float32. *@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. *@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. *@li gamma: A Tensor of type float32, for the scaling gamma. *@li beta: A Tensor of type float32, for the scaling beta. *@li mean: A Tensor of type float32, for the updated mean. -*@li variance: A Tensor of type float32, for the updated variance . \n +*@li variance: A Tensor of type float32, for the updated variance. \n *@par Attributes: *@li momentum: A required float32, specifying the momentum to update mean and var. -*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n +*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. \n *@par Outputs: * Three outputs *@li y: A Tensor of type float16 or float32, for normalized "x". *@li batch_mean: A Tensor of type float32, for the updated mean. -*@li batch_variance: A Tensor of type float32, for the updated variance . \n +*@li batch_variance: A Tensor of type float32, for the updated variance. \n *@attention Constraints: -*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. +* This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with INTrainingReduceV2. -*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ REG_OP(INTrainingUpdateV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -1052,6 +1051,80 @@ REG_OP(INTrainingUpdateV2) /** +*@brief Performs the backpropagation of InstanceNorm. \n + +*@par Inputs: +* Seven inputs, including: +*@li dy: A Tensor of type float16 or float32. +*@li x: A Tensor of type float16 or float32. +*@li variance: A Tensor of type float32, for the variance of "x". +*@li mean: A Tensor of type float32, for the mean of "x". +*@li res_gamma: A Tensor of type float32. +*@li res_beta: A Tensor of type float32. +*@li gamma: A Tensor of type float32. \n + +*@par Outputs: +*pd_x: A Tensor of type float16 or float32, for the offset of "x". \n + +*@attention Constraints: +* The preceding layer of this operator must be INTrainingUpdateGrad. \n +*/ +REG_OP(INTrainingReduceGrad) + .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT})) + .INPUT(res_gamma, TensorType({DT_FLOAT})) + .INPUT(res_beta, TensorType({DT_FLOAT})) + .INPUT(gamma, TensorType({DT_FLOAT})) + .OUTPUT(pd_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(INTrainingReduceGrad) + +/** +*@brief Performs the backpropagation of InstanceNorm. \n + +*@par Inputs: +* Four inputs, including: +*@li dy: A Tensor of type float16 or float32, for the gradient. 
+*@li x: A Tensor of type float16 or float32. +*@li variance: A Tensor of type float32, for the variance of "x". +*@li mean: A Tensor of type float32, for the mean of "x". \n + +*@par Outputs: +*@li res_gamma: A Tensor of type float32. +*@li res_beta: A Tensor of type float32. \n + +*/ +REG_OP(INTrainingUpdateGrad) .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(variance, TensorType({DT_FLOAT})) .INPUT(mean, TensorType({DT_FLOAT})) .OUTPUT(res_gamma, TensorType({DT_FLOAT})) .OUTPUT(res_beta, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(INTrainingUpdateGrad) + +/** +*@brief Performs the backpropagation of InstanceNorm. \n + +*@par Inputs: +* Two inputs, including: +*@li res_gamma: A Tensor of type float32. +*@li res_beta: A Tensor of type float32. \n + +*@par Outputs: +*@li pd_gamma: A Tensor of type float32. +*@li pd_beta: A Tensor of type float32. \n + +*/ +REG_OP(INTrainingUpdateGradGammaBeta) .INPUT(res_gamma, TensorType({DT_FLOAT})) .INPUT(res_beta, TensorType({DT_FLOAT})) .OUTPUT(pd_gamma, TensorType({DT_FLOAT})) .OUTPUT(pd_beta, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(INTrainingUpdateGradGammaBeta) + +/** *@brief Performs reduced group normalization . \n *@par Inputs: *@par Attributes: -*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingUpdate . \n +*num_groups: Int, specifying the number of groups. Required, same as GNTrainingUpdate . \n *@attention Constraints: * This operator is a GroupNorm fusion operator for updating the moving averages for training. @@ -1081,7 +1154,7 @@ REG_OP(GNTrainingReduce) *@brief Performs update group normalization . \n *@par Inputs: -* Eight inputs, including: (NCHW NHWC supported) +* Seven inputs, including: (NCHW NHWC supported) *@li x: A Tensor of type float16 or float32. *@li sum: A 5D Tensor of type float32, shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC @@ -1145,8 +1218,8 @@ include: *@li keep_dims: An optional bool. Defaults to False. If True, retains reduced dimensions with length 1. *@li separator: A string, the separator to use when joining. -*@par output: -*@li output::A Tensor of type string.. +*@par Outputs: +*output: A Tensor of type string. */ REG_OP(ReduceJoin) .INPUT(input, TensorType({DT_STRING})) @@ -1160,7 +1233,7 @@ REG_OP(ReduceJoin) * @brief Calculates the standard deviation and average value of Tensors. * @par Inputs: -* @li x: A Tensor. Must be one of the following types: +* x: A Tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index 74ac83f8..156f2f34 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -33,10 +33,12 @@ namespace ge { *y: A Tensor of type resource. \n *@par Attributes: -* @li container: optional, string. -* @li shared_name: optional, string. -* @li dtype: required, type. -* @li shape: optional, ListInt. \n +* @li container: optional, string. The container this +variable is placed in. +* @li shared_name: optional, string. The name by which + this variable is referred to. +* @li dtype: required, type. The data type of the output. +* @li shape: optional, ListInt. The shape of the output. \n *@see VarHandleOp. */ @@ -53,11 +55,11 @@ REG_OP(VarHandleOp) *@brief Assigns a new value to a variable.
\n *@par Inputs: -*resource:Handle to the resource in which to store the variable. -*value:The value to set the new tensor to use. \n +*@li resource: Handle to the resource in which to store the variable. +*@li value: The value to set the new tensor to use. \n *@par Attributes: -* @li dtype: required, type. \n +* dtype: required, type. \n *@see AssignVariableOp. */ @@ -73,11 +75,11 @@ REG_OP(AssignVariableOp) *@brief Adds a value to the current value of a variable. \n *@par Inputs: -*resource:Handle to the resource in which to store the variable. -*value:The value by which the variable will be incremented. \n +*@li resource: Handle to the resource in which to store the variable. +*@li value: The value by which the variable will be incremented. \n *@par Attributes: -* @li dtype: required, type. \n +* dtype: required, type. \n *@see AssignAddVariableOp. */ @@ -93,11 +95,11 @@ REG_OP(AssignAddVariableOp) *@brief Subtracts a value from the current value of a variable. \n *@par Inputs: -*resource:Handle to the resource in which to store the variable. -*value:The value by which the variable will be incremented. \n +*@li resource: Handle to the resource in which to store the variable. +*@li value: The value by which the variable will be decremented. \n *@par Attributes: -* @li dtype: required, type. \n +* dtype: required, type. \n *@see AssignSubVariableOp. */ diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 80546860..20828a89 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -127,9 +127,7 @@ REG_OP(DynamicLSTM) *@li cell_clip:A float identifying the cell clip in the op. Default to -1. *@li num_proj:An integer identifying the num projection in the op. Default to 0. *@li time_major:A bool identifying the time major in the op. Default to false. -*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. *@li forget_bias:A float identifying the forget bias in the op. Default to 0. -*@li is_training:An bool identifying is training in the op. Default to true. *@par Outputs: *eight outputs: \n @@ -491,7 +489,6 @@ REG_OP(DynamicLSTMV2) *ten inputs: \n *@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -504,10 +501,11 @@ REG_OP(DynamicLSTMV2) *@par Outputs: -*eight outputs: \n +*four outputs: \n *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*/ REG_OP(LSTMInputGrad) .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -571,13 +569,13 @@ REG_OP(DynamicLSTMGradCell) .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(t_state, TensorType({DT_INT32, DT_INT32})) + .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(forget_bias, Float, 1) - .ATTR(activation, String, "") - .ATTR(direction, String, "Forward") + .ATTR(forget_bias, Float, 1.0) + .ATTR(activation, String, "tanh") + .ATTR(direction, String, "UNIDIRECTIONAL") .ATTR(gate_order, String, "ijfo") .OP_END_FACTORY_REG(DynamicLSTMGradCell) @@ -1070,7 +1068,7 @@ REG_OP(GRUV2HiddenGradCell) * If "False", "grad_weight" will not be scaled by word_frequency. \n * @par Outputs: -* @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n +* y: A mutable output Tensor of the new word grad. Has the same type as "grads". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator EmbeddingDenseGrad. @@ -1222,7 +1220,7 @@ REG_OP(CommonGRU) * is equivalent to the size of indices. This matches the CSR format. \n * @par Outputs: -* @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n +* y: A mutable output Tensor of the new word grad. Has the same type as "grads". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator EmbeddingBag. diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index 089af326..850b3e5a 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -28,12 +28,12 @@ namespace ge { * iou_threshold with higher scoring box according to their * intersection-over-union (IoU) . \n -*@par Input: -* @li box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and +* @par Inputs: +* box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and * corresponding confidence scores . \n * @par Attributes: -* @li iou_threshold: An optional float. The threshold for deciding whether boxes +* iou_threshold: An optional float. The threshold for deciding whether boxes * overlap too much with respect to IOU . \n * @par Outputs: diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index 34c6a268..601b360b 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -45,7 +45,13 @@ namespace ge { *corresponding weights in sparse_weights. This field may be omitted for the dense approach. It's a dynamic input. *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. *@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group. It's a dynamic input. -*@li example_state_data: a list of vectors containing the example state data. +*@li example_state_data: a list of vectors containing the example state data. \n + +*@par Attributes: +*@li adaptive: An optional bool. Defaults to false. +*@li num_sparse_features: The number of sparse feature groups. +*@li num_sparse_features_with_values: The number of sparse feature groups with values. +*@li num_dense_features: The number of dense feature groups. *@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses.
*@li l1: Symmetric l1 regularization strength. *@li l2: Symmetric l2 regularization strength. @@ -53,10 +59,10 @@ *@li num_inner_iterations: Number of iterations per mini-batch . \n *@par Outputs: -*y: A Returns a list of vectors containing the updated example state +*@li out_example_state_data: A list of vectors containing the updated example state *data. -*weights associated with a sparse feature group.a list of vectors where the values are the delta -*weights associated with a dense feature group . \n +*@li out_delta_sparse_weights: A list of vectors where each value is the delta +*weights associated with a sparse feature group. +*@li out_delta_dense_weights: A list of vectors where the values are the delta +*weights associated with a dense feature group . \n *@par Third-party framework compatibility * Compatible with tensorflow SdcaOptimizerV2 operator. diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 1c26e033..43f72ef3 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -258,7 +258,7 @@ REG_OP(GatherV2D) REG_OP(GatherElements) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) - .INPUT(index, TensorType({DT_INT64})) + .INPUT(index, TensorType({DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .ATTR(dim, Int, 0) .OP_END_FACTORY_REG(GatherElements) @@ -508,7 +508,7 @@ REG_OP(UnsortedSegmentSum) *@par Inputs: *One input, including: -* @li assist: A tensor. Must be one of the following types: +* assist: A tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: @@ -970,10 +970,11 @@ REG_OP(TopKV2) * for matrices) . \n * @par Attributes: -* @li sorted: An optional bool. Defaults to true. +* @li sorted: Defaults to true. * If true, the resulting "k" elements will be sorted by the values in descending * order. -* @li T: Indicator of indices type . \n +* @li largest: If true, the resulting `k` elements will be sorted by the values in descending order. +* @li dim: 0-D. The dimension to sort along (the last dimension, i.e. each row, for matrices). \n * @par Outputs: * @li values: A Tensor, specifying the sorted data. Has the same type as * @see TopK() * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator TopKV2. +* Compatible with the TensorFlow operator TopKV2. */ REG_OP(TopK) .INPUT(x, TensorType::RealNumberType()) @@ -1085,7 +1086,6 @@ REG_OP(InTopKD) * @brief Says whether the targets are in the top "k" predictions . \n * @par Inputs: -* Two inputs, including: * @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. * @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. * @li k: A 1D Tensor of the same type as "x2". @@ -1618,12 +1618,12 @@ REG_OP(UnsortedSegmentMinD) * y: A Tensor of type RealNumberType . \n * @attention Constraints: -* @li segment_ids must be non-negative tensor. +* segment_ids must be a non-negative tensor. * @see UnsortedSegmentSum(), UnsortedSegmentProd(), * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator UnsortedSegmentMax. +* Compatible with the TensorFlow operator UnsortedSegmentMax. */ REG_OP(UnsortedSegmentMax) .INPUT(x, TensorType::RealNumberType()) @@ -1875,15 +1875,15 @@ REG_OP(Crop) *@par Inputs: *One input, including: -* @li x: A tensor .
Must be one of the following types: +* x: A tensor. Must be one of the following types: * float16, float32, int32, uint32, int8, uint8. \n *@par Attributes: -* @li axis: Axis along which to cummin. \n +* axis: Axis along which to cummin. \n *@par Outputs: -* y: A Tensor with the same type and shape of x's. \n -* indices: A Tensor with the int32 type and the same shape of x's. \n +* @li y: A Tensor with the same type and shape as x's. +* @li indices: A Tensor with the int32 type and the same shape as x's. \n *@par Third-party framework compatibility *Compatible with the Pytorch operator Cummin. \n @@ -1968,17 +1968,14 @@ REG_OP(WriteSelect) .OP_END_FACTORY_REG(WriteSelect) /** -*@brief Read data by stride . \n +*@brief Read data by stride. *@par Inputs: -*One input: -*x: A Tensor. Must be one of the following types: float16, int8 . \n +*x: A Tensor. Must be one of the following types: float16, int8. \n *@par Attributes: -*@li axis: A required int32, specifying the index of axis to read by stride . \n - -*@par Attributes: -*@li stride: A required int32, specifying the value of reading stride . \n +*@li axis: A required int32, specifying the index of axis to read by stride. \n +*@li stride: A required int32, specifying the value of reading stride. \n *@par Outputs: *y: A Tensor of the same type as "x". @@ -1991,16 +1988,14 @@ REG_OP(StridedRead) .OP_END_FACTORY_REG(StridedRead) /** -*@brief: Write data by stride . \n +*@brief Write data by stride. *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, int8 . \n - -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to write by stride . \n +*x: A Tensor. Must be one of the following types: float16, int8. \n *@par Attributes: -*@li stride: A required int32, specifying the value of writing stride . \n +*@li axis: A required int32, specifying the index of axis to write by stride. \n +*@li stride: A required int32, specifying the value of writing stride. \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -2076,10 +2071,10 @@ REG_OP(CumulativeLogsumexpD) * @li updates: A Tensor of the same type as "var". \n * @par Attributes: -* @li axis: An required int to specify the axis to perform indices add. \n +* axis: A required int specifying the axis along which to perform the index add. \n * @par Outputs: -* @li var: A Tensor. Same as input "var". +* var: A Tensor. Same as input "var". * @par Third-party framework compatibility * Compatible with the Pytorch operator index_add_. @@ -2104,7 +2099,7 @@ REG_OP(InplaceIndexAdd) * @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8. * @par Outputs: -* @li y: A tensor. Must be one of the following dtypes: +* y: A tensor. Must be one of the following dtypes: * float16, float32, int64, int32, int8. */ REG_OP(MaskedFill) @@ -2123,7 +2118,7 @@ * @li mask: A Tensor of dtype bool. \n * @par Outputs: -* @li y: A tensor with the same type as x. \n +* y: A tensor with the same type as x. \n * @par Third-party framework compatibility * Compatible with the Numpy operator select. @@ -2134,13 +2129,50 @@ REG_OP(MaskedSelectV2) .INPUT(mask, TensorType({DT_BOOL})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(MaskedSelectV2) + +/** +* @brief Selects the elements of "x" for which "mask" is true. + +* @par Inputs: +* Two inputs, including: +* @li x: A Tensor of dtype float16, float32, float64, int64, int32, int16, int8 or uint8. +* @li mask: A Tensor of dtype bool.
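Editor's aside: a standalone sketch of the boolean-mask gather that MaskedSelect documents (flattened one-dimensional form; illustration only, not the operator's implementation):

#include <cstddef>
#include <vector>

// Keeps x[i] wherever mask[i] is true; the output length equals the number of
// true entries, which is why the result is a new, shorter tensor.
std::vector<float> MaskedSelectSketch(const std::vector<float> &x,
                                      const std::vector<bool> &mask) {
  std::vector<float> y;
  for (std::size_t i = 0; i < x.size() && i < mask.size(); ++i) {
    if (mask[i]) y.push_back(x[i]);
  }
  return y;
}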
\n + +* @par Outputs: +* y: A tensor with the same type as x. \n + +*/ +REG_OP(MaskedSelect) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .INPUT(mask, TensorType({DT_BOOL})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .OP_END_FACTORY_REG(MaskedSelect) + +/** +* @brief Updates the values of "x" with "updates" at the positions where "mask" is true. + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor of dtype float16, float32, float64, int64, int32, int16, int8 or uint8. +* @li mask: A Tensor of dtype bool. +* @li updates: A tensor with the same type as x. \n + +* @par Outputs: +* y: A tensor with the same type as x. \n +*/ +REG_OP(MaskedScatter) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .INPUT(mask, TensorType({DT_BOOL})) .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .OP_END_FACTORY_REG(MaskedScatter) /** * @brief Slice a tensor at its last dim, e.g. a[..., begin:end:stride]. \n * @par Inputs: * One input, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int16, int32. +* x: A Tensor. Must be one of the following types: float16, float32, int16, int32. * @par Attributes: * @li start: An attribute of type Int, start index of last dim. \n * @li end: An attribute of type Int, end index of last dim. \n * @li stride: An attribute of type Int, stride of slice. \n * @par Outputs: -* @li y: A Tensor. Has the same type as "x". \n +* y: A Tensor. Has the same type as "x". \n * @par Third-party framework compatibility * No compatibility @@ -2162,39 +2194,36 @@ REG_OP(SliceLastDim) .OP_END_FACTORY_REG(SliceLastDim) /** -* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n -* extracts a slice of size (end-begin)/stride from the given input tensor. \n -* Starting at the location specified by begin the slice continues by \n +* @brief Extracts a strided slice of a tensor. Roughly speaking, this op +* extracts a slice of size (end-begin)/stride from the given input tensor. +* Starting at the location specified by begin the slice continues by * adding stride to the index until all dimensions are not less than end. \n * * @par Inputs: -* Four inputs, including: -* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n -* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n -* complex128, float16, uint32, uint64, complex64, complex128. \n +* Five inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. * @li begin: A Tensor of type int32 or int64, for the index of the first value to select. -* * @li end: A Tensor of type int32 or int64, for the index of the last value to select. -* * @li axes: A Tensor of type int32 or int64, indicating the axes to be selected. -* -* @li strides: A Tensor of type int32 or int64, for the increment. +* @li strides: A Tensor of type int32 or int64, for the increment. \n * * @par Attributes: -* @li begin_mask: A Tensor of type int32.
\n -* A bitmask where a bit "i" being "1" means to ignore the begin \n +* @li begin_mask: A Tensor of type int32. +* A bitmask where a bit "i" being "1" means to ignore the begin * value and instead use the largest interval possible. -* @li end_mask: A Tensor of type int32. \n +* @li end_mask: A Tensor of type int32. * Analogous to "begin_mask". -* @li ellipsis_mask: A Tensor of type int32. \n -* A bitmask where bit "i" being "1" means the "i"th position \n +* @li ellipsis_mask: A Tensor of type int32. +* A bitmask where bit "i" being "1" means the "i"th position * is actually an ellipsis. -* @li new_axis_mask: A Tensor of type int32. \n -* A bitmask where bit "i" being "1" means the "i"th \n +* @li new_axis_mask: A Tensor of type int32. +* A bitmask where bit "i" being "1" means the "i"th * specification creates a new shape 1 dimension. -* @li shrink_axis_mask: A Tensor of type int32. \n -* A bitmask where bit "i" implies that the "i"th \n -* specification should shrink the dimensionality. +* @li shrink_axis_mask: A Tensor of type int32. +* A bitmask where bit "i" implies that the "i"th +* specification should shrink the dimensionality. \n * * @par Outputs: * y: A Tensor. Has the same type as "x". @@ -2231,7 +2260,7 @@ REG_OP(StridedSliceV2) * float16, float32, int32. \n * @par Attributes: -* @li dim: A required int. Used to select the dimension of this tensor. \n +* dim: A required int. Used to select the dimension of this tensor. \n *@par Outputs: *y: A Tensor with the same type and shape of input_x's. \n @@ -2307,6 +2336,34 @@ REG_OP(MaskedFillRange) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) .REQUIRED_ATTR(axis, Int) .OP_END_FACTORY_REG(MaskedFillRange) + +/** +* @brief After a set of sorted data and a new set of data are re-sorted, get the first k data. \n +* +* @par Inputs: +* Six inputs, including: +* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation. Must be one of the following types: float32, float16. +* @li topk_pq_index: A Tensor of type int32, index corresponding to topk_pq_distance. +* @li topk_pq_ivf: A Tensor of type int32 , the bucket number corresponding to topk_pq_distance. +* @li pq_distance: A Tensor of type float32 or float16, the new data set will be reordered with topk_pq_distance and updated to topk_pq_distance. +* @li pq_index: A Tensor of type int32, index corresponding to pq_distance. +* @li pq_ivf: A scalar of type int32 , the bucket number corresponding to pq_distance. \n +* +* @par Attributes: +* @li order: A string, indicates the sorting method of topk_pq_distance. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(InplaceTopKDistance) + .INPUT(topk_pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(topk_pq_index, TensorType({DT_INT32})) + .INPUT(topk_pq_ivf, TensorType({DT_INT32})) + .INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(pq_index, TensorType({DT_INT32})) + .INPUT(pq_ivf, TensorType({DT_INT32})) + .ATTR(order, String, "asc") + .OP_END_FACTORY_REG(InplaceTopKDistance) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index a1fc9ee6..8eb7b521 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -281,9 +281,9 @@ REG_OP(SparseSliceGrad) * @li size: A 1D Tensor of type int64. The size of the slice . \n *@par Outputs: -*y_indices: A Tensor of type int64. 
-*y_values: A Tensor. Has the same type as "values". -*y_values: A Tensor of type int64 . \n +*@li y_indices: A Tensor of type int64. +*@li y_values: A Tensor. Has the same type as "values". +*@li y_shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseSlice. @@ -313,8 +313,8 @@ REG_OP(SparseSlice) * @li sum_indices: A 2D Tensor of type int64. The indices of the sum SparseTensor, with size [nnz(sum), ndims] . \n *@par Outputs: -*x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". -*x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n +*@li x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". +*@li x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseAddGrad. @@ -363,7 +363,7 @@ REG_OP(SparseFillEmptyRowsGrad) *@par Inputs: * @li x1_indices: A 2D Tensor of type int32 or int64. -* @li The indices of the matrix "SparseTensor", with size [nnz, 2]. +*The indices of the matrix "SparseTensor", with size [nnz, 2]. * @li x1_values: A 1D Tensor. The values of the SparseTensor, with size [nnz]. * @li x1_shape: A 1D Tensor of type int64. The shape of the SparseTensor, with size [2]. * @li x2: A dense matrix Tensor of the same type as "x1_values". 2D . \n @@ -373,9 +373,9 @@ REG_OP(SparseFillEmptyRowsGrad) *@par Attributes: *@li adjoint_a: An optional bool. Defaults to "False".Use the adjoint of A in the matrix multiply. -*@li If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). +*If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). *@li adjoint_b: An optional bool. Defaults to "False".Use the adjoint of B in the matrix multiply. -*@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n +*If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseTensorDenseMatMul. @@ -400,9 +400,13 @@ REG_OP(SparseTensorDenseMatMul) * @li indices: A 0D, 1D, or 2D Tensor of type int32 or int64. * @li output_shape: A 1D Tensor of the same type as "sparse_indices". The shape of the dense output tensor. * @li values: A 1D Tensor. Values corresponding to each row of "sparse_indices", -* @li or a scalar value to be used for all sparse indices. +or a scalar value to be used for all sparse indices. * @li default_value: A Tensor of the same type as "sparse_values" . \n +*@par Attributes: +*validate_indices: If true, indices are checked to make sure they are sorted in +lexicographic order and that there are no repeats. \n + *@par Outputs: *y: A Tensor. Has the same type as "values" . \n @@ -427,7 +431,6 @@ REG_OP(SparseToDense) *Concatenation is with respect to the dense versions of these sparse tensors . \n *@par Inputs: -*3 or 5 inputs,contains: * @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D. *Indices of each input `SparseTensor`.It's a dynamic input. * @li values:A list with the same length as `indices` of `Tensor` objects with the same type. @@ -700,7 +703,6 @@ REG_OP(SparseReduceMaxSparse) *@brief Computes the sum of elements across dimensions of a SparseTensor . \n *@par Inputs: -*4 or 5 inputs, including: * @li x_indices: A 2D Tensor of type int64. *"N x R" matrix with the indices of non-empty values in a *SparseTensor, possibly not in canonical ordering. 
@@ -711,13 +713,11 @@ REG_OP(SparseReduceMaxSparse) *A length-"K" vector containing the reduction axes . \n *@par Attributes: -* keep_dims: An optional bool. Defaults to "False". +*keep_dims: An optional bool. Defaults to "False". *If true, retains reduced dimensions with length 1 . \n *@par Outputs: -* @li y_indices: A Tensor of type int64. -* @li y_values: A Tensor. Has the same type as "input_values". -* @li y_shape: A Tensor of type int64 . \n +*y: A Tensor. Has the same type as "x_values". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseReduceSum. @@ -818,7 +818,6 @@ REG_OP(SparseSplit) *@brief Generates sparse cross from a list of sparse and dense tensors . \n *@par Inputs: -*8 or 10 inputs, including: * @li indices: A list of 2D Tensor objects of type int64. * Indices of each input SparseTensor.It's a dynamic input. * @li values: A list of 1D Tensor objects of type int64 or string. @@ -899,9 +898,8 @@ REG_OP(AddManySparseToTensorsMap) *@brief Reads SparseTensors from a "SparseTensorsMap" and concatenate them . \n *@par Inputs: -*2 or 4 inputs, including: * handles: A 1D Tensor of type int64. -* The "N" serialized SparseTensor objects . \n +*The "N" serialized SparseTensor objects . \n *@par Attributes: * @li dtype: A tf.DType. The "dtype" of the SparseTensor objects stored in the "SparseTensorsMap". @@ -911,9 +909,9 @@ REG_OP(AddManySparseToTensorsMap) *The shared name for the "SparseTensorsMap" read by this op . \n *@par Outputs: -* @li indices: A Tensor of type int64. -* @li values: A Tensor of type "dtype". -* @li shape: A Tensor of type int64 . \n +* @li indices: A Tensor of type int64.2-D. The `indices` of the minibatch `SparseTensor`. +* @li values: A Tensor of type "dtype". 1-D. The `values` of the minibatch `SparseTensor`. +* @li shape: A Tensor of type int64 . 1-D. The `shape` of the minibatch `SparseTensor`. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TakeManySparseFromTensorsMap. @@ -989,8 +987,7 @@ REG_OP(SerializeManySparse) *@brief Deserializes SparseTensor objects . \n *@par Inputs: -*Two inputs, including: -* serialized_sparse: A Tensor. The serialized SparseTensor objects. +*serialized_sparse: A Tensor. The serialized SparseTensor objects. *The last dimension must have 3 columns . \n *@par Attributes: diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index 34ccb398..ab9e1dec 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -31,10 +31,10 @@ namespace ge { inner-most dimension of `x`. \n *@par Inputs: -*@li x: A Tensor. Must be the following types: complex64, complex128. \n +*x: A Tensor. Must be the following types: complex64, complex128. \n *@par Outputs: -*@li y: A complex tensor of the same rank as `x`. \n +*y: A complex tensor of the same rank as `x`. \n *@par Third-party framework compatibility * Compatible with TensorFlow IFFT operator. @@ -52,7 +52,7 @@ REG_OP(IFFT) *@li fft_length: An int32 tensor of shape [1]. The FFT length . \n *@par Outputs: -*@li y: A complex64 tensor of the same rank as `input`. The inner-most +*y: A complex64 tensor of the same rank as `input`. The inner-most dimension of `input` is replaced with the `fft_length / 2 + 1` unique frequency components of its 1D Fourier transform . \n @@ -73,7 +73,7 @@ REG_OP(RFFT) *@li fft_length: An int32 tensor of shape [1]. The FFT length. 
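Editor's aside: the fft_length bookkeeping shared by the RFFT and IRFFT hunks above: a real signal of length fft_length keeps fft_length / 2 + 1 unique complex bins, so for example fft_length = 8 stores 5 bins and IRFFT reconstructs 8 real samples from them. As compile-time arithmetic (illustration only):

#include <cstdint>

constexpr int64_t UniqueRfftBins(int64_t fft_length) { return fft_length / 2 + 1; }
static_assert(UniqueRfftBins(8) == 5, "an 8-point RFFT keeps 5 unique bins");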
\n *@par Outputs: -*@li y: A float32 tensor of the same rank as `input`. The inner-most +* y: A float32 tensor of the same rank as `input`. The inner-most dimension of `input` is replaced with the `fft_length` samples of its inverse 1D Fourier transform. \n @@ -91,10 +91,10 @@ REG_OP(IRFFT) *@brief 2D fast Fourier transform. \n *@par Inputs: -*@li x: A complex64 tensor. +*x: A complex64 tensor. *@par Outputs: -*@li y: A complex64 tensor of the same shape as `input`. The inner-most 2 +*y: A complex64 tensor of the same shape as `input`. The inner-most 2 dimensions of `input` are replaced with their 2D Fourier transform. \n *@par Third-party framework compatibility @@ -110,10 +110,10 @@ REG_OP(FFT2D) innermost dimension of the input. \n *@par Inputs: -*@li x: A Tensor. Must be the following types: complex64, complex128. \n +*x: A Tensor. Must be the following types: complex64, complex128. \n *@par Outputs: -*@li y: A complex tensor with the same shape as input. The innermost dimension +*y: A complex tensor with the same shape as input. The innermost dimension of the input is replaced by its 1-dimensional Fourier transform. \n *@par Third-party framework compatibility @@ -129,10 +129,10 @@ REG_OP(FFT) innermost dimension of the input. \n *@par Inputs: -*@li x: A Tensor. Must be the following types: complex64, complex128. \n +*x: A Tensor. Must be the following types: complex64, complex128. \n *@par Outputs: -*@li y: A complex tensor with the same shape as input. The innermost dimension +*y: A complex tensor with the same shape as input. The innermost dimension of the input is replaced by its inverse two-dimensional Fourier transform. \n *@par Third-party framework compatibility diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index fe25a46f..98d4d111 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -161,14 +161,11 @@ REG_OP(SplitVD) /** *@brief Concatenates a list of N tensors along the first dimension. *@par Inputs: -* Two inputs, including: -* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, +* One input, including: +* values: A list of Tensors. Must be one of the following types: int8, int16, int32, * int64, uint8, uint16, uint32, uint64, float16, float32. * Tensors to be concatenated. All must have size 1 in the first dimension and same shape. -* It's a dynamic input. -* @li shape: A Tensor of the same type as "x". -* The final shape of the result. Should be equal to the shapes of any input -* but with the number of input values in the first dimension . \n +* It's a dynamic input. \n *@par Attributes: * @li shape: A required list of ints. diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index 3c8e32b6..d1ec00b5 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -104,7 +104,7 @@ REG_OP(DestroyTemporaryVariable) *@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n *@par Inputs: -*x: A tensor . \n +*x: A Tensor of type float16, float32, double, bool, int8, uint8, uint16, int16, int32, uint32, uint64, int64. *@par Outputs: *y: A tensor, indicating whether "x" has been initialized . 
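Editor's aside on the ParallelConcat hunk above: every input must have first dimension 1 and identical shape, and the "shape" attribute gives the result shape, with the number of inputs in the first dimension. A fixed-size C++ sketch of that shape rule (illustration only):

#include <array>
#include <cstddef>

// N slices of shape [1, D] concatenated along dim 0 into shape [N, D].
template <std::size_t N, std::size_t D>
std::array<float, N * D> ParallelConcatSketch(
    const std::array<std::array<float, D>, N> &slices) {
  std::array<float, N * D> out{};
  for (std::size_t i = 0; i < N; ++i) {
    for (std::size_t j = 0; j < D; ++j) out[i * D + j] = slices[i][j];
  }
  return out;
}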
\n diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index c2f65c6a..f4eb763c 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -32,7 +32,10 @@ namespace ge { *@par Inputs: *This op may use some OS-provided source of non-determinism (e.g. an RNG), *so each execution will give different results. Inputs include: -*@li shape: The shape of the output tensor . \n +*shape: The shape of the output tensor . \n + +*@par Attributes: +*dtype: required, type. \n *@par Outputs: *y: Non-deterministic integer values with the specified shape . \n @@ -54,13 +57,10 @@ REG_OP(NonDeterministicInts) *counter is an unspecified implementation detail . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li delta: The amount of advancement . \n -*@par Outputs: -*y:A Returns the created operation . \n - *@par Third-party framework compatibility * Compatible with tensorflow RngSkip operator. @@ -81,11 +81,16 @@ power of two. The bias is small for values of `maxval - minval` significantly smaller than the range of the output (either `2^32` or `2^64`) . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor. -*@li minval: Minimum value (inclusive, scalar). -*@li maxval: Maximum value (exclusive, scalar) . \n +*@li counts: A 0/1-D Tensor or Python value. The counts of the binomial +distribution. Must be broadcastable with the leftmost dimension defined by `shape`. +*@li probs: A 0/1-D Tensor or Python value. The probability of success for the +binomial distribution. Must be broadcastable with the leftmost dimension defined by `shape`. \n + +*@par Attributes: +*dtype: required, type. \n *@par Outputs: *y: Random values with the specified shape . \n @@ -109,7 +114,7 @@ REG_OP(StatefulRandomBinomial) *The generated values will have mean 0 and standard deviation 1 . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -134,7 +139,7 @@ REG_OP(StatefulStandardNormalV2) *deviations from the mean are dropped and re-picked . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -158,7 +163,7 @@ The generated values follow a uniform distribution in the range `[0, 1)`. The lower bound 0 is included in the range, while the upper bound 1 is excluded. *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -181,7 +186,7 @@ REG_OP(StatefulUniform) The generated values are uniform integers covering the whole range of `dtype` .
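Editor's aside on the "slightly biased unless maxval - minval is an exact power of two" remark that recurs in these hunks: reducing a full-range draw modulo the span over-represents the smaller residues. A sketch of the biased reduction (illustration only; real kernels may use rejection sampling instead):

#include <cstdint>

uint64_t BiasedBoundedDraw(uint64_t raw, uint64_t minval, uint64_t maxval) {
  const uint64_t span = maxval - minval;  // must be non-zero
  // When 2^64 % span != 0, the leftover draws map onto the low residues, so those
  // outputs are slightly more likely; the bias vanishes for power-of-two spans.
  return minval + raw % span;
}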
\n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -209,7 +214,7 @@ power of two. The bias is small for values of `maxval - minval` significantly smaller than the range of the output (either `2^32` or `2^64`) . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor. *@li minval: Minimum value (inclusive, scalar). diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index f9cc2549..a78d63a1 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -295,7 +295,7 @@ REG_OP(StringSplit) *@par Inputs: include: -*@li input:A Tensor of type string. The text to be processed. \n +*input: A Tensor of type string. The text to be processed. \n *@par Attributes: *@li pattern: A string. The regular expression to match the input. *@li rewrite: A string. The rewrite to be applied to the matched expression. *@li replace_global: An optional bool. Defaults to True. If True, the replacement is global, otherwise the replacement is done only on the first match. -*@par output: -*@li output::A Tensor of type string. +*@par Outputs: +*output: A Tensor of type string. */ REG_OP(StaticRegexReplace) .INPUT(input, TensorType({DT_STRING})) @@ -322,13 +322,13 @@ REG_OP(StaticRegexReplace) *@par Inputs: include: -*@li input:A Tensor of type string. The text to be processed. \n +*input: A Tensor of type string. The text to be processed. \n *@par Attributes: -*@li pattern:A string. The regular expression to match the input. +*pattern: A string. The regular expression to match the input. -*@par output: -*@li output::A bool tensor with the same shape as `input`. +*@par Outputs: +*output: A bool tensor with the same shape as `input`. */ REG_OP(StaticRegexFullMatch) .INPUT(input, TensorType({DT_STRING})) @@ -347,10 +347,10 @@ include: *@li num_segments: A Tensor. Must be one of the following types: int32, int64. A scalar. *@par Attributes: -*@li separator:An optional string. Defaults to "". The separator to use when joining. +*separator: An optional string. Defaults to "". The separator to use when joining. -*@par output: -*@li output::A Tensor of type string.. +*@par Outputs: +*output: A Tensor of type string. */ REG_OP(UnsortedSegmentJoin) .INPUT(input, TensorType({DT_STRING})) .INPUT(segment_ids, TensorType({DT_INT32, DT_INT64})) @@ -366,13 +366,13 @@ REG_OP(UnsortedSegmentJoin) *@par Inputs: include: -*@li input:A Tensor of type string. The text to be processed. +*input: A Tensor of type string. The text to be processed. *@par Attributes: -*@li encoding:An optional string. Defaults to "". +*encoding: An optional string. Defaults to "". -*@par output: -*@li output::A Tensor of type string.. +*@par Outputs: +*output: A Tensor of type string. */ REG_OP(StringLower) .INPUT(input, TensorType({DT_STRING})) .OUTPUT(output, TensorType({DT_STRING})) .ATTR(encoding, String, "") .OP_END_FACTORY_REG(StringLower) @@ -386,13 +386,13 @@ REG_OP(StringLower) *@par Inputs: include: -*@li input:A Tensor of type string. The text to be processed. +*input: A Tensor of type string. The text to be processed. *@par Attributes: -*@li encoding:An optional string. Defaults to "". +*encoding: An optional string. Defaults to "". -*@par output: -*@li output::A Tensor of type string.. +*@par Outputs: +*output: A Tensor of type string.
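Editor's aside: a byte-wise sketch of what StringUpper documents for the default empty "encoding" attribute (ASCII illustration only, not the operator's implementation):

#include <algorithm>
#include <cctype>
#include <string>

std::string StringUpperAscii(std::string s) {
  std::transform(s.begin(), s.end(), s.begin(),
                 [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
  return s;  // e.g. "abc1" -> "ABC1"
}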
*/ REG_OP(StringUpper) .INPUT(input, TensorType({DT_STRING})) @@ -901,10 +901,10 @@ REG_OP(DecodeBase64) *@brief StringNormalization performs string operations for basic cleaning . \n *@par Inputs: -*@li input: only accepts [C] or [1, C] UTF-8 strings tensor . \n +*input: only accepts [C] or [1, C] UTF-8 strings tensor . \n *@par Outputs: -*@li output: UTF-8 strings tensor after cleaning . \n +*output: UTF-8 strings tensor after cleaning . \n *@par Attributes: *@li stopwords : list of strings (default is empty). @@ -919,13 +919,13 @@ case-sensitive. Default is false. *string enum that cases output to be lowercased/uppercases/unchanged. Valid values are "LOWER", "UPPER", "NONE". Default is "NONE". -*@li local : string (default is "en_US"). +*@li locale : string (default is "C"). *Environment dependent string that denotes the locale according to which output -strings needs to be upper/lowercased.Default en_US or platform specific equivalent -as decided by the implementation . \n +strings need to be upper/lowercased. Default is C or a platform-specific equivalent +as decided by the implementation. \n *@attention Constraints: -*@li input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C]. +*input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C]. */ REG_OP(StringNormalizer) .INPUT(input, TensorType({DT_STRING})) .OUTPUT(output, TensorType({DT_STRING})) .ATTR(stopwords, ListString, {}) .ATTR(is_case_sensitive, Bool, false) .ATTR(case_change_action, String, "NONE") - .ATTR(local, String, "en_US") + .ATTR(locale, String, "C") .OP_END_FACTORY_REG(StringNormalizer) } // namespace ge diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 4a46e35f..f403fe12 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -29,15 +29,15 @@ namespace ge { *@par Inputs: *The input handle must have the resource type. Inputs include: -*@li x:A list of Tensor objects. One or more tensors from which the enqueued tensors should be taken . \n +*x:A list of Tensor objects. One or more tensors from which the enqueued tensors should be taken . \n *@par Outputs: -*@li y:A list of Tensor objects. One or more tensors from which the enqueued tensors should be taken . \n +*y:A list of Tensor objects. One or more tensors from which the enqueued tensors should be taken . \n *@par Attributes: -*@li type: An optional ge::DataType. It refers to the target data type of outputs . \n +*type: An optional ge::DataType. It refers to the target data type of outputs . \n *@par Third-party framework compatibility *Compatible with tensorflow QueueIsClosed operator. @@ -723,11 +723,12 @@ REG_OP(CompressFcOp) *@brief Performs Col2im for each batch entry. \n *@par Inputs: -*@li input_x: The Col Tensor. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`. -where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 \n +*@li x: The Col Tensor. 4-D, shape: `(n, c, kernel_h*kernel_w, ho*wo)`. +where ho/wo = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 for d in {h, w}. +*@li output_size: The img shape Tensor. 1-D, shape:`(2)`, value: (output_h, output_w). \n *@par Outputs: -*@li output_y: The img Tensor. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n +*y: The img Tensor. 4-D, shape: `(n, c, output_h, output_w)`. \n *@par Attributes: *@li kernel_shape: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution. @@ -909,7 +910,7 @@ output shape would be [max(ngram_indexes) + 1].
If input shape is [N, C], this o *@li either pool_strings or pool_int64s attributes must be present but not both. */ -REG_OP(TfidVectorizer) +REG_OP(TfIdfVectorizer) .INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING})) .OUTPUT(output, TensorType({DT_FLOAT})) .REQUIRED_ATTR(max_gram_length, Int) @@ -921,7 +922,7 @@ REG_OP(TfidVectorizer) .ATTR(pool_int64s, ListInt, {}) .ATTR(pool_strings, ListString, {}) .ATTR(weights, ListFloat, {}) - .OP_END_FACTORY_REG(TfidVectorizer) + .OP_END_FACTORY_REG(TfIdfVectorizer) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 7fc1cdea..70e42dc9 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -20,7 +20,7 @@ #include #include "toolchain/prof_callback.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -357,7 +357,7 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_ */ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index a244c793..76836e7b 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -43,6 +43,7 @@ typedef enum tagRtChipType { CHIP_LHISI, CHIP_DC, CHIP_CLOUD_V2, + CHIP_NO_DEVICE, CHIP_END, } rtChipType_t; @@ -53,11 +54,11 @@ typedef enum tagRtAicpuScheType { } rtAicpuScheType; typedef enum tagRtDeviceCapabilityType { - RT_SCHEDULE_SOFTWARE = 0, // SoftWare Schedule - RT_SCHEDULE_SOFTWARE_OPT, - RT_SCHEDULE_HARDWARE, // HWTS Schedule - RT_AICPU_BLOCKING_OP_NOT_SUPPORT, - RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation + RT_SCHEDULE_SOFTWARE = 0, // Software Schedule + RT_SCHEDULE_SOFTWARE_OPT, + RT_SCHEDULE_HARDWARE, // HWTS Schedule + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, + RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation } rtDeviceCapabilityType; typedef enum tagRtVersion { @@ -235,7 +236,7 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); */ RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index e95d4c89..c597a657 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -157,7 +157,7 @@ RTS_API rtError_t rtGetGroupCount(uint32_t *count); */ RTS_API rtError_t rtSetCtxINFMode(bool mode); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 18d837eb..4a9a5817 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if 
defined(__cplusplus) extern "C" { #endif @@ -80,15 +80,15 @@ typedef enum tagMemoryInfo { } rtMemoryInfo_t; typedef enum tagRtDeviceModuleType { - RT_MODULE_TYPE_SYSTEM = 0, - RT_MODULE_TYPE_AICPU, - RT_MODULE_TYPE_CCPU, - RT_MODULE_TYPE_DCPU, - RT_MODULE_TYPE_AICORE, - RT_MODULE_TYPE_TSCPU, - RT_MODULE_TYPE_PCIE, - RT_MODULE_TYPE_VECTOR_CORE -} tagRtDeviceModuleType_t; + RT_MODULE_TYPE_SYSTEM = 0, /**< system info*/ + RT_MODULE_TYPE_AICPU, /**< aicpu info*/ + RT_MODULE_TYPE_CCPU, /**< ccpu info*/ + RT_MODULE_TYPE_DCPU, /**< dcpu info*/ + RT_MODULE_TYPE_AICORE, /**< AI CORE info*/ + RT_MODULE_TYPE_TSCPU, /**< tscpu info*/ + RT_MODULE_TYPE_PCIE, /**< PCIE info*/ + RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/ +} rtDeviceModuleType_t; /** * @ingroup dvrt_dev @@ -380,7 +380,7 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); */ RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 6e451695..33e2f4c1 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile(); */ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 1cd1a198..81b635c3 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -41,16 +41,6 @@ typedef enum rtEventWaitStatus { #define RT_EVENT_DDSYNC_NS 0x01U #define RT_EVENT_STREAM_MARK 0x02U #define RT_EVENT_DDSYNC 0x04U #define RT_EVENT_TIME_LINE 0x08U -#define RT_EVENT_DDSYNC_NS 0x01U -#define RT_EVENT_STREAM_MARK 0x02U -#define RT_EVENT_DDSYNC 0x04U -#define RT_EVENT_TIME_LINE 0x08U - -#define RT_EVENT_DDSYNC_NS 0x01U -#define RT_EVENT_STREAM_MARK 0x02U -#define RT_EVENT_DDSYNC 0x04U -#define RT_EVENT_TIME_LINE 0x08U - /** * @ingroup dvrt_event * @brief create event instance @@ -282,7 +272,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs */ RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 9b0221c7..c1b9bd6d 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -20,7 +20,7 @@ #include "base.h" #include "stream.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -647,7 +647,7 @@ RTS_API rtError_t rtStartMDCProfiler(void **addr, uint32_t length); */ RTS_API rtError_t rtStopMDCProfiler(void *addr); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index bace4bc6..b049e762 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ 
-24,7 +24,7 @@ #include "config.h" #include "stream.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -547,7 +547,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); */ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h old mode 100755 new mode 100644 index 720da7cd..f2809218 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Huawei Technologies Co. , Ltd. 2021. All rights reserved. + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * Description: ffts interface */ @@ -8,7 +8,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -19,8 +19,8 @@ extern "C" { #define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U typedef enum tagFftsType { - RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define - RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define + RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define + RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define } rtFftsType_t; typedef enum tagFftsSubTaskType { @@ -37,7 +37,7 @@ typedef enum tagFftsSubTaskType { } rtFftsSubTaskType_t; typedef struct tagManualThreadDmuInfo { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint16_t numOuter; uint16_t numInner; uint32_t strideOuter; @@ -50,44 +50,43 @@ typedef struct tagManualThreadDependency { } rtManualThreadDependency_t; typedef struct tagManualThreadAicAivInfo { - uint64_t taskParamAddr; // device mem + uint64_t taskParamAddr; // device mem uint16_t taskParamOffset; // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 - // when satMode=0 and FP16 computation with none INF inputs overflows/underflows - // results will be saturated to +/- MAX of FP16 + // when satMode=0 and FP16 computation with non-INF inputs overflows/underflows, + // results will be saturated to +/-MAX of FP16 uint8_t satMode; - uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved - uint8_t iCachePrefetchCnt; // units is 2K - uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 - uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 - uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts - // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index - uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3:reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 + uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 + uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts + // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index + uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; - rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length 
is the last threadPrefetchDmuIdx[threadDim - 1] + rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1] rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; } rtManualThreadAicAivInfo_t; typedef struct tagAutoThreadPrefetch { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint32_t dataAddrOffset; uint32_t nonTailDataLen; uint32_t tailDataLen; } rtAutoThreadPrefetch_t; typedef struct tagAutoThreadAicAivInfo { - uint64_t taskParamAddr; // device mem + uint64_t taskParamAddr; // device mem uint16_t taskParamOffset; // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 - // when satMode=0 and FP16 computation with none INF inputs overflows/underflows - // results will be saturated to +/- MAX of FP16 + // when satMode=0 and FP16 computation with non-INF inputs overflows/underflows, results will be saturated to +/-MAX of FP16 uint8_t satMode; - uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved - uint8_t iCachePrefetchCnt; // units is 2K - uint8_t prefetchEnableBitmap; // 8 bit bitmap - uint8_t prefetchOnceBitmap; // 8 bit bitmap + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3:reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap + uint8_t prefetchOnceBitmap; // 8 bit bitmap uint16_t tailBlkDim; uint16_t nonTailBlkDim; @@ -95,13 +94,13 @@ typedef struct tagAutoThreadAicAivInfo { const char *nonTailTaskFuncStub; const char *tailTaskFuncStub; - // for prefetch, valid num is prefetchEnableBitmap bit count - // if prefetchEnableBitmap = '00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid + // for prefetch, the valid entry count is the number of bits set in prefetchEnableBitmap. 
+ // if prefetchEnableBitmap='00010011', three prefetches are needed and only srcPrefetch[0], [1], [2] are valid rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; } rtAutoThreadAicAivInfo_t; typedef struct tagAutoThreadCacheInfo { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint32_t dataAddrOffset; uint32_t nonTailDataLen; uint32_t tailDataLen; @@ -109,7 +108,7 @@ } rtAutoThreadCacheInfo_t; typedef struct tagManualThreadCacheInfo { - rtManualThreadDmuInfo_t *dmuList; // 0-64k + rtManualThreadDmuInfo_t *dmuList; // 0-64k uint16_t dmuNum; uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM]; @@ -152,11 +151,11 @@ typedef struct tagFftsSubTaskInfo { } rtFftsSubTaskInfo_t; typedef struct tagFftsDescInfo { - uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder - uint8_t di; // discard invalidate - uint8_t dw; // discard write back - uint8_t df; // discard flush - uint8_t dataSplitUnit; // split source or ticket cache by 2~dataSplitUnit MB + uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder + uint8_t di; // discard invalidate + uint8_t dw; // discard write back + uint8_t df; // discard flush + uint8_t dataSplitUnit; // split source or ticket cache by 2^dataSplitUnit MB uint8_t prefetchOstNum; uint8_t cacheMaintainOstNum; uint8_t aicPrefetchUpper; @@ -166,20 +165,20 @@ } rtFftsDescInfo_t; typedef struct tagFftsTaskInfo { - rtFftsType_t fftsType; + rtFftsType_t fftsType; uint16_t subTaskNum; uint16_t tickCacheNum; rtFftsDescInfo_t fftsDesc; // sub task desc, real num is subTaskNum rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM]; - // ticket cache, real number is ticketCacheNum + // ticket cache, real number is tickCacheNum. rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; } rtFftsTaskInfo_t; RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif //__CCE_RUNTIME_FFTS_H +#endif // __CCE_RUNTIME_FFTS_H diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index a7618b45..d4af72c5 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -490,7 +490,7 @@ RTS_API rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *ad */ RTS_API rtError_t rtDebugUnRegister(rtModel_t model); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h index 188656b1..016c352a 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -8,7 +8,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -23,6 +23,7 @@ extern "C" { */ RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream); + /** * @ingroup rt_stars * @brief create cdq instance. 
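For context on how the FFTS structures above compose, a minimal hypothetical launch sequence; the field values are illustrative only, the include paths are assumed, and rtStreamCreate is assumed from stream.h with its usual (stream, priority) signature:

#include "runtime/rt_ffts.h"
#include "runtime/stream.h"

void LaunchAutoThreadTask() {
    rtFftsTaskInfo_t taskInfo = {};
    taskInfo.fftsType = RT_FFTS_TYPE_AUTO_THREAD;  // ffts auto thread mode
    taskInfo.subTaskNum = 0;                       // real number of valid entries in subTask[]
    taskInfo.tickCacheNum = 0;                     // real number of valid entries in ticketCache[]
    taskInfo.fftsDesc.tm = 0;                      // 0: ordered subtask kickstart
    taskInfo.fftsDesc.dataSplitUnit = 1;           // split by 2^1 MB, per the corrected comment

    rtStream_t stream = nullptr;
    if (rtStreamCreate(&stream, 0) != RT_ERROR_NONE) {
        return;
    }
    // subTask[] would be populated with rtFftsSubTaskInfo_t entries before a real launch.
    (void)rtFftsTaskLaunch(&taskInfo, stream);
}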
@@ -76,10 +77,11 @@ RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *da * @param [in] stream launch task on the stream * @return RT_ERROR_NONE for ok, others failed */ -RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *prtAddr, +RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *ptrAddr, rtStream_t stream); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) + } #endif #endif // __CCE_RUNTIME_STARS_H diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index f9981514..3a078e99 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -20,7 +20,7 @@ #include "base.h" #include "event.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -211,7 +211,7 @@ RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, con */ RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index 07b32149..9350f9d4 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -84,6 +84,7 @@ #endif #include +#include namespace Msprofiler { namespace Api { @@ -105,6 +106,37 @@ extern "C" { MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); +typedef int Status; +typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1; +/// +/// @ingroup AscendCL +/// @brief subscribe profiling data of graph +/// @param [in] graphId: the graph id subscribed +/// @param [in] profSubscribeConfig: pointer to config of model subscribe +/// @return Status result of function +/// +Status aclgrphProfGraphSubscribe(const uint32_t graphId, + const aclprofSubscribeConfig1 *profSubscribeConfig); + +/// +/// @ingroup AscendCL +/// @brief unsubscribe profiling data of graph +/// @param [in] graphId: the graph id subscribed +/// @return Status result of function +/// +Status aclgrphProfGraphUnSubscribe(const uint32_t graphId); + +/** + * @ingroup AscendCL + * @brief get graph id from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * + * @retval graph id of subscription data + * @retval 0 for failed + */ +size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index); #ifdef __cplusplus } #endif diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h index 5073cfb1..36b55216 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_callback.h +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -55,6 +55,17 @@ struct ReporterData { }; /** + * @name HashData + * @brief struct of data to hash + */ +struct HashData { + int deviceId; // the index of device + size_t dataLen; // the length of data + unsigned char *data; // the data content + uint64_t hashId; // the id of hashed data +}; + +/** * @name MsprofReporterModuleId * @brief module id of data to report */ @@ -75,6 +86,7 @@ enum MsprofReporterCallbackType { MSPROF_REPORTER_INIT, // init reporter MSPROF_REPORTER_UNINIT, // uninit reporter MSPROF_REPORTER_DATA_MAX_LEN, // data max length for 
calling report callback + MSPROF_REPORTER_HASH // hash data to id }; /**
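A hypothetical sketch of using the new MSPROF_REPORTER_HASH type together with struct HashData; `reporterCallback` stands in for the profiling callback registered elsewhere, and its (moduleId, type, data, len) shape is an assumption inferred from MsprofReporterCallbackType, not part of this patch:

#include <cstring>
#include "toolchain/prof_callback.h"

// Assumed shape of the registered reporter callback.
extern int reporterCallback(uint32_t moduleId, uint32_t type, void *data, uint32_t len);

void HashKernelName(const char *kernelName, uint32_t moduleId) {
    struct HashData hd = {};
    hd.deviceId = 0;                        // the index of device
    hd.dataLen = strlen(kernelName);        // the length of data
    hd.data = (unsigned char *)kernelName;  // the data content
    hd.hashId = 0;                          // filled in by the callback on success
    (void)reporterCallback(moduleId, MSPROF_REPORTER_HASH, &hd, sizeof(hd));
    // hd.hashId can now stand in for the raw string in subsequent ReporterData reports.
}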