From 452ec62cb297fe61cab05c62a0ebd1176fe4e060 Mon Sep 17 00:00:00 2001
From: dingpeifei <dingpeifei1@huawei.com>
Date: Thu, 29 Apr 2021 16:10:37 +0800
Subject: [PATCH] code_sync_0428_inc

---
 inc/external/acl/error_codes/rt_error_codes.h      | 71 +++++++++++-----------
 inc/external/runtime/rt_error_codes.h              | 71 +++++++++++-----------
 third_party/fwkacllib/inc/ops/data_flow_ops.h      | 16 +++++
 .../fwkacllib/inc/ops/elewise_calculation_ops.h    | 14 +++--
 third_party/fwkacllib/inc/ops/image_ops.h          | 13 ++--
 .../fwkacllib/inc/ops/matrix_calculation_ops.h     | 31 ++++++++++
 third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h  | 28 ---------
 third_party/fwkacllib/inc/ops/nn_calculation_ops.h |  3 +
 third_party/fwkacllib/inc/ops/nn_detect_ops.h      | 53 ++++++++--------
 third_party/fwkacllib/inc/ops/nn_norm_ops.h        | 44 +++++++-------
 third_party/fwkacllib/inc/ops/nn_pooling_ops.h     | 59 +++++++++++++++---
 third_party/fwkacllib/inc/ops/nn_training_ops.h    | 49 +++++++++++++++
 third_party/fwkacllib/inc/ops/parsing_ops.h        | 18 +++---
 third_party/fwkacllib/inc/ops/random_ops.h         |  2 +
 third_party/fwkacllib/inc/ops/rnn.h                | 35 +++++++----
 third_party/fwkacllib/inc/ops/selection_ops.h      | 27 ++++++++
 third_party/fwkacllib/inc/ops/string_ops.h         |  2 +
 .../fwkacllib/inc/toolchain/tuning_tool/tune_api.h | 30 ++-------
 18 files changed, 353 insertions(+), 213 deletions(-)

diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h
index 437a5544..1c196c48 100644
--- a/inc/external/acl/error_codes/rt_error_codes.h
+++ b/inc/external/acl/error_codes/rt_error_codes.h
@@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;   // no stream re
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;   // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;    // no model resource
 
-static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;          // runtime internal error
-static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                // ts internel error
-static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;        // task full in stream
-static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;       // task empty in stream
-static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;     // stream not complete
-static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;         // end of sequence
-static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;      // event not complete
-static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;   // context release error
-static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;             // soc version error
-static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;   // task type not support
-static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;          // ts lost heartbeat
-static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;           // model execute failed
-static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;          // report timeout
-static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;                 // sys dma error
-static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;          // aicore timeout
-static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;        // aicore exception
-static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;   // aicore trap exception
-static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;           // aicpu timeout
-static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;         // aicpu exception
-static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;  // aicpu datadump response error
-static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;     // aicpu model operate response error
-static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;         // profiling error
-static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;               // ipc error
-static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;      // model abort normal
-static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;    // kernel unregistering
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;     // ringbuffer not init
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;      // ringbuffer no data
-static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;           // kernel lookup error
-static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;        // kernel register duplicate
-static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;     // debug register failed
-static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030;   // debug unregister failed
-static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;           // label not in current context
-static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;         // program register num use out
-static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;         // device setup error
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;              // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                    // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;            // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;           // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;         // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;             // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;          // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;       // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;                 // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;       // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;              // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;               // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;              // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;                     // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;              // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;            // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;       // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;               // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;             // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;      // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;         // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;             // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;                   // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;          // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;        // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;         // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;          // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;               // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;            // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;         // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030;       // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;               // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;             // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;             // device setup error
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034;         // vector core timeout
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035;       // vector core exception
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036;  // vector core trap exception
 
 static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;    // drv internal error
 static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900;  // aicpu internal error
diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h
index 437a5544..1c196c48 100644
--- a/inc/external/runtime/rt_error_codes.h
+++ b/inc/external/runtime/rt_error_codes.h
@@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;   // no stream re
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;   // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;    // no model resource
 
-static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;          // runtime internal error
-static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                // ts internel error
-static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;        // task full in stream
-static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;       // task empty in stream
-static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;     // stream not complete
-static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;         // end of sequence
-static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;      // event not complete
-static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;   // context release error
-static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;             // soc version error
-static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;   // task type not support
-static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;          // ts lost heartbeat
-static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;           // model execute failed
-static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;          // report timeout
-static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;                 // sys dma error
-static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;          // aicore timeout
-static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;        // aicore exception
-static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;   // aicore trap exception
-static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;           // aicpu timeout
-static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;         // aicpu exception
-static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;  // aicpu datadump response error
-static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;     // aicpu model operate response error
-static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;         // profiling error
-static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;               // ipc error
-static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;      // model abort normal
-static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;    // kernel unregistering
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;     // ringbuffer not init
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;      // ringbuffer no data
-static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;           // kernel lookup error
-static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;        // kernel register duplicate
-static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;     // debug register failed
-static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030;   // debug unregister failed
-static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;           // label not in current context
-static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;         // program register num use out
-static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;         // device setup error
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;              // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                    // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;            // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;           // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;         // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;             // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;          // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;       // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;                 // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;       // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;              // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;               // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;              // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;                     // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;              // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;            // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;       // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;               // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;             // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;      // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;         // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;             // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;                   // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;          // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;        // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;         // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;          // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;               // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;            // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;         // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030;       // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;               // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;             // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;             // device setup error
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034;         // vector core timeout
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035;       // vector core exception
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036;  // vector core trap exception
 
 static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;    // drv internal error
 static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900;  // aicpu internal error
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h
index 0043c027..05de1b0a 100644
--- a/third_party/fwkacllib/inc/ops/data_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h
@@ -2331,6 +2331,22 @@ REG_OP(CacheAllIndexToLocal)
   .REQUIRED_ATTR(dtype, Type)
   .OP_END_FACTORY_REG(CacheAllIndexToLocal)
 
+/**
+*@brief DynamicGetNext, dynamic get next data
+*@par Inputs:
+*x: the iterator, all types are available
+*@par Outputs:
+*y: the data in iterator, all types are available
+*@par Attributes:
+*output_types: types of all outputs
+*output_shapes: shapes of all outputs
+*_dynamic_graph_execute_mode: dynamic graph execution mode,
+*value is one of lazy_recompile and dynamic_execute
+*_getnext_inputs_shape_range: shape ranges of outputs,
+*it takes effect only when _dynamic_graph_execute_mode is dynamic_execute
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
 REG_OP(DynamicGetNext)
   .INPUT(x, TensorType::ALL())
   .DYNAMIC_OUTPUT(y, TensorType::ALL())
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index 209967bd..8c87cfe8 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -28,7 +28,7 @@ namespace ge {
 
 *@par Inputs:
 *Dynamic inputs, including:
-* @li x: A list of Tensor objects, each with same shape and type. The supported types are:
+*x: A list of Tensor objects, each with same shape and type. The supported types are:
 *   float16, float32, double, int32, uint8, int16, int8, complex64, int64,
 *   qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n
 
@@ -330,8 +330,8 @@ REG_OP(Sub)
 *@brief computes the absolute value of a tensor. \n
 
 *@par Inputs:
-*One inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
+*One input, including: \n
+*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n
@@ -3243,9 +3243,11 @@ REG_OP(Fills)
 *@brief Add tensor with scale. \n
 
 *@par Inputs:
-*Five inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
-* @li x2: A scale. Must be float. \n
+*One input, including: \n
+*x: A Tensor. Must be one of the following types: int32, int16, float16, float32. \n
+
+*@par Attributes:
+*value: A scale. Must be float. \n
 
 *@par Outputs:
 *@li y: A Tensor. Has the same type and shape as "x1". \n
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index 3ef4d95e..e6802c1e 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -1134,7 +1134,7 @@ REG_OP(DecodeBmp)
     .ATTR(channels, Int, 0)
     .OP_END_FACTORY_REG(DecodeBmp)
 
-/*
+/**
 *@brief Function parse image from string to int. \n
 
 *@par Inputs:
@@ -1602,11 +1602,11 @@ REG_OP(DecodeJpeg)
 *@brief Image warping using per-pixel flow vectors. \n
 
 *@par Inputs:
-*@li images: 4-D Tensor with shape `[batch, height, width, channels]`.
+*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
 *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
 
 *@par Outputs:
-*y: Returns 4-D with the same shape and dtype as `images`. \n
+*y: Returns 4-D with the same shape and dtype as `image`. \n
 */
 REG_OP(DenseImageWarp)
     .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -1709,11 +1709,11 @@ REG_OP(ResizeGradD)
 
 *@par Inputs:
 *@li grad: gradients with respect to DenseImageWarp output.
-*@li images: 4-D Tensor with shape `[batch, height, width, channels]`.
+*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
 *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
 
 *@par Outputs:
-*grad_image: Returns 4-D with the same shape and dtype as `images`.
+*grad_image: Returns 4-D with the same shape and dtype as `image`.
 *grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
 */
 REG_OP(DenseImageWarpGrad)
@@ -1747,6 +1747,9 @@ REG_OP(DenseImageWarpGrad)
 
 *@par Third-party framework compatibility
 *Compatible with pytorch GridSampler2D operator.
+
+*@par Restrictions:
+*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(GridSampler2D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index 3340007c..083d4f9c 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -444,6 +444,9 @@ REG_OP(ScatterNdUpdate)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterUpdate.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterUpdate)
     .INPUT(x, TensorType::BasicType())
@@ -565,6 +568,9 @@ REG_OP(ScatterNdAdd)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterAdd.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterAdd)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
@@ -623,6 +629,9 @@ REG_OP(ScatterNdSub)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterSub.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterSub)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
@@ -1045,6 +1054,28 @@ REG_OP(MatrixDiagV2)
     .OUTPUT(output, TensorType::BasicType())
     .OP_END_FACTORY_REG(MatrixDiagV2)
 
+/**
+* @brief Add updates to var_out according to axis and indices.
+
+* @par Inputs:
+* Three inputs, including:
+* @li var: A Tensor. Must be one of the following types:
+*     float16, float32, int32, int8, uint8.
+* @li indices: A Tensor of the indices, type should be int32.
+* @li updates: A Tensor of the same type as "var".
+
+* @par Attributes:
+* @li axis: A required int specifying the axis along which the indexed add is performed.
+
+* @par Outputs:
+* @li var_out: A Tensor. Same as input "var".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator index_add.
+
+* @par Restrictions:
+* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
 REG_OP(IndexAdd)
     .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
     .INPUT(indices, TensorType({DT_INT32}))
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index ddd70bc8..9629976e 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -419,35 +419,7 @@ REG_OP(BNInference)
     .ATTR(use_global_stats, Bool,true)
     .ATTR(mode, Int,1)
     .OP_END_FACTORY_REG(BNInference)
-/**
-*@brief aicpu batch normalization host  . \n
-
-*@par Inputs:
 
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
-*@li momentum: An optional float, mean and variance's Scale factor
-*@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
-*@li use_global_stats: mean inference mode , only can be "True".
-*@li mode: An optional attr, not use
-*@par Outputs:
-*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean
-*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance
-*/
-REG_OP(BnHost)
-    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .ATTR(epsilon, Float, 0.00001)
-    .ATTR(mode, Int, 1)
-    .ATTR(use_global_stats, Bool, true)
-    .OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OP_END_FACTORY_REG(BnHost)
 /**
 *@brief Performs batch normalization . \n
 
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index 8cbdf9ff..0eeeb511 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -989,6 +989,8 @@ REG_OP(Conv2DCompress)
 *@li deformable_groups: Optional. An integer of type int32. The number of
 * deformable group partitions. In_channels must be divisible by
 * "deformable_groups". Defaults to 1.
+*@li modulated: Optional. A bool specifying the version of DeformableConv2D: true means v2,
+* false means v1. Currently only v2 is supported. Defaults to true.
 *\n
 *\n
 * The following value range restrictions must be met:
@@ -1037,6 +1039,7 @@ REG_OP(DeformableConv2D)
     .ATTR(groups, Int, 1)
     .ATTR(data_format, String, "NHWC")
     .ATTR(deformable_groups, Int, 1)
+    .ATTR(modulated, Bool, true)
     .OP_END_FACTORY_REG(DeformableConv2D)
 
 /**
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 33148e62..cc60f483 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1202,35 +1202,6 @@ REG_OP(RpnProposalsD)
     .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
     .OP_END_FACTORY_REG(RpnProposalsD)
 
-/**
-*@brief Computes Score Filte Pre-Sort function.
-
-*@par Inputs:
-*Inputs include:
-* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
-* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
-
-*@par Attributes:
-* @li score_threshold: required, float, threahold of topk process.
-* @li k: required, Int, threahold of topk process.
-* @li score_filter: bool, mark of score_filter. Defaults to "true"
-* @li core_max_num: int, max number of core. Defaults to "8"
-*@par Outputs:
-* @li sorted_proposal: A Tensor. Must be float16.
-*                      N-D with shape [8*6002, 8].
-* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
-*/
-
-REG_OP(ScoreFiltePreSort)
-    .INPUT(rois, TensorType({DT_FLOAT16}))
-    .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
-    .OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16}))
-    .OUTPUT(proposal_num, TensorType({ DT_UINT32}))
-    .REQUIRED_ATTR(score_threshold, Float)
-    .REQUIRED_ATTR(k, Int)
-    .ATTR(score_filter, Bool, true)
-    .ATTR(core_max_num, Int, 8)
-    .OP_END_FACTORY_REG(ScoreFiltePreSort)
 
 /**
 *@brief Computes Score Filte Pre-Sort function.
@@ -1500,6 +1471,26 @@ REG_OP(Sort)
     .ATTR(descending, Bool, false)
     .OP_END_FACTORY_REG(Sort)
 
+/**
+*@brief Computes iou for input bboxes and gtboxes.
+
+*@par Inputs:
+* Two inputs, including:
+*@li bboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1),
+*@li gtboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1).\n
+
+*@par Attributes:
+*@li mode: An optional attribute of type string, specifying the iou computation mode. \n
+
+*@par Outputs:
+*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n
+
+*@attention Constraints:
+* Only computation of float16 data is supported.
+
+*@par Restrictions:
+*Warning:THIS FUNCTION IS DEPRECATED. Please use Iou instead.
+*/
 REG_OP(PtIou)
     .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1541,6 +1532,9 @@ selected indices from the boxes tensor, where M <= max_output_size. \n
 
 *@par Third-party framework compatibility
 *Compatible with onnx NonMaxSuppression operator.
+
+*@par Restrictions:
+*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(NonMaxSuppressionV6)
@@ -1729,3 +1723,4 @@ REG_OP(PSROIPoolingGradV2D)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
+
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index a0251d88..22023f46 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -1073,26 +1073,6 @@ REG_OP(INInferV2D)
     .OP_END_FACTORY_REG(INInferV2D)
 
 /**
-*@brief Performs instance normalization for inference of InHost part.
-
-*@par Inputs:\n
-* One input, including: (NC1HWC0 supported)
-* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
-
-*@par Attributes:
-* epsilon: An optional float32, specifying the small value added to
-variance to avoid dividing by zero. Defaults to "0.00001" . \n
-
-*@par Outputs:\n
-* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
-*/
-REG_OP(InHost)
-     .INPUT(variance, TensorType({DT_FLOAT}))
-     .OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
-     .ATTR(epsilon, Float, 0.00001)
-     .OP_END_FACTORY_REG(InHost)
-
-/**
 * @brief perform instance normalization to x. \n
 
 * @par Inputs:
@@ -1124,6 +1104,26 @@ REG_OP(InstanceNorm)
     .REQUIRED_ATTR(epsilon, Float)
     .OP_END_FACTORY_REG(InstanceNorm)
 
+/**
+* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li grad: A Tensor. Must be one of the following types: float16, float32.
+* Required.
+* @li input: A Tensor. Has the same type as "grad". Required.
+* @li target: A Tensor. Has the same type as "grad". Required. \n
+
+* @par Attributes:
+* @li reduction: An optional attribute of type String. Defaults to "mean". \n
+* @li log_target: An optional attribute of type Bool. Defaults to false. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "grad". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator KlDivLossGrad.
+*/
 REG_OP(KlDivLossGrad)
     .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1453,12 +1453,12 @@ REG_OP(PoissonNllLoss)
  *
  * 
  * @par Output:
- * y: A mutable Tensor of type int32, with the shape of [num_step, batch_size, hidden_size]. \n
+ * seq_mask: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
  *
  */
 REG_OP(RnnGenMask)
     .INPUT(seq_length, TensorType({DT_INT32}))
-    .OUTPUT(seq_mask, TensorType({DT_INT32}))
+    .OUTPUT(seq_mask, TensorType({DT_FLOAT16}))
     .REQUIRED_ATTR(num_step, Int)
     .REQUIRED_ATTR(hidden_size, Int)
     .OP_END_FACTORY_REG(RnnGenMask)
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index 743c28b7..ef9fabb8 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -237,18 +237,18 @@ REG_OP(AvgPool3DD)
 * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
 * @li count_include_pad: When true, will include the zero-padding in the averaging calculation.
 * @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
-* @li data_format: A string, format of input data . 
+* @li data_format: A string, format of input data.
 
 * @par Outputs:
-* @output: A mutable tensor with the same shape and type as "orig_input".
+* @output: A mutable tensor with the shape described by "orig_input_shape" and the same type as "grads".
 
 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator AvgPoolGrad.
 */
 
 REG_OP(AvgPool3DGrad)
-    .INPUT(orig_input_shape, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
-    .INPUT(grads, TensorType({DT_INT32}))
+    .INPUT(orig_input_shape, TensorType({DT_INT32}))
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
     .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
     .REQUIRED_ATTR(ksize, ListInt)
     .REQUIRED_ATTR(strides, ListInt)
@@ -888,7 +888,7 @@ REG_OP(AvgPoolV2Grad)
 * @brief Computes gradients of averagev2 pooling function.
 
 * @par Inputs:
-* @li input_grad: An NHWC tensor of type float16, float32, or double.
+*input_grad: An NHWC tensor of type float16, float32, or double.
 
 * @par Attributes:
 * @li orig_input_shape: A required tuple or list of type int32.
@@ -906,10 +906,10 @@ REG_OP(AvgPoolV2Grad)
 * @li data_format: An optional string. Defaults to "NHWC".
 
 * @par Outputs:
-* @out_grad: A mutable tensor with the same shape and type as "orig_input".
+*out_grad: A mutable tensor with the same shape and type as "orig_input".
 
 * @par Third-party framework compatibility
-* @li Compatible with the TensorFlow operator AvgPoolGrad.
+*Compatible with the TensorFlow operator AvgPoolGrad.
 */
 REG_OP(AvgPoolV2GradD)
     .INPUT(input_grad, TensorType({DT_FLOAT16}))
@@ -1682,7 +1682,27 @@ REG_OP(MaxPoolWithArgmaxV1)
     .ATTR(ceil_mode, Bool, false)
     .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)
 
-// SubSample
+/**
+*@brief Randomly sample a subset of positive and negative examples, and overwrite
+* the label vector to the ignore value (-1) for all elements that are not
+* included in the sample. \n
+
+* @par Inputs:
+* One input:
+* labels: shape of labels,(N, ) label vector with values. \n
+
+* @par Attributes:
+* @li batch_size_per_images: A required attribute of type int.
+* @li positive_fraction: A required attribute of type float.
+
+*@par Outputs:
+*y: The result of subSample. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SubSample.
+*@par Restrictions:
+*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+*/
 REG_OP(SubSample)
     .INPUT(labels, TensorType({DT_INT32}))
     .OUTPUT(y, TensorType({DT_INT32}))
@@ -1690,7 +1710,28 @@ REG_OP(SubSample)
     .REQUIRED_ATTR(positive_fraction, Float)
     .OP_END_FACTORY_REG(SubSample)
 
-//  SubSampleLabels
+/**
+*@brief Randomly sample a subset of positive and negative examples, and overwrite
+* the label vector to the ignore value (-1) for all elements that are not
+* included in the sample. \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li labels: shape of labels,(N, ) label vector with values.
+* @li shuffle_matrix: random matrix with shape (N, ). \n
+
+* @par Attributes:
+* @li batch_size_per_images: A required attribute of type int.
+* @li positive_fraction: A required attribute of type float.
+
+*@par Outputs:
+*y: The result of subSample. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SubSampleLabels.
+*@par Restrictions:
+*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+*/
 REG_OP(SubSampleLabels)
     .INPUT(labels, TensorType({DT_INT32}))
     .INPUT(shuffle_matrix, TensorType({DT_INT32}))
diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h
index 92074872..75e91aee 100644
--- a/third_party/fwkacllib/inc/ops/nn_training_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h
@@ -2102,6 +2102,55 @@ REG_OP(FusedMulApplyMomentumExtern)
     .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)
 
 /**
+*@brief Updates '*var' according to the momentum scheme.
+*   accum = accum * momentum - x1 * x2 * lr
+*   if use_nesterov is True:
+*       var += accum * momentum - x1 * x2 * lr
+*   else:
+*       var += accum
+*
+*@par Inputs:
+*@li var: A mutable tensor. Must be one of the data types defined in
+*    TensorType::NumberType(). Should be from a Variable().
+*@li accum: A mutable tensor. Has the same type as "var". Should be from a
+*    Variable().
+*@li lr: A tensor for the learning rate. Has the same type as "var". Should be
+*    from a Variable().
+*@li x1: A tensor. Must be one of the data types defined in TensorType::NumberType().
+*@li momentum: A scalar. Has the same type as "var".
+*@li x2: A scalar. Has the same type as "var".
+*
+*@par Attributes:
+*@li use_nesterov: An optional bool. Defaults to "False".
+*    If "True", var will be updated by using Nesterov momentum.
+*@li use_locking: An optional bool. Defaults to "False".
+*    If "True", updating of the "var" tensor is protected by a lock;
+*    otherwise the behavior is undefined, but may exhibit less contention.
+*
+*@par Outputs:
+*@li var: A mutable tensor. Has the same type as input "var".
+*@li accum: A mutable tensor. Must be one of the data types defined in TensorType::NumberType().
+*
+*@attention Constraints:
+* The input tensors must have the same shape.
+*
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
+*/
+REG_OP(FusedMulApplyKerasMomentum)
+    .INPUT(var, TensorType::NumberType())
+    .INPUT(accum, TensorType::NumberType())
+    .INPUT(lr, TensorType::NumberType())
+    .INPUT(x1, TensorType::NumberType())
+    .INPUT(momentum, TensorType::NumberType())
+    .INPUT(x2, TensorType::NumberType())
+    .OUTPUT(var, TensorType::NumberType())
+    .OUTPUT(accum, TensorType::NumberType())
+    .ATTR(use_locking, Bool, false)
+    .ATTR(use_nesterov, Bool, false)
+    .OP_END_FACTORY_REG(FusedMulApplyKerasMomentum)
+
+/**
 *@brief Update "g" according to the LARS algorithm . \n
 
 *@par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h
index 452961a9..b625180a 100644
--- a/third_party/fwkacllib/inc/ops/parsing_ops.h
+++ b/third_party/fwkacllib/inc/ops/parsing_ops.h
@@ -86,7 +86,7 @@ REG_OP(ParseSingleExample)
     .ATTR(sparse_keys, ListString, {})
     .ATTR(dense_keys, ListString, {})
     .ATTR(sparse_types, ListType, {})
-    .ATTR(dense_types, ListType, {})
+    .ATTR(Tdense, ListType, {})
     .ATTR(dense_shapes, ListListInt, {})
     .OP_END_FACTORY_REG(ParseSingleExample)
 
@@ -173,9 +173,9 @@ REG_OP(ParseTensor)
 REG_OP(DecodeCSV)
     .INPUT(records, TensorType({DT_STRING}))
     .DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
-                                        DT_INT64, DT_STRING, DT_RESOURCE}))
+                                        DT_INT64, DT_STRING}))
     .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
-                                        DT_INT64, DT_STRING, DT_RESOURCE}))
+                                        DT_INT64, DT_STRING}))
     .ATTR(OUT_TYPE, ListType, {})
     .ATTR(field_delim, String, ",")
     .ATTR(use_quote_delim, Bool, true)
@@ -283,12 +283,12 @@ REG_OP(ParseSingleSequenceExample)
     .ATTR(Ncontext_dense, Int, 0)
     .ATTR(Nfeature_list_sparse, Int, 0)
     .ATTR(Nfeature_list_dense, Int, 0)
-    .REQUIRED_ATTR(context_sparse_types, ListType)
-    .REQUIRED_ATTR(Tcontext_dense, ListType)
-    .REQUIRED_ATTR(feature_list_dense_types, ListType)
-    .REQUIRED_ATTR(context_dense_shapes, ListListInt)
-    .REQUIRED_ATTR(feature_list_sparse_types, ListType)
-    .REQUIRED_ATTR(feature_list_dense_shapes, ListListInt)
+    .ATTR(context_sparse_types, ListType, {})
+    .ATTR(Tcontext_dense, ListType, {})
+    .ATTR(feature_list_dense_types, ListType, {})
+    .ATTR(context_dense_shapes, ListListInt, {})
+    .ATTR(feature_list_sparse_types, ListType, {})
+    .ATTR(feature_list_dense_shapes, ListListInt, {})
     .OP_END_FACTORY_REG(ParseSingleSequenceExample)
 
 }  // namespace ge
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h
index 8104cb01..b65a68f1 100644
--- a/third_party/fwkacllib/inc/ops/random_ops.h
+++ b/third_party/fwkacllib/inc/ops/random_ops.h
@@ -548,6 +548,8 @@ REG_OP(ShuffleChannel)
  * Each value along the axis zero represents the outcome of 
  * the corresponding sample in a batch.
  * 
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  */
 REG_OP(MultinomialFuss)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64}))
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index d4b3b102..d671a531 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -190,7 +190,7 @@ REG_OP(DynamicRNNGrad)
 *@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
 *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
+*@li seq_length:An optional Tensor. Only supports float16 in FRACTAL_NZ and int32 in ND.
 *@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
@@ -228,7 +228,7 @@ REG_OP(DynamicRNN)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
     .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -698,9 +698,6 @@ REG_OP(DynamicGRU)
 *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(DynamicGRUV2)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -989,6 +986,27 @@ REG_OP(CommonLSTM)
     .OP_END_FACTORY_REG(CommonLSTM)
 
 /**
+ * @brief Generate a sequence mask: convert seq_length to a mask according to hidden_size and num_step.
+ *
+ * @par Inputs:
+ * @li seq_length: A 1D Tensor of type int32. Records the current length of each batch. [batch_size].
+ * @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Records the hidden_size. [4 * hidden_size].
+ * @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Records the num_step/batch_size/input_size. [num_step, batch_size, input_size].
+ *
+ * @par Outputs:
+ * seq_mask: A 3D Tensor with the shape of [num_step, batch_size, hidden_size]. Must be one of the following types: fp16/fp32. Has the same type as "b". \n
+ *
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(RnnGenMaskV2)
+    .INPUT(seq_length, TensorType({DT_INT32}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(RnnGenMaskV2)
+
+/**
 * @brief Common GRU calculation.
 
 * @par Inputs:
@@ -1002,22 +1020,15 @@ REG_OP(CommonLSTM)
 
 * @par Attributes:
 * @li activation_alpha: Optional scaling values used by some activation functions.  \n
-
 * @li activation_beta: Optional scaling values used by some activation functions.  \n
-
 * @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates.  \n
-
 * @li clip: Cell clip threshold. \n
-
 * @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n
-
 * @li hidden_size: Number of neurons in the hidden layer. \n
-
 * @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n
 
 * @par Outputs:
 * @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ
-
 * @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ
 */
 REG_OP(CommonGRU)
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index da5bdb6a..f99493b7 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -2254,6 +2254,33 @@ REG_OP(IndexFillD)
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .REQUIRED_ATTR(dim, Int)
     .OP_END_FACTORY_REG(IndexFillD)
+
+/**
+* @brief For each row r of "x" and for each column c, do x(r, c) += src(j, c), \n
+*   where j ranges from indices[r].first through indices[r].second - 1. \n
+*   In general indices must be >= 0 and < src.NumRows(); \n
+*   but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li x: A Tensor. Must be one of the following types:
+*     float16, float32.
+* @li src: A Tensor of the same type as "x".
+* @li indices: A Tensor of the indices, type should be int32. \n
+
+* @par Outputs:
+* @li x: A Tensor. Same as input "x".
+
+* @par Third-party framework compatibility
+* Compatible with the kaldi operator AddRowRanges.
+*/
+REG_OP(AddRowRanges)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OP_END_FACTORY_REG(AddRowRanges)
+
 } // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h
index d7233906..080b9a3f 100644
--- a/third_party/fwkacllib/inc/ops/string_ops.h
+++ b/third_party/fwkacllib/inc/ops/string_ops.h
@@ -114,6 +114,7 @@ REG_OP(UnicodeDecodeWithOffsets)
     .ATTR(errors, String, "replace")
     .ATTR(replacement_char, Int, 65533)
     .ATTR(replace_control_characters, Bool, false)
+    .ATTR(Tsplits, Type, DT_INT64)
     .OP_END_FACTORY_REG(UnicodeDecodeWithOffsets)
 
 /**
@@ -161,6 +162,7 @@ REG_OP(UnicodeDecode)
     .ATTR(errors, String, "replace")
     .ATTR(replacement_char, Int, 65533)
     .ATTR(replace_control_characters, Bool, false)
+    .ATTR(Tsplits, Type, DT_INT64)
     .OP_END_FACTORY_REG(UnicodeDecode)
 
 /**
diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
index 71226d87..2cf6e0c4 100644
--- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
+++ b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
@@ -23,8 +23,8 @@
  * @attention 无
  * @param  option [IN] 调优参数
  * @param  msg [OUT] 调优异常下返回信息
- * @retval #MSTUNE_SUCCESS 执行成功
- * @retval #MSTUNE_FAILED 执行失败
+ * @retval #AOE_SUCCESS 执行成功
+ * @retval #AOE_FAILURE 执行失败
  * @par 依赖:
  * @li tune_api.cpp：该接口所属的开发包。
  * @li tune_api.h：该接口声明所在的头文件。
@@ -35,33 +35,13 @@ AoeStatus AoeOfflineTuning(const std::map<std::string, std::string> &option, std
 
 /**
  * @ingroup aoe
- * @par 描述: 梯度调优
- *
- * @attention 无
- * @param  tuningGraph [IN] 调优图
- * @param  dependGraph [IN] 调优依赖图
- * @param  session [IN] ge连接会话
- * @param  option [IN] 参数集. 包含调优参数及ge参数
- * @retval #MSTUNE_SUCCESS 执行成功
- * @retval #MSTUNE_FAILED 执行失败
- * @par 依赖:
- * @li tune_api.cpp：该接口所属的开发包。
- * @li tune_api.h：该接口声明所在的头文件。
- * @see 无
- * @since
- */
-extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
-    ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
-
-/**
- * @ingroup aoe
  * @par 描述: 调优初始化
  *
  * @attention 无
  * @param  session [IN] ge连接会话
  * @param  option [IN] 参数集. 包含调优参数及ge参数
  * @retval #AOE_SUCCESS 执行成功
- * @retval #AOE_FAILED 执行失败
+ * @retval #AOE_FAILURE 执行失败
  * @par 依赖:
  * @li tune_api.cpp：该接口所属的开发包。
  * @li tune_api.h：该接口声明所在的头文件。
@@ -77,7 +57,7 @@ extern "C" AoeStatus AoeOnlineInitialize(ge::Session *session, const std::map<st
  * @attention 无
  * @param  无
  * @retval #AOE_SUCCESS 执行成功
- * @retval #AOE_FAILED 执行失败
+ * @retval #AOE_FAILURE 执行失败
  * @par 依赖:
  * @li tune_api.cpp：该接口所属的开发包。
  * @li tune_api.h：该接口声明所在的头文件。
@@ -96,7 +76,7 @@ extern "C" AoeStatus AoeOnlineFinalize();
  * @param  session [IN] ge连接会话
  * @param  option [IN] 参数集. 包含调优参数及ge参数
  * @retval #AOE_SUCCESS 执行成功
- * @retval #AOE_FAILED 执行失败
+ * @retval #AOE_FAILURE 执行失败
  * @par 依赖:
  * @li tune_api.cpp：该接口所属的开发包。
  * @li tune_api.h：该接口声明所在的头文件。