diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h
index 88f85cb3..2bf85e29 100644
--- a/inc/external/acl/acl_mdl.h
+++ b/inc/external/acl/acl_mdl.h
@@ -43,6 +43,7 @@ extern "C" {
 
 #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
 #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
+#define ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES "_datadump_original_op_names"
 
 typedef struct aclmdlDataset aclmdlDataset;
 typedef struct aclmdlDesc aclmdlDesc;
@@ -638,6 +639,18 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc,
 
 /**
  * @ingroup AscendCL
+ * @brief get attr value by op name
+ *
+ * @param modelDesc [IN]   model description
+ * @param opName [IN]      op name
+ * @param attr [IN]        attr name
+ *
+ * @retval the attr value
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr);
+
+/**
+ * @ingroup AscendCL
  * @brief get input name by index
  *
  * @param modelDesc [IN]  model description
diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h
index 9992a636..8261adc4 100644
--- a/inc/external/hccl/hccl.h
+++ b/inc/external/hccl/hccl.h
@@ -118,8 +118,24 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC
  */
 extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
                                 aclrtStream stream);
+/**
+ * @brief Get the rank size of this comm.
+ *
+ * @param comm A pointer identifying the communication resource based on.
+ * @param rankSize  A pointer identifying the rank size.
+ * @return HcclResult
+ */
+extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
 
 /**
+ * @brief Get the rank id of this comm.
+ *
+ * @param comm A pointer identifying the communication resource based on.
+ * @param rank  A pointer identifying the rank id.
+ * @return HcclResult
+ */
+extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
+/**
  * @brief Barrier operator.
  *
  * @param comm A pointer identifying the communication resource based on.
diff --git a/metadef b/metadef
index 7cb171b9..7cbdf957 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 7cb171b9c511fec57ccc0ad746ef2126267fe18b
+Subproject commit 7cbdf95765133b5a5b979c2231013f7c76c3d529
diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h
index 375802fc..b5f49dad 100644
--- a/third_party/fwkacllib/inc/ops/array_ops.h
+++ b/third_party/fwkacllib/inc/ops/array_ops.h
@@ -1203,6 +1203,29 @@ REG_OP(Expand)
     .OP_END_FACTORY_REG(Expand)
 
 /**
+*@brief Returns a tensor containing the indices of all non-zero elements of input. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool.
+
+*@par Attributes:
+* transpose: the output tensor will be transposed if true. \n
+
+*@par Outputs:
+* y: A Tensor of type int64. \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator NonZero.
+*/
+
+REG_OP(NonZero)
+    .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
+              DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_INT64}))
+    .ATTR(transpose, Bool, false)
+    .OP_END_FACTORY_REG(NonZero)
+
+/**
 * @brief Expand the input tensor to a compatible shape. \n
 
 * @par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h
index 181bf694..ca4fe1db 100644
--- a/third_party/fwkacllib/inc/ops/batch_ops.h
+++ b/third_party/fwkacllib/inc/ops/batch_ops.h
@@ -64,10 +64,10 @@ the same types as "x_tensors" .  It's a dynamic output.  \n
 REG_OP(Batch)
   .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \
       DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE}))
-  .OUTPUT(y_index, TensorType({ DT_INT64 }))
-  .OUTPUT(y_id, TensorType({ DT_INT64 }))
   .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \
       DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL}))
+  .OUTPUT(y_index, TensorType({ DT_INT64 }))
+  .OUTPUT(y_id, TensorType({ DT_INT64 }))
   .REQUIRED_ATTR(num_batch_threads, Int)
   .REQUIRED_ATTR(max_batch_size, Int)
   .ATTR(max_enqueued_batches, Int, 10)
diff --git a/third_party/fwkacllib/inc/ops/correlation.h b/third_party/fwkacllib/inc/ops/correlation.h
new file mode 100644
index 00000000..caebba50
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/correlation.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file correlation.h
+ * \brief
+ */
+#ifndef GE_OP_CORRELATION_OPS_H
+#define GE_OP_CORRELATION_OPS_H
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+*@brief Computes a 2D Correlation given 4D "x" and "filter" tensors.
+*
+*@par Inputs:
+* @li filter: A 4D tensor of filters.
+* @li x: A 4D tensor of input images, batch number must equal to batch
+* number of "filter", and channel must equal to channel of "filter".
+*
+*@par Attributes:
+* @li groups: set correlation mode, must be 1 or channel.
+*
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+
+*@par Third-party framework compatibility
+* Compatible with caffe correlation custom operator.
+*/
+REG_OP(Correlation)
+    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
+    .ATTR(groups, Int, 1)
+    .OP_END_FACTORY_REG(Correlation)
+}  // namespace ge
+
+#endif  // GE_OP_CORRELATION_OPS_H
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index 8c87cfe8..7850536d 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -3468,25 +3468,6 @@ REG_OP(AxpyV2)
     .OP_END_FACTORY_REG(AxpyV2)
 
 /**
-* @brief Computes the result of x1 + x2.
-
-* @par Inputs:
-* @li x1: An ND tensor of type float16, float, int32.
-* @li x2: An ND tensor of type float16, float, int32. \n
-
-* @par Outputs:
-* @li y: An ND tensor tensor with the same type as "x1". \n
-
-* @par Third-party framework compatibility
-* Compatible with the Pytorch operator Add.
-*/
-REG_OP(PtAdd)
-    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
-    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
-    .OP_END_FACTORY_REG(PtAdd)
-
-/**
 * @brief Computes the result of x1 * x2.
 
 * @par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index e6802c1e..71f1b9e1 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -1484,6 +1484,55 @@ REG_OP(CombinedNonMaxSuppression)
     .OP_END_FACTORY_REG(CombinedNonMaxSuppression)
 
 /**
+*@brief Resizes "images" with "offset" using bilinear interpolation. \n
+
+*@par Inputs:
+*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`.
+*@li warp_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for offset point.
+
+*@par Outputs:
+*warp_img: A Tensor after resize. \n
+*/
+REG_OP(IMGWarp)
+    .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
+    .INPUT(warp_offset, TensorType({DT_FLOAT32}))
+    .OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
+    .OP_END_FACTORY_REG(IMGWarp)
+
+/**
+*@brief Resizes "images" with "offset" using bilinear interpolation. \n
+
+*@par Inputs:
+*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`.
+*@li map_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for resize point.
+
+*@par Outputs:
+*map_img: A Tensor after resize. \n
+*/
+REG_OP(Remap)
+    .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
+    .INPUT(map_offset, TensorType({DT_FLOAT32}))
+    .OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
+    .OP_END_FACTORY_REG(Remap)
+
+/**
+*@brief Resizes "images" with "offset" using bilinear interpolation. \n
+
+*@par Inputs:
+*@li img: input image, A 5-D tensor of shape `[n, 4, c, h, w]`,
+and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left),  (h_bottom, w_right)].
+*@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point.
+
+*@par Outputs:
+*warp_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n
+*/
+REG_OP(IMGWarpResize)
+    .INPUT(img, TensorType({DT_FLOAT32}))
+    .INPUT(warp_index, TensorType({DT_FLOAT32}))
+    .OUTPUT(warp_img, TensorType({DT_FLOAT32}))
+    .OP_END_FACTORY_REG(IMGWarpResize)
+
+/**
 *@brief Function spatial transformer . \n
 
 *@par Inputs:
@@ -1802,5 +1851,22 @@ REG_OP(ImageUnfold)
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(padding_mode, String, "zeros")
     .OP_END_FACTORY_REG(ImageUnfold)
+	
+/**
+*@brief This operation select images to warp_images according to offsets.
+
+*@par Inputs:
+*@li images: 4-D Tensor with shape `[batch, height, width, 3]`.
+*@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`.
+
+*@par Outputs:
+*warp_images: Returns 5-D Tensor with shape
+`[batch, 4, new_height, new_width, 3]` and the same dtype as `images`.
+*/
+REG_OP(IMGWarpOffsets)
+    .INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
+    .INPUT(offsets, TensorType({DT_FLOAT, DT_INT32}))
+    .OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(IMGWarpOffsets)
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index 083d4f9c..af02276b 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -179,7 +179,7 @@ REG_OP(GEMM)
 *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n
 
 *@par Inputs:
-*Three inputs, including:
+*Two inputs, including:
 * @li x1: A matrix Tensor. Must be one of the following types: float16,
 * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
 * @li x2: A matrix Tensor. Must be one of the following types: float16,
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index cc60f483..d4141e47 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1460,8 +1460,6 @@ REG_OP(DecodeBboxV2)
 * @li y1: A Tensor. Must have the same type as x.
 * @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
 *
-*@attention Constraints:
-* The upper limit of data on the direction axis is 7040.
 */
 REG_OP(Sort)
     .INPUT(x, TensorType({ DT_FLOAT16 }))
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 22023f46..796e1e61 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -55,7 +55,9 @@ REG_OP(LogSoftmaxGrad)
 *Two inputs, including:
 * @li features: A Tensor. Must be one of the following types: half, float32, double.
 *    A "batch_size * num_classes" matrix.
-* @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes).
+* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'.
+*             batch_size vector with values in [0, num_classes).
+*             This is the label for the given minibatch entry.
 
 
 *@par Outputs:
@@ -639,6 +641,48 @@ REG_OP(LayerNormXBackprop)
     .OP_END_FACTORY_REG(LayerNormXBackprop)
 
 /**
+*@brief LayerNormXBackpropV2 operator interface implementation
+*  calculating: dy, x, variance, mean, gamma
+*  pd_xl = data_dy*data_gamma
+*  pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean)
+*           np.power((data_variance + EPSLON), (-1.5))),
+*           reduce_axis, keepdims=True)
+*  pd_mean = np.sum(((-1.0)*pd_xl
+*            np.power((data_variance + EPSLON), (-0.5))),
+*            reduce_axis, keepdims=True)
+*            + pd_var*(1.0/m)
+*            np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True)
+*  pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) +
+*         pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m)
+*  res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSLON), (-0.5))
+
+*@par Inputs:
+*Five inputs, including:
+* @li dy: A Tensor. Must be one of the following types: float16, float32.
+* @li x: A Tensor. Must be one of the following types: float16, float32.
+* @li variance: A Tensor. Must be one of the following types: float16, float32.
+* @li mean: A Tensor. Must be one of the following types: float16, float32.
+* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Outputs:
+*Two outputs, including:
+* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
+* @li res_for_gamma: A Tensor. Must be one of the following types: float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*/
+REG_OP(LayerNormXBackpropV2)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(LayerNormXBackpropV2)
+
+/**
 *@brief LayerNormBetaGammaBackprop operator interface implementation
 *  calculating: dy, x, variance, mean
 *  pd_xl = data_dy*data_gamma
@@ -682,6 +726,35 @@ REG_OP(LayerNormBetaGammaBackprop)
     .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop)
 
 /**
+*@brief LayerNormBetaGammaBackpropV2 operator interface implementation
+*  calculating: dy, res_for_gamma
+*  pd_gamma = np.sum((data_dy*res_for_gamma), param_axis, keepdims=True)
+*  pd_beta = np.sum(data_dy, param_axis, keepdims=True)
+
+*@par Inputs:
+*Two inputs, including:
+* @li dy: A Tensor. Must be one of the following types: float16, float32.
+* @li res_for_gamma: A Tensor. Must be one of the following types: float32 . \n
+
+*@par Attributes:
+* shape_gamma: A required list of ints . \n
+*@par Outputs:
+*Two outputs, including:
+* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
+* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*/
+REG_OP(LayerNormBetaGammaBackpropV2)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(res_for_gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(shape_gamma, ListInt)
+    .OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2)
+
+/**
 *@brief Return "output" according to the algorithm of dropout_do_mask:
 *  scale_x = x *(1 / keep_prob)
 *  output = select(mask == 1, scale_x, 0)
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index 4cbe4057..ca1c24eb 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -536,13 +536,19 @@ REG_OP(Elu)
 *       max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n
 
 *@par Inputs:
-*x: A float16, float32 or double, for the input data type . \n
+*x: A float16, float32, for the input data type . \n
 
 *@par Attributes:
-*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
+*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
+
+*@par Attributes:
+*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
+
+*@par Attributes:
+*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
 
 *@par Outputs:
-*y: A float16, float32 or double, for the normalized result . \n
+*y: A float16, float32, for the normalized result . \n
 
 *@attention Constraints:
 *@li The input is of type float16 or float32 . \n
@@ -553,9 +559,11 @@ REG_OP(Elu)
 *@li Compatible with ONNX's Celu operator
 */
 REG_OP(Celu)
-    .INPUT(x, TensorType::FloatingDataType())
-    .OUTPUT(y, TensorType::FloatingDataType())
-    .ATTR(alpha, Float, 1.0)
+    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .ATTR(alpha1, Float, 1.0)
+    .ATTR(alpha2, Float, 1.0)
+    .ATTR(alpha3, Float, 1.0)
     .OP_END_FACTORY_REG(Celu)
 
 /**
@@ -691,6 +699,25 @@ REG_OP(Mish)
     .OP_END_FACTORY_REG(Mish)
 
 /**
+ * @brief: pytorch mish_grad operator.
+ * @par Inputs:
+ * Three inputs, including:
+ * @li grad: A Tensor. shape, datatype and format is same as x
+ * @li x: A Tensor. Must be one of the following types: float16, float32
+ * @li tanhx: An optional Tensor. shape, datatype and format is same as x
+ * @par Outputs:
+ * 1 output, including:
+ * @li x_grad: A Tensor. shape, datatype and format is same as x
+ */
+
+REG_OP(MishGrad)
+    .INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OP_END_FACTORY_REG(MishGrad)
+
+/**
  * @brief pytorch hardtanh_backward operator.
  *
  * @par Inputs:
@@ -993,6 +1020,30 @@ REG_OP(HardSigmoidGrad)
     .ATTR(beta, Float, 0.5)
     .OP_END_FACTORY_REG(HardSigmoidGrad)
 
+/**
+* @brief Calculate the shrink function. \n
+
+* @par Inputs:
+* One input, including:
+* @li input_x: A tensor. Must be one of the following types:
+*     float16, float32. \n
+
+* @par Attributes:
+* @li lambd: An optional float. Defaults to 0.5.
+* @li bias: An optional float. Defaults to 0.0. \n
+
+* @par Outputs:
+* output_y: A Tensor with the same dtype and shape as "input_x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator Shrink. \n
+*/
+REG_OP(Shrink)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambd, Float, 0.5)
+    .ATTR(bias, Float, 0.0)
+    .OP_END_FACTORY_REG(Shrink)
 } // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h
index 42da3828..bacbe40d 100644
--- a/third_party/fwkacllib/inc/ops/pad_ops.h
+++ b/third_party/fwkacllib/inc/ops/pad_ops.h
@@ -273,14 +273,11 @@ REG_OP(PadV3)
 *@brief Pads a tensor.
 
 *@par Inputs:
-*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32.
+* @li x: A Tensor. Must be one of the following types: float16, float32.
+* @li paddings: A Tensor. Must be int32 type.
+*     paddings is a required input tensor.
 
 *@par Attributes:
-* @li paddings: An required "vector<vector<int>>".
-*     For each dimension D of input, paddings[D, 0] indicates how many
-*     values to add before the contents of tensor in that dimension,
-*     and paddings[D, 1] indicates how many values to add after the
-*     contents of tensor in that dimension.
 * @li constant_values: An optional int value for pad.
 * @li mode: An optional string, Defaults to "constant", indicates paddings mode,
 *     support "constant", "reflect", "edge"
@@ -298,9 +295,9 @@ REG_OP(PadV3)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
 */
 REG_OP(PadV3D)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
-    .REQUIRED_ATTR(paddings, ListListInt)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(paddings, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(constant_values, Int, 0)
     .ATTR(mode, String, "constant")
     .ATTR(paddings_contiguous, Bool, true)
diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h
index eafc8cc4..d6eda1e6 100644
--- a/third_party/fwkacllib/inc/ops/quantize_ops.h
+++ b/third_party/fwkacllib/inc/ops/quantize_ops.h
@@ -214,7 +214,7 @@ REG_OP(AscendRequant)
 *@brief Requantizes the input of int16 . \n
 
 *@par Inputs:
-*@li x: An NC1HWC0 tensor of type int16, specifying the input.
+*@li x0: An NC1HWC0 tensor of type int16, specifying the input.
 *@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio.
 *@li x1: An NC1HWC0 tensor of type int16 . \n
 
@@ -223,17 +223,17 @@ REG_OP(AscendRequant)
 *@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
 
 *@par Outputs:
-*@li y: The dequantized output tensor of type int8 and with format NC1HWC0.
+*@li y0: The dequantized output tensor of type int8 and with format NC1HWC0.
 *@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n
 
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendRequantS16)
-  .INPUT(x, TensorType({DT_INT16}))
+  .INPUT(x0, TensorType({DT_INT16}))
   .INPUT(req_scale, TensorType({DT_UINT64}))
   .OPTIONAL_INPUT(x1, TensorType({DT_INT16}))
-  .OUTPUT(y, TensorType({DT_INT8}))
+  .OUTPUT(y0, TensorType({DT_INT8}))
   .OUTPUT(y1, TensorType({DT_INT16}))
   .ATTR(dual_output, Bool, false)
   .ATTR(relu_flag, Bool, false)
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index d671a531..9c6a7d1b 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -33,6 +33,7 @@ namespace ge {
 *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
 *@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n
+*@li mask:A 1D Tensor. Must be one of the following types: uint8.
 
 *@par Attributes:
 *@li keep_prob:An integer identifying the keep prob in the op. Default to 1.
@@ -42,7 +43,6 @@ namespace ge {
 
 *@par Outputs:
 *seven outputs:
-*@li mask:A 1D Tensor. Must be one of the following types: uint8.
 *@li ct:A 4D Tensor. Must be one of the following types: float16, float32.
 *@li ht:A 4D Tensor. Must be one of the following types: float16.
 *@li it:A 4D Tensor. Must be one of the following types: float16, float32.
@@ -209,6 +209,7 @@ REG_OP(DynamicRNNGrad)
 *@li time_major:An bool identifying the time major in the op. Default to true.
 *@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
 *@li forget_bias:An float identifying the forget bias in the op. Default to 0.
+*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo".
 *@li is_training:An bool identifying is training in the op. Default to true . \n
 
 *@par Outputs:
@@ -253,10 +254,104 @@ REG_OP(DynamicRNN)
     .ATTR(time_major, Bool, true)
     .ATTR(activation, String, "tanh")
     .ATTR(forget_bias, Float, 0.0)
+    .ATTR(gate_order, String, "ijfo")
     .ATTR(is_training, Bool, true)
     .OP_END_FACTORY_REG(DynamicRNN)
 
 /**
+*@brief: DynamicRNNV2 calculation.
+*@par Inputs:
+*eleven inputs:
+*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32.
+*The format must be FRACTAL_Z.
+*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32.
+*The format must be FRACTAL_Z.
+*@li b:An optional 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
+*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n
+
+*@par Attributes:
+*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
+*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL".
+*Only UNIDIRECTIONAL is currently supported.
+*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
+*@li use_peephole:An bool identifying if use peephole in the op. Default to false.
+*@li keep_prob:An float identifying the keep prob in the op. Default to 1.
+*@li cell_clip:An float identifying the cell clip in the op. Default to -1.
+*@li num_proj:An integer identifying the num projection in the op. Default to 0.
+*@li time_major:An bool identifying the time major in the op. Default to true.
+*@li activation:An string identifying the type of activation function in the op. Default to "tanh".
+*Only tanh is currently supported.
+*@li recurrent_activation:An string identifying the type of activation function in the op. Default to "sigmoid".
+*Support "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM.
+*@li forget_bias:An float identifying the forget bias in the op. Default to 0.
+*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo".
+*Set "ijfo" for TF operator LSTM, Set "ifco" for TF Keras LSTM.
+*@li stateful: A bool identifying the type of stateful in the op. Default to false. Only false is currently supported.
+*@li merge_mode: A string identifying the type of merge_mode in the op. Default to "concat".
+*Only "concat" is currently supported
+*@li is_training:An bool identifying is training in the op. Default to true . \n
+
+*@par Outputs:
+*eight outputs:
+*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*Return the last output_h.
+*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*Return the last output_c.
+*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@par Third-party framework compatibility:
+* Compatible with the TF operator LSTM or TF keras operator LSTM.
+*/
+
+REG_OP(DynamicRNNV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(cell_type, String, "LSTM")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(use_peephole, Bool, false)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(recurrent_activation, String, "sigmoid")
+    .ATTR(forget_bias, Float, 0.0)
+    .ATTR(gate_order, String, "ijfo")
+    .ATTR(stateful, Bool, false)
+    .ATTR(merge_mode, String, "concat")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(DynamicRNNV2)
+
+/**
 *@brief: DynamicRNNV3 calculation.
 *@par Inputs:
 *ten inputs:
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h
index 48a094f7..2e8ffd9c 100644
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -743,6 +743,52 @@ REG_OP(Col2im)
     .OP_END_FACTORY_REG(Col2im)
 
 /**
+* @brief Performs Im2col for each batch entry. \n
+
+* @par Inputs:
+* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the
+*    following types:float32, int8, float16. The inputs must have data_format with
+*    one of follows:NHWC, NCHW.
+
+* @par Attributes:
+* @li ksizes: A required list or tuple. The size of the sliding window for each
+* dimension of images.
+* @li strides: A optional list or tuple. How far the centers of two consecutive
+* patches are in the images. Defaults to "{1}".
+* @li dilations: A optional list or tuple. Defaults to "{1}".
+* This is the input stride, specifying how far two consecutive patch
+* samples are in the input. Equivalent to extracting patches
+* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *
+* (dilations - 1), followed by subsampling them spatially by a factor of dilations.
+* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
+* @li padding_mode: A optional String. The type of padding algorithm to use,
+* support "SAME", "VALID", "CALCULATED". Among the three modes, only the "CALCULATED"
+* means to use the pads below. Defaults to "CALCULATED".
+* @li pads: A optional list or tuple. The pad distance. Defaults to "{0}". \n
+
+* @par Outputs:
+* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *
+* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols
+* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols"
+* are the dimensions of the output patches . \n
+
+* @attention Constraints:
+* "ksizes", "strides", "dilations" and "pads" are lists of integers . \n
+
+* @par Third-party framework compatibility
+* Compatible with Pytorch Im2col operator.
+*/
+REG_OP(Im2col)
+    .INPUT(x, TensorType::RealNumberType())
+    .OUTPUT(y, TensorType::RealNumberType())
+    .REQUIRED_ATTR(ksizes, ListInt)
+    .ATTR(strides, ListInt, {1})
+    .ATTR(dilations, ListInt, {1})
+    .ATTR(padding_mode, String, "CALCULATED")
+    .ATTR(pads, ListInt, {0})
+    .OP_END_FACTORY_REG(Im2col)
+
+/**
 *@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine
 matrices theta. \n
 
diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h
index 338e8854..c79ee7a5 100644
--- a/third_party/fwkacllib/inc/runtime/kernel.h
+++ b/third_party/fwkacllib/inc/runtime/kernel.h
@@ -186,9 +186,9 @@ typedef void (*rtCallback_t)(void *fnData);
 #define RT_KERNEL_CUSTOM_AICPU (0x08)
 
 // STARS topic scheduler sqe : topic_type
-#define RT_KERNEL_DEVICE_FIRST (0X10)
-#define RT_KERNEL_HOST_ONLY (0X20)
-#define RT_KERNEL_HOST_FIRST (0X30)
+#define RT_KERNEL_DEVICE_FIRST (0x10)
+#define RT_KERNEL_HOST_ONLY (0x20)
+#define RT_KERNEL_HOST_FIRST (0x40)
 
 /**
  * @ingroup rt_kernel
diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h
index c6be6b79..cc7c83ca 100644
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -17,6 +17,8 @@
 #ifndef D_SYSLOG_H_
 #define D_SYSLOG_H_
 
+static const int TMP_LOG = 0;  // always 0; the dlog macros end in `while (TMP_LOG != 0)`, so each body runs once
+
 #ifdef __cplusplus
 #ifndef LOG_CPP
 extern "C" {
@@ -261,7 +263,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
 #define dlog_error(moduleId, fmt, ...)                                          \
   do {                                                                          \
     DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -276,7 +278,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
     if(CheckLogLevel(moduleId, DLOG_WARN) == 1) {                                   \
         DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
     }                                                                               \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -291,7 +293,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
     if(CheckLogLevel(moduleId, DLOG_INFO) == 1) {                                   \
         DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
     }                                                                               \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -306,7 +308,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
     if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) {                                  \
         DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
     }                                                                               \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -318,7 +320,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
 #define dlog_event(moduleId, fmt, ...)                                          \
   do {                                                                          \
     DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -334,7 +336,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
     if(CheckLogLevel(moduleId, level) == 1) {                                           \
         DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);   \
      }                                                                                  \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -351,7 +353,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
     if(CheckLogLevel(moduleId, level) == 1) {                                                           \
         DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__);    \
     }                                                                                                   \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -369,7 +371,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
     if(CheckLogLevel(moduleId, level) == 1) {                                                                   \
         DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
     }                                                                                                           \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -453,7 +455,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
     if(CheckLogLevelForC(moduleId, level) == 1) {                                           \
         DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);   \
      }                                                                                  \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -470,7 +472,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
     if(CheckLogLevelForC(moduleId, level) == 1) {                                                           \
         DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__);    \
     }                                                                                                   \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog
@@ -488,7 +490,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
     if(CheckLogLevelForC(moduleId, level) == 1) {                                                                   \
         DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
     }                                                                                                           \
-  } while (0)
+  } while (TMP_LOG != 0)
 
 /**
  * @ingroup slog