diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h
index 556652be..445ed12f 100644
--- a/inc/external/acl/error_codes/rt_error_codes.h
+++ b/inc/external/acl/error_codes/rt_error_codes.h
@@ -57,9 +57,6 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;   // no stream re
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;   // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;    // no model resource
 static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011;      // no cdq resource
-static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012;           // over limit
-static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013;          // queue is empty
-static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014;           // queue is full
 
 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;              // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                    // ts internel error
diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h
index 1a8dc3e9..ef7e2ec7 100644
--- a/inc/external/runtime/rt_error_codes.h
+++ b/inc/external/runtime/rt_error_codes.h
@@ -58,9 +58,6 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;   // no stream re
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;   // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;    // no model resource
 static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011;      // no cdq resource
-static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012;           // over limit
-static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013;          // queue is empty
-static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014;           // queue is full
 
 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;              // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                    // ts internel error
diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h
index a3621d8b..6decaaea 100644
--- a/inc/framework/omg/omg_inner_types.h
+++ b/inc/framework/omg/omg_inner_types.h
@@ -128,6 +128,7 @@ struct OmgContext {
   bool fuzz_compile_flag = false;
   std::string atc_cmdline;
   bool user_attr_index_valid = false;
+  bool is_online_model = false;
 };
 }  // namespace ge
 
diff --git a/metadef b/metadef
index eb9262ab..f011a4c7 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit eb9262abf88b9d85d9fdc25055a2682dfd402ced
+Subproject commit f011a4c7ad36e1ec80990e659abefc78b0aa7543
diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h
index a2d805fb..a7eb727e 100644
--- a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h
+++ b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h
@@ -52,9 +52,6 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE         = 207008; // no str
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE         = 207009; // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE          = 207010; // no model resource
 static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE            = 207011; // no cdq resource
-static const int32_t ACL_ERROR_RT_OVER_LIMIT                 = 207012; // over limit
-static const int32_t ACL_ERROR_RT_QUEUE_EMPTY                = 207013; // queue is empty
-static const int32_t ACL_ERROR_RT_QUEUE_FULL                 = 207014; // queue is full
 
 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR             = 507000; // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR                   = 507001; // ts internel error
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h
index 32454d27..90a60b3d 100644
--- a/third_party/fwkacllib/inc/ops/data_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h
@@ -2336,12 +2336,14 @@ REG_OP(CacheAllIndexToLocal)
 
 /**
 *@brief LRUCacheV2, aicore LRUCache.
+
 *@par Inputs:
 *index_list: exchange index list
 *data: host data
 *cache: gm cache
 *tag: cache's tag
 *is_last_call: if is last call write all cache to data
+
 *@par Outputs:
 *data: output data
 *cache: gm cache
@@ -2349,8 +2351,11 @@ REG_OP(CacheAllIndexToLocal)
 *index_offset_list: index_offset_list
 *not_in_cache_index_list: output not in cache's index_list
 *not_in_cache_number: scalar
+
 *@par Attributes:
 *pre_route_count: types of all outputs
+
+*@par Restrictions:
 *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(LRUCacheV2)
diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h
index 2c5afbe7..b5251cd6 100644
--- a/third_party/fwkacllib/inc/ops/math_ops.h
+++ b/third_party/fwkacllib/inc/ops/math_ops.h
@@ -480,7 +480,7 @@ REG_OP(HistogramFixedWidth)
     .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
     .INPUT(nbins, TensorType({DT_INT32}))
     .OUTPUT(y, TensorType({DT_INT32}))
-    .ATTR(dtype, String, "int32")
+    .ATTR(dtype, Int, 3)
     .OP_END_FACTORY_REG(HistogramFixedWidth)
 
 /**
@@ -511,7 +511,7 @@ REG_OP(HistogramFixedWidthD)
     .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
     .OUTPUT(y, TensorType({DT_INT32}))
     .REQUIRED_ATTR(nbins, Int)
-    .ATTR(dtype, String, "int32")
+    .ATTR(dtype, Int, 3)
     .OP_END_FACTORY_REG(HistogramFixedWidthD)
 
 /**
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index 55199962..0d9a8424 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -532,6 +532,36 @@ REG_OP(ScatterAdd)
     .OP_END_FACTORY_REG(ScatterAdd)
 
 /**
+*@brief Adds sparse "updates" to a variable reference . \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li var: An ND Tensor .
+*Must be one of the following types: float16, float32, int32, int8, uint8
+
+*@li indices: An ND Tensor of type int32 or int64
+
+*@li updates: An ND Tensor .
+*Must be one of the following types: float16, float32, int32, int8, uint8
+
+*@par Attributes:
+* axis: A required int. The axis along which to index. \n
+
+*@par Outputs:
+*var: A Tensor. Has the same type and format as input "var" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the pytorch operator ScatterAdd.
+*/
+REG_OP(ScatterAddWithAxis)
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(indices, TensorType::IndexNumberType())
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .REQUIRED_ATTR(axis, Int)
+    .OP_END_FACTORY_REG(ScatterAddWithAxis)
+
+/**
 *@brief Divides a variable reference by sparse updates . \n
 
 *@par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index 9629976e..1ecc6e91 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -142,6 +142,29 @@ REG_OP(BatchNorm)
     .OP_END_FACTORY_REG(BatchNorm)
 
 /**
+*@brief part of SyncBatchNormBackward . \n
+
+*@par Inputs:
+* Four inputs, including:
+*@li sum_dy: A Tensor. Must be one of the following types: float16, float32 .
+*@li sum_dy_dx_pad: A Tensor. Must be one of the following types: float16, float32 .
+*@li mean: A Tensor. Must be one of the following types: float16, float32 .
+*@li invert_std: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Outputs:
+*@li sum_dy_xmu: A Tensor. Has the same type and format as input "sum_dy"
+*@li y: A Tensor. Has the same type and format as input "sum_dy" . \n
+*/
+REG_OP(SyncBatchNormBackwardReduce)
+    .INPUT(sum_dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(sum_dy_dx_pad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(invert_std, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(sum_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(SyncBatchNormBackwardReduce)
+    
+/**
 *@brief Performs batch normalization . \n
 
 *@par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index b14cc49d..a12a4f94 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -135,7 +135,8 @@ REG_OP(CheckValid)
 * the value "4" refers to "x0", "x1", "y0", and "y1" . \n
 
 *@par Attributes:
-*mode: Computation mode, a character string with the value range of [iou, iof] . \n
+*@li mode: Computation mode, a character string with the value range of [iou, iof]
+*@li eps: An optional float used to prevent division by 0. Defaults to "1.0" . \n
 
 *@par Outputs:
 *overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
@@ -150,6 +151,7 @@ REG_OP(Iou)
     .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(mode, String, "iou")
+    .ATTR(eps, Float, 1.0)
     .OP_END_FACTORY_REG(Iou)
 
 /**
@@ -580,6 +582,172 @@ REG_OP(Yolo)
     .ATTR(background, Bool, false)
     .ATTR(softmaxtree, Bool, false)
     .OP_END_FACTORY_REG(Yolo)
+    
+/**
+*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n
+
+*@par Inputs:
+*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),
+where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged
+as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n
+
+*@par Attributes:
+*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3.
+*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h).
+*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
+*@li yolo_version: A string, specifying the YOLO version: "V2", "V3" or "V5". Defaults to "V5".
+*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
+*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
+*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n
+
+*@par Outputs:
+*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2],
+* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
+*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2],
+* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence.
+*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2],
+* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n
+
+*@attention Constraints:
+*@li This operator applies to YOLO v2,v3 and v5 networks.
+*@li The succeeding layer of the Yolo operator must be operator Yolov5DetectionOutput.
+*@par Third-party framework compatibility
+* It is a custom operator.
+*/
+REG_OP(YoloPreDetection)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(boxes, Int, 3)
+    .ATTR(coords, Int, 4)
+    .ATTR(classes, Int, 80)
+    .ATTR(yolo_version, String, "V5")
+    .ATTR(softmax, Bool, false)
+    .ATTR(background, Bool, false)
+    .ATTR(softmaxtree, Bool, false)
+    .OP_END_FACTORY_REG(YoloPreDetection)
+
+/**
+*@brief Performs YOLO V5 detection . \n
+
+*@par Inputs:
+*Ten inputs, including:
+*@li Operator Yolov5DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n
+There are three Yolo operators at Yolov5DetectionOutput's preceding layer on Yolo v5. For details, see the description of operator Yolo.
+*@li img_info: A float16 or float32, describing the image information including the required image height and width \n
+* and the actual image height and width.
+
+*@par Attributes:
+*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
+*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
+*@li coords: Specifies the number of coordinate parameters. Must be 4.
+*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
+*@li relative: An optional bool. Defaults to and must be "true".
+*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
+
+*@li post_nms_topn: An optional int32. This attribute is reserved.
+*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
+
+*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n
+
+*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
+
+*@par Outputs:
+*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
+* In the output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
+*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
+* Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
+
+*@attention Constraints:\n
+*@li This operator applies only to the YOLO v5 network.
+*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators.
+
+*@see Yolo()
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
+*/
+REG_OP(YoloV5DetectionOutput)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(biases, ListFloat)
+    .ATTR(boxes, Int, 3)
+    .ATTR(coords, Int, 4)
+    .ATTR(classes, Int, 80)
+    .ATTR(relative, Bool, true)
+    .ATTR(obj_threshold, Float, 0.5)
+    .ATTR(post_nms_topn, Int, 512)
+    .ATTR(score_threshold, Float, 0.5)
+    .ATTR(iou_threshold, Float, 0.45)
+    .ATTR(pre_nms_topn, Int, 512)
+    .ATTR(N, Int, 10)
+    .ATTR(resize_origin_img_to_net, Bool, false)
+    .ATTR(out_box_dim, Int, 3)
+    .ATTR(alpha, Float, 2.0)
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(box_out_num, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(YoloV5DetectionOutput)
+
+/**
+*@brief Performs YOLO V5 detection.
+
+*@par Inputs:
+*16 inputs, including:
+*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v5) are used as the inputs of operator Yolov5DetectionOutput.
+* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
+*@li imginfo: A float16, describing the image information including the required image height and width
+* and the actual image height and width.
+*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs.
+* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)]
+* is formed for the three Yolo outputs, respectively. It's a dynamic input. \n
+
+*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
+*@par Attributes:
+*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
+*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
+*@li coords: Specifies the number of coordinate parameters. Must be 4.
+*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
+*@li relative: An optional bool. Defaults to and must be "true".
+*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
+*@li post_nms_topn: An optional int32. This attribute is reserved.
+*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
+*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
+*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
+*
+*@par Outputs:
+*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
+*            describing the information of each output box.
+* In the output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
+*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
+* Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
+*
+*@attention Constraints:
+*@li This operator applies only to the YOLO v5 network.
+*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators.
+*@see Yolo()
+*@par Third-party framework compatibility
+* It is a custom operator. 
+*/
+REG_OP(YoloV5DetectionOutputD)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(biases, ListFloat)
+    .ATTR(boxes, Int, 3)
+    .ATTR(coords, Int, 4)
+    .ATTR(classes, Int, 80)
+    .ATTR(relative, Bool, true)
+    .ATTR(obj_threshold, Float, 0.5)
+    .ATTR(post_nms_topn, Int, 512)
+    .ATTR(score_threshold, Float, 0.5)
+    .ATTR(iou_threshold, Float, 0.45)
+    .ATTR(pre_nms_topn, Int, 512)
+    .ATTR(N, Int, 10)
+    .ATTR(resize_origin_img_to_net, Bool, false)
+    .ATTR(out_box_dim, Int, 3)
+    .ATTR(alpha, Float, 2.0)
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(box_out_num, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(YoloV5DetectionOutputD)
 
 /**
 *@brief Performs YOLO V2 detection . \n
diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h
index 9dd502cd..bc75cfb7 100644
--- a/third_party/fwkacllib/inc/ops/nn_training_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -2645,6 +2645,19 @@ REG_OP(SparseApplyAdadeltaD)
 REG_OP(AtomicAddrClean)
     .ATTR(automic_add_mem_size, ListInt, {})
     .OP_END_FACTORY_REG(AtomicAddrClean)
+
+/**
+*@brief Clean memory of workspace list . \n
+
+*@par Attributes:
+* @li automic_add_mem_size: sizes of workspaces . \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*/
+REG_OP(DynamicAtomicAddrClean)
+    .ATTR(automic_add_mem_size, ListInt, {})
+    .OP_END_FACTORY_REG(DynamicAtomicAddrClean)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index 08fb25a3..580df4d8 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -1381,7 +1381,7 @@ REG_OP(InplaceUpdate)
     .INPUT(x, TensorType::BasicType())
     .INPUT(indices, TensorType({DT_INT32}))
     .INPUT(v, TensorType::BasicType())
-    .OUTPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
     .OP_END_FACTORY_REG(InplaceUpdate)
 
 /**
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h
index 525f60e9..2bbab7a2 100644
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -847,7 +847,11 @@ with the same setting for this option. Default: False \n
 selected indices from the boxes tensor, where M <= max_output_size. \n
 
 *@attention Constraints:
-*Input theta must be float16 or float, output_size must be int32 type . \n
+*Input theta must be float16 or float, output_size must be int32 type .
+The current AiCore implementation of the AffineGrid operator adopts the
+BatchMatMul FP16 fusion operator scheme, so the accuracy will decrease
+when the theta range exceeds [-10,10]. If the model requires high
+accuracy of AffineGrid, it is recommended to use AICPU. \n
 
 *@par Third-party framework compatibility
 *Compatible with Pytorch affine_grid operator.
diff --git a/third_party/fwkacllib/inc/toolchain/plog.h b/third_party/fwkacllib/inc/toolchain/plog.h
index 6134c3e6..6fdbbb43 100644
--- a/third_party/fwkacllib/inc/toolchain/plog.h
+++ b/third_party/fwkacllib/inc/toolchain/plog.h
@@ -1,59 +1,59 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _PLOG_H_
-#define _PLOG_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-#ifndef LINUX
-#define LINUX 0
-#endif // LINUX
-
-#ifndef WIN
-#define WIN 1
-#endif
-
-#ifndef OS_TYPE
-#define OS_TYPE 0
-#endif // OS_TYPE
-
-#if (OS_TYPE == LINUX)
-#define DLL_EXPORT __attribute__((visibility("default")))
-#else
-#define DLL_EXPORT _declspec(dllexport)
-#endif
-
-/**
- * @ingroup plog
- * @brief DlogReportInitialize: init log in service process before all device setting.
- * @return: 0: SUCCEED, others: FAILED
- */
-DLL_EXPORT int DlogReportInitialize();
-
-/**
- * @ingroup plog
- * @brief DlogReportFinalize: release log resource in service process after all device reset.
- * @return: 0: SUCCEED, others: FAILED
- */
-DLL_EXPORT int DlogReportFinalize();
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-#endif // D_PLOG_H_
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _PLOG_H_
+#define _PLOG_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#ifndef LINUX
+#define LINUX 0
+#endif // LINUX
+
+#ifndef WIN
+#define WIN 1
+#endif
+
+#ifndef OS_TYPE
+#define OS_TYPE 0
+#endif // OS_TYPE
+
+#if (OS_TYPE == LINUX)
+#define DLL_EXPORT __attribute__((visibility("default")))
+#else
+#define DLL_EXPORT _declspec(dllexport)
+#endif
+
+/**
+ * @ingroup plog
+ * @brief DlogReportInitialize: init log in service process before all device setting.
+ * @return: 0: SUCCEED, others: FAILED
+ */
+DLL_EXPORT int DlogReportInitialize(void);
+
+/**
+ * @ingroup plog
+ * @brief DlogReportFinalize: release log resource in service process after all device reset.
+ * @return: 0: SUCCEED, others: FAILED
+ */
+DLL_EXPORT int DlogReportFinalize(void);
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+#endif // _PLOG_H_
diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
index 09a35c5d..10be2b93 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
@@ -77,18 +77,14 @@
 
 #define PROF_MODEL_LOAD_MASK             0x8000000000000000
 
-#ifndef OS_TYPE
-#define OS_TYPE 0
-#endif // OS_TYPE
-
-#if (OS_TYPE != LINUX)
+#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
 #define MSVP_PROF_API __declspec(dllexport)
 #else
 #define MSVP_PROF_API __attribute__((visibility("default")))
 #endif
 
 #include <cstdint>
-#include <stddef.h>
+#include <cstddef>
 
 namespace Msprofiler {
 namespace Api {
diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h
index e6ac64bf..53a5f64b 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_callback.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h
@@ -24,7 +24,7 @@
 extern "C" {
 #endif // __cplusplus
 
-#if (OS_TYPE != LINUX)
+#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
 #define MSVP_PROF_API __declspec(dllexport)
 #else
 #define MSVP_PROF_API __attribute__((visibility("default")))
diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
index d5ed7569..f0747833 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
@@ -16,11 +16,8 @@
 
 #ifndef MSPROF_ENGINE_PROF_REPORTER_H_
 #define MSPROF_ENGINE_PROF_REPORTER_H_
-#ifndef OS_TYPE
-#define OS_TYPE 0
-#endif // OS_TYPE
 
-#if (OS_TYPE != LINUX)
+#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
 #define MSVP_PROF_API __declspec(dllexport)
 #else
 #define MSVP_PROF_API __attribute__((visibility("default")))