diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 8de6e434..0ab61d36 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -314,7 +314,7 @@ struct TaskDescInfo {
   std::vector<Format> output_format;
   std::vector<std::vector<int64_t>> output_shape;
   std::vector<DataType> output_data_type;
-  uint32_t context_id;
+  uint32_t context_id = 0xFFFFFFFFUL;
 };
 
 struct OpDescInfo {
diff --git a/metadef b/metadef
index 22309b14..a4e44019 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 22309b14838a763d41dccd636fec567dae3720fd
+Subproject commit a4e4401926e14de2a79f2b622669f306b29e97f8
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index cae0f04a..b78679fa 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -84,6 +84,10 @@ REG_OP(MatMul)
 *y: The result matrix Tensor. 2D. Must be one of the following types: float32,
  float16, int32. Has format [ND, NHWC]. \n
 
+*@attention Constraints:
+* If performance is better in format NZ, please turn off
+ "MatmulTransdataFusionPass" in the fusion configuration. \n
+
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator BatchMatmul.
 */
@@ -124,6 +128,10 @@ REG_OP(MatMulV2)
 *y: The result matrix Tensor. 2D. Must be one of the following types: int32,
 * float16. \n
 
+*@attention Constraints:
+* If performance is better in format NZ, please turn off
+ "MatmulTransdataFusionPass" in the fusion configuration.
+
 */
 REG_OP(MatMulV2Compress)
     .INPUT(x1, TensorType({DT_INT8}))
@@ -240,6 +248,10 @@ REG_OP(BatchMatMul)
 * y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16,
 * float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n
 
+*@attention Constraints:
+* If performance is better in format NZ, please turn off
+ "MatmulTransdataFusionPass" in the fusion configuration. \n
+
 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator BatchMatmul.
 */
@@ -264,7 +276,12 @@ REG_OP(BatchMatMulV2)
 *     TensorType::FloatingDataType() . \n
 
 *@par Outputs:
-*y: A Tensor. Has the same type as "x".
+*y: A Tensor. Has the same type as "x". \n
+
+*@attention Constraints:
+* If performance is better in format NZ, please turn off
+ "MatmulTransdataFusionPass" in the fusion configuration. \n
+
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator L2Loss.
 */
@@ -1210,7 +1227,7 @@ REG_OP(IndexAdd)
 *qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n
 
 * @li x2: A Tensor of the same type as "x1".
-* @li indices: A Tensor of the indices, 
+* @li indices: A Tensor of the indices,
 
 * @par Attributes:
 * @li accumulate: Does it support self accumulation.Defaults to 0.
@@ -1393,8 +1410,8 @@ REG_OP(Trace)
 
 */
 REG_OP(Pinverse)
-    .INPUT(x, TensorType({ DT_FLOAT, DT_DOUBLE }))        
-    .OUTPUT(y, TensorType({ DT_FLOAT, DT_DOUBLE })) 
+    .INPUT(x, TensorType({ DT_FLOAT, DT_DOUBLE }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_DOUBLE }))
     .ATTR(rcond, Float, 1e-15)
     .OP_END_FACTORY_REG(Pinverse)
 
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index e87e1a82..84f573c5 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1844,6 +1844,7 @@ REG_OP(NonMaxSuppressionV7)
 REG_OP(RoiExtractor)
     .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(index, TensorType({DT_INT32}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(finest_scale, Int, 56)
     .ATTR(roi_scale_factor, Float, 0)
@@ -2183,6 +2184,27 @@ REG_OP(RotatedBoxDecode)
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0})
     .OP_END_FACTORY_REG(RotatedBoxDecode)
+
+/**
+* @brief Sorts ROIs so that they are balanced on each core. \n
+
+* @par Inputs:
+* One input, including:
+* @li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
+* the value "5" indicates the indexes of images where the ROIs are located, "batch", "x0", "y0", "x1", and "y1".
+
+* @par Outputs:
+* balance_rois: A 2D Tensor of float32 or float16 with shape (N, 5). The balanced ROIs.
+* index: A 1D Tensor of int32 with shape (N,). The indices of the original ROIs.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(BalanceRois)
+    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(balance_rois, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(index, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(BalanceRois)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_