From d7c4d1089076ced0f4a3b290ea369b8831eeb9c2 Mon Sep 17 00:00:00 2001
From: yanghaoran
Date: Mon, 21 Mar 2022 14:53:44 +0800
Subject: [PATCH] upgrade Ascend package 21 Mar 22

---
 metadef                                            |   2 +-
 .../fwkacllib/inc/aicpu/common/aicpu_task_struct.h |  27 +++--
 .../fwkacllib/inc/{ => aicpu}/common/type_def.h    |   0
 third_party/fwkacllib/inc/{ => aicpu}/tsd/status.h |   0
 third_party/fwkacllib/inc/ops/nn_norm_ops.h        |  72 +++++++++++++
 third_party/fwkacllib/inc/ops/quantize_ops.h       |  10 +-
 third_party/fwkacllib/inc/ops/reduce_ops.h         |   4 +-
 third_party/fwkacllib/inc/ops/rnn.h                | 111 +++++++++++++++++++++
 third_party/fwkacllib/inc/ops/selection_ops.h      |  52 +++++++---
 9 files changed, 242 insertions(+), 36 deletions(-)
 rename third_party/fwkacllib/inc/{ => aicpu}/common/type_def.h (100%)
 rename third_party/fwkacllib/inc/{ => aicpu}/tsd/status.h (100%)

diff --git a/metadef b/metadef
index 569f685a..e0efffc7 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 569f685a2e6107daf613daf98d4ef8e29bde6e86
+Subproject commit e0efffc740a79d49ba0553478b51d9d3481771cb
diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
index 2d4b91cb..4425f134 100644
--- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
+++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef AICPU_TASK_STRUCT_H_
-#define AICPU_TASK_STRUCT_H_
+#ifndef AICPU_TASK_STRUCT_H
+#define AICPU_TASK_STRUCT_H
 
 #include <cstdint>
 
@@ -46,7 +46,7 @@ enum class AicpuExtInfoMsgType {
   EXT_MODEL_ID_MSG_TYPE = 0,
 };
 
-typedef struct tagAicpuConfigMsg {
+struct AicpuConfigMsg {
   uint8_t msgType;
   uint8_t reserved1;
   uint16_t bufLen;
@@ -54,26 +54,25 @@
   uint64_t bufAddr;
   uint32_t tsId;
   uint32_t reserved2;
-} AicpuConfigMsg;
-
+};
 
-typedef struct tagAicpuModelIdInfo {
+struct AicpuModelIdInfo {
   uint32_t modelId;
   uint32_t extendModelId;
   uint32_t extendInfo[13];
-} AicpuModelIdInfo;
+};
 
 // 64 bytes
-typedef struct tagAicpuExtendInfo {
+struct AicpuExtendInfo {
   uint8_t msgType;
   uint8_t version;
   uint8_t reserved[2];
   union {
     AicpuModelIdInfo modelIdMap;
   };
-} AicpuExtendInfo;
+};
 
-typedef struct tagAicoreErrMsgInfo {
+struct AicoreErrMsgInfo {
   uint8_t errType;
   uint8_t version;
   uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
@@ -83,9 +82,9 @@
   uint32_t streamId;
   uint64_t transactionId;
   uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */
-} AicoreErrMsgInfo;
+};
 
-typedef struct tagAicpuErrMsgInfo {
+struct AicpuErrMsgInfo {
   uint8_t errType;
   uint8_t version;
   uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
@@ -96,10 +95,10 @@
   char opName[64]; /* op name str */
   char errDesc[128]; /* err msg desc info */
   uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */
-} AicpuErrMsgInfo;
+};
 
 #pragma pack(pop)
 
 } // namespace aicpu
 
-#endif // AICPU_TASK_STRUCT_H_
+#endif // AICPU_TASK_STRUCT_H
diff --git a/third_party/fwkacllib/inc/common/type_def.h b/third_party/fwkacllib/inc/aicpu/common/type_def.h
similarity index 100%
rename from third_party/fwkacllib/inc/common/type_def.h
rename to third_party/fwkacllib/inc/aicpu/common/type_def.h
diff --git a/third_party/fwkacllib/inc/tsd/status.h b/third_party/fwkacllib/inc/aicpu/tsd/status.h
similarity index 100%
rename from third_party/fwkacllib/inc/tsd/status.h
rename to third_party/fwkacllib/inc/aicpu/tsd/status.h
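A note on the aicpu_task_struct.h hunks above: the patch drops the C-style "typedef struct tagX { ... } X;" spelling in favour of plain "struct X { ... };". In C++ the struct tag is itself a type name, so code written against the old typedef name keeps compiling. Below is a minimal stand-alone sketch of that point; the struct body is abridged to the fields visible in the hunks (fields falling between the hunks are omitted), and the main() harness is illustrative only, not part of the SDK.

    #include <cstdint>
    #include <cstdio>

    // Abridged stand-in for the new declaration style in aicpu_task_struct.h.
    // Only the fields visible in the hunks above are reproduced here.
    struct AicpuConfigMsg {
      uint8_t msgType;
      uint8_t reserved1;
      uint16_t bufLen;
      uint64_t bufAddr;
      uint32_t tsId;
      uint32_t reserved2;
    };

    int main() {
      // Call sites written against the old typedef name compile unchanged,
      // because "AicpuConfigMsg" is now the struct's own name.
      AicpuConfigMsg msg{};
      msg.msgType = 0;
      msg.bufLen = 64;
      std::printf("sizeof(AicpuConfigMsg) = %zu\n", sizeof(AicpuConfigMsg));
      return 0;
    }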
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 2db73199..1cda06eb 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -1739,5 +1739,77 @@ REG_OP(DropoutWithMulsAndSoftmaxGrad)
     .REQUIRED_ATTR(alpha, Float)
     .ATTR(axes, ListInt, { -1 })
     .OP_END_FACTORY_REG(DropoutWithMulsAndSoftmaxGrad)
+
+/**
+* @brief Loss function that measures the softmax cross entropy. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li scores: A Tensor. Must be one of the following types: half, float32, double.
+* A "batch_size * num_classes" matrix.
+* @li labels: A Tensor. Must be one of the following types: "int32", "int64".
+* @li weights: A manual rescaling weight given to each class.
+* If given, it has to be a 1D Tensor assigning a weight to each of the classes.
+* Otherwise, it is treated as if having all ones. \n
+
+* @par Attributes:
+* @li ignore_index: Specifies a target value that is ignored and does not contribute to the input gradient.
+* It's an optional value.
+* @li reduction: A character string from "none", "mean", and "sum", specifying the gradient output mode. Defaults to "mean". \n
+
+* @par Outputs:
+* @li loss: A Tensor for per-example loss (a "batch_size" vector). Has the same type as "scores".
+* @li log_prop: A Tensor. Has the same type as "scores". \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator SoftmaxCrossEntropyLoss.
+*/
+REG_OP(SoftmaxCrossEntropyLoss)
+    .INPUT(scores, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .INPUT(labels, TensorType({DT_INT32, DT_INT64}))
+    .OPTIONAL_INPUT(weights, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .ATTR(ignore_index, Int, 0)
+    .ATTR(reduction, String, "mean")
+    .OUTPUT(loss, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .OUTPUT(log_prop, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .OP_END_FACTORY_REG(SoftmaxCrossEntropyLoss)
+
+/**
+* @brief Function axpy with softmax and dropoutdomask. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li x1: A mutable Tensor. The type only supports float16.
+* @li x2: A mutable Tensor. The type only supports float16.
+* @li mask: A mutable Tensor. Must meet all of the following rules:
+* shape of mask should be 1D.
+* dtype of mask should be uint8.
+* value of shape should meet the following algorithm:
+* value = (size(x) + 128 - 1) // 128 * 128. \n
+
+* @par Attributes:
+* @li alpha: An attribute used to scale the tensor. The type is float. \n
+* @li input_keep_prob: An attribute used to judge which units should be kept.
+* The type is float. \n
+* @li axis: A list of int. The dimension softmax would be performed on. Defaults
+* to "[-1]". \n
+
+* @par Outputs:
+* @li y1: A mutable Tensor. Has the same type as "x1". \n
+* @li y2: A mutable Tensor. Has the same type as "x1". \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(AxpyWithSoftmaxAndDropoutdomask)
+    .INPUT(x1, TensorType({DT_FLOAT16}))
+    .INPUT(x2, TensorType({DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y1, TensorType({DT_FLOAT16}))
+    .OUTPUT(y2, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(alpha, Float)
+    .REQUIRED_ATTR(input_keep_prob, Float)
+    .ATTR(axis, ListInt, {-1})
+    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropoutdomask)
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_
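The AxpyWithSoftmaxAndDropoutdomask comment above requires the 1D uint8 "mask" input to have length (size(x) + 128 - 1) // 128 * 128, i.e. the element count of "x" rounded up to the next multiple of 128. A small sketch of that arithmetic; the helper name and the checks are illustrative and not part of these headers.

    #include <cstdint>

    // Mask length rule from the operator comment:
    // value = (size(x) + 128 - 1) / 128 * 128 (round up to a multiple of 128).
    constexpr uint64_t AlignedMaskLen(uint64_t x_size) {
      return (x_size + 128 - 1) / 128 * 128;
    }

    static_assert(AlignedMaskLen(1) == 128, "a single element still needs one full 128-element block");
    static_assert(AlignedMaskLen(128) == 128, "exact multiples are unchanged");
    static_assert(AlignedMaskLen(129) == 256, "partial blocks round up");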
diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h
index 2fdf2aa3..1f30db61 100644
--- a/third_party/fwkacllib/inc/ops/quantize_ops.h
+++ b/third_party/fwkacllib/inc/ops/quantize_ops.h
@@ -115,13 +115,13 @@ REG_OP(AscendQuant)
 *@brief Dequantizes the input . \n
 
 *@par Inputs:
-*@li x: An tensor of type int32, specifying the input.
-*@li deq_scale: An tensor of type float16 or uint64, specifying the scaling ratio . \n
+* @li x: A tensor of type int32, specifying the input.
+* @li deq_scale: A tensor of type uint64, specifying the scaling ratio. \n
 
 *@par Attributes:
-*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
-*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
-*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n
+* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
+* @li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
+* @li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT". \n
 
 *@par Outputs:
 *y: The dequantized output tensor of type float16 or float32. \n
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index 6e30f954..b481bb29 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -690,7 +690,7 @@ REG_OP(ReduceMean)
 *@li keep_dims: A bool or NoneType.
 * - If true, retains reduced dimensions with length 1.
 * - If false, the rank of the tensor is reduced by 1 for each entry in axis.
-*@li keep_dims: A bool default True.
+*@li noop_with_empty_axes: A bool. Defaults to False.
 * - If true, same as tf.
 * - If false, when x's shape is [], reduce all dims, for onnx.
 *@par Outputs:
@@ -707,7 +707,7 @@ REG_OP(ReduceMeanD)
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(axes, ListInt)
     .ATTR(keep_dims, Bool, false)
-    .ATTR(noop_with_empty_axes, Bool, true)
+    .ATTR(noop_with_empty_axes, Bool, false)
     .OP_END_FACTORY_REG(ReduceMeanD)
 
 /**
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index 7feec412..b6a775ce 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -348,6 +348,117 @@ REG_OP(DynamicRNNV2)
     .OP_END_FACTORY_REG(DynamicRNNV2)
 
 /**
+* @brief: DynamicRNNV2Grad calculation.
+* @par Inputs:
+* twenty-one inputs:
+* @li x: A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li w_x: A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li w_h: A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li y: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_h: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_c: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li c: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li i: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li j: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li f: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li o: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li tanhct: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length: A 1D Tensor. Must be one of the following types: int32.
+* @li wci: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li wcf: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li wco: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask: A 1D Tensor. Must be one of the following types: uint8. \n
+
+* @par Attributes:
+* @li cell_type: A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
+* @li direction: A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL".
+* Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth: An integer identifying the cell depth in the op. Defaults to 1. Only 1 is currently supported.
+* @li use_peephole: A bool identifying whether to use peephole in the op. Defaults to false.
+* Only false is currently supported.
+* @li keep_prob: A float identifying the keep prob in the op. Defaults to 1. Only 1 is currently supported.
+* @li cell_clip: A float identifying the cell clip in the op. Defaults to -1. Only -1 is currently supported.
+* @li num_proj: An integer identifying the num projection in the op. Defaults to 0. Only 0 is currently supported.
+* @li time_major: A bool identifying the time major in the op. Defaults to true. Only true is currently supported.
+* @li activation: A string identifying the type of activation function in the op. Defaults to "tanh".
+* Only "tanh" is currently supported.
+* @li recurrent_activation: A string identifying the type of activation function in the op. Defaults to "sigmoid".
+* Only "sigmoid" is currently supported.
+* @li gate_order: A string identifying the gate order in the op. Supports "ijfo" and "ifco". Defaults to "ijfo".
+* Set "ijfo" for the TF operator LSTM, set "ifco" for the TF Keras/PyTorch LSTM.
+* @li stateful: A bool identifying the type of stateful in the op. Defaults to false. Only false is currently supported.
+* @li merge_mode: A string identifying the type of merge_mode in the op. Defaults to "concat".
+* Only "concat" is currently supported. \n
+
+* @par Outputs:
+* nine outputs:
+* @li dw_x: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dw_h: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li db: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dx: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh_prev: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc_prev: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwci: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwcf: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwco: A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Third-party framework compatibility:
+* Compatible with the TF operator LSTM or the TF Keras operator LSTM.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicRNNV2Grad)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(dw_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dw_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(db, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dc_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(cell_type, String, "LSTM")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(use_peephole, Bool, false)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(recurrent_activation, String, "sigmoid")
+    .ATTR(gate_order, String, "ijfo")
+    .ATTR(stateful, Bool, false)
+    .ATTR(merge_mode, String, "concat")
+    .OP_END_FACTORY_REG(DynamicRNNV2Grad)
+
+/**
 *@brief: DynamicRNNV3 calculation.
 *@par Inputs:
 *ten inputs:
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index 2cc8fd1d..88a61610 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -2534,32 +2534,56 @@ REG_OP(StridedSliceV3)
     .OP_END_FACTORY_REG(StridedSliceV3)
 
 /**
-*@brief MovingSumWithSigmoid.
+* @brief Sum the alpha according to the offset and ksize,
+  and multiply it by the sigmoid value of energy. \n
 
-*@par Inputs:
-*Four inputs, including:
+* @par Inputs:
+* Three inputs, including:
 * @li alpha: A Tensor. Must be one of the following types: float32, float16.
 * @li energy: A Tensor. Must be one of the following types: float32, float16.
-* @li beam_size: A Tensor of type int32.
-* @li frame_size: A Tensor of type int32. \n
+* @li offset: A Tensor of type int32. \n
 
 *@par Outputs:
-* y: A Tensor. Has the same type as "alpha". \n
+* y: A Tensor with the same type as "alpha". \n
 *
 * @par Attributes:
-* window_size: A int.
+* ksize: An int.
 *
 * @par Restrictions:
-* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(MovingSumWithSigmoid)
-    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .INPUT(energy, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .INPUT(beam_size, TensorType({DT_INT32}))
-    .INPUT(frame_size, TensorType({DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .REQUIRED_ATTR(window_size, Int)
+    .INPUT(alpha, TensorType::BasicType())
+    .INPUT(energy, TensorType::BasicType())
+    .INPUT(offset, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType::BasicType())
+    .REQUIRED_ATTR(ksize, Int)
     .OP_END_FACTORY_REG(MovingSumWithSigmoid)
+
+
+/**
+* @brief Sum "x1" and "x2" according to the offsets recorded in "seq_len1" and "seq_len2". \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li x1: A Tensor. Supports BasicType.
+* @li x2: A Tensor. Supports BasicType.
+* @li seq_len1: A Tensor. Supports int32.
+* @li seq_len2: A Tensor. Supports int32. \n
+
+* @par Outputs:
+* y: A Tensor with the same type as "x1". \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynSeqOuter)
+    .INPUT(x1, TensorType::BasicType())
+    .INPUT(x2, TensorType::BasicType())
+    .INPUT(seq_len1, TensorType({DT_INT32}))
+    .INPUT(seq_len2, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(DynSeqOuter)
 
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
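The REG_OP registrations added in this patch are what generate the ge::op:: wrapper classes used when composing graphs. Below is a rough usage sketch for the revised MovingSumWithSigmoid signature (three inputs "alpha", "energy", "offset" plus the "ksize" attribute), assuming the usual GE IR pattern in which REG_OP(X) yields a ge::op::X class with set_input_<name>() and set_attr_<name>() setters; the include paths, the Data placeholders, and the ksize value of 16 are illustrative, not taken from this patch.

    #include "graph/graph.h"
    #include "array_ops.h"       // ge::op::Data placeholder op (illustrative include path)
    #include "selection_ops.h"   // MovingSumWithSigmoid / DynSeqOuter registrations

    ge::Graph BuildMovingSumGraph() {
      // Placeholders for the three inputs of the revised signature.
      auto alpha  = ge::op::Data("alpha");
      auto energy = ge::op::Data("energy");
      auto offset = ge::op::Data("offset");

      // "ksize" replaces the old "window_size" attribute; 16 is an arbitrary example value.
      auto moving_sum = ge::op::MovingSumWithSigmoid("moving_sum_with_sigmoid")
                            .set_input_alpha(alpha)
                            .set_input_energy(energy)
                            .set_input_offset(offset)
                            .set_attr_ksize(16);

      ge::Graph graph("moving_sum_graph");
      graph.SetInputs({alpha, energy, offset}).SetOutputs({moving_sum});
      return graph;
    }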