From d7c4d1089076ced0f4a3b290ea369b8831eeb9c2 Mon Sep 17 00:00:00 2001
From: yanghaoran
Date: Mon, 21 Mar 2022 14:53:44 +0800
Subject: [PATCH] upgrade Ascend package 21 Mar 22

---
 metadef                                            |   2 +-
 .../fwkacllib/inc/aicpu/common/aicpu_task_struct.h |  27 +++--
 .../fwkacllib/inc/{ => aicpu}/common/type_def.h    |   0
 third_party/fwkacllib/inc/{ => aicpu}/tsd/status.h |   0
 third_party/fwkacllib/inc/ops/nn_norm_ops.h        |  72 +++++++++++++
 third_party/fwkacllib/inc/ops/quantize_ops.h       |  10 +-
 third_party/fwkacllib/inc/ops/reduce_ops.h         |   4 +-
 third_party/fwkacllib/inc/ops/rnn.h                | 111 +++++++++++++++++++++
 third_party/fwkacllib/inc/ops/selection_ops.h      |  52 +++++++---
 9 files changed, 242 insertions(+), 36 deletions(-)
 rename third_party/fwkacllib/inc/{ => aicpu}/common/type_def.h (100%)
 rename third_party/fwkacllib/inc/{ => aicpu}/tsd/status.h (100%)

diff --git a/metadef b/metadef
index 569f685a..e0efffc7 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 569f685a2e6107daf613daf98d4ef8e29bde6e86
+Subproject commit e0efffc740a79d49ba0553478b51d9d3481771cb
diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
index 2d4b91cb..4425f134 100644
--- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
+++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef AICPU_TASK_STRUCT_H_
-#define AICPU_TASK_STRUCT_H_
+#ifndef AICPU_TASK_STRUCT_H
+#define AICPU_TASK_STRUCT_H
 
 #include <cstdint>
 
@@ -46,7 +46,7 @@ enum class AicpuExtInfoMsgType {
   EXT_MODEL_ID_MSG_TYPE = 0,
 };
 
-typedef struct tagAicpuConfigMsg {
+struct AicpuConfigMsg {
   uint8_t msgType;
   uint8_t reserved1;
   uint16_t bufLen;
@@ -54,26 +54,25 @@
   uint64_t bufAddr;
   uint32_t tsId;
   uint32_t reserved2;
-} AicpuConfigMsg;
-
+};
 
-typedef struct tagAicpuModelIdInfo {
+struct AicpuModelIdInfo {
   uint32_t modelId;
   uint32_t extendModelId;
   uint32_t extendInfo[13];
-} AicpuModelIdInfo;
+};
 
 // 64 bytes
-typedef struct tagAicpuExtendInfo {
+struct AicpuExtendInfo {
   uint8_t msgType;
   uint8_t version;
   uint8_t reserved[2];
   union {
     AicpuModelIdInfo modelIdMap;
   };
-} AicpuExtendInfo;
+};
 
-typedef struct tagAicoreErrMsgInfo {
+struct AicoreErrMsgInfo {
   uint8_t errType;
   uint8_t version;
   uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
@@ -83,9 +82,9 @@
   uint32_t streamId;
   uint64_t transactionId;
   uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */
-} AicoreErrMsgInfo;
+};
 
-typedef struct tagAicpuErrMsgInfo {
+struct AicpuErrMsgInfo {
   uint8_t errType;
   uint8_t version;
   uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
@@ -96,10 +95,10 @@
   char opName[64]; /* op name str */
   char errDesc[128]; /* err msg desc info */
   uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */
-} AicpuErrMsgInfo;
+};
 
 #pragma pack(pop)
 
 } // namespace aicpu
 
-#endif // AICPU_TASK_STRUCT_H_
+#endif // AICPU_TASK_STRUCT_H
diff --git a/third_party/fwkacllib/inc/common/type_def.h b/third_party/fwkacllib/inc/aicpu/common/type_def.h
similarity index 100%
rename from third_party/fwkacllib/inc/common/type_def.h
rename to third_party/fwkacllib/inc/aicpu/common/type_def.h
diff --git a/third_party/fwkacllib/inc/tsd/status.h b/third_party/fwkacllib/inc/aicpu/tsd/status.h
similarity index 100%
rename from third_party/fwkacllib/inc/tsd/status.h
rename to third_party/fwkacllib/inc/aicpu/tsd/status.h
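A note on the aicpu_task_struct.h hunks above: the patch drops the C-style "typedef struct tagX { ... } X;" spelling in favour of plain "struct X { ... };". In C++ the struct tag is itself a type name, so code written against the old typedef name keeps compiling. Below is a minimal stand-alone sketch of that point; the struct body is abridged to the fields visible in the hunks (fields falling between the hunks are omitted), and the main() harness is illustrative only, not part of the SDK.

    #include <cstdint>
    #include <cstdio>

    // Abridged stand-in for the new declaration style in aicpu_task_struct.h.
    // Only the fields visible in the hunks above are reproduced here.
    struct AicpuConfigMsg {
      uint8_t msgType;
      uint8_t reserved1;
      uint16_t bufLen;
      uint64_t bufAddr;
      uint32_t tsId;
      uint32_t reserved2;
    };

    int main() {
      // Call sites written against the old typedef name compile unchanged,
      // because "AicpuConfigMsg" is now the struct's own name.
      AicpuConfigMsg msg{};
      msg.msgType = 0;
      msg.bufLen = 64;
      std::printf("sizeof(AicpuConfigMsg) = %zu\n", sizeof(AicpuConfigMsg));
      return 0;
    }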
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 2db73199..1cda06eb 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -1739,5 +1739,77 @@ REG_OP(DropoutWithMulsAndSoftmaxGrad)
     .REQUIRED_ATTR(alpha, Float)
     .ATTR(axes, ListInt, { -1 })
     .OP_END_FACTORY_REG(DropoutWithMulsAndSoftmaxGrad)
+
+/**
+* @brief Loss function that measures the softmax cross entropy. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li scores: A Tensor. Must be one of the following types: half, float32, double.
+* A "batch_size * num_classes" matrix.
+* @li labels: A Tensor. Must be one of the following types: "int32", "int64".
+* @li weights: A manual rescaling weight given to each class.
+* If given, it has to be a 1D Tensor assigning a weight to each of the classes.
+* Otherwise, it is treated as if having all ones. \n
+
+* @par Attributes:
+* @li ignore_index: Specifies a target value that is ignored and does not contribute to the input gradient.
+* It's an optional value.
+* @li reduction: A character string from "none", "mean", and "sum", specifying the gradient output mode. Defaults to "mean". \n
+
+* @par Outputs:
+* @li loss: A Tensor for per-example loss (a "batch_size" vector). Has the same type as "scores".
+* @li log_prop: A Tensor. Has the same type as "scores". \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator SoftmaxCrossEntropyLoss.
+*/
+REG_OP(SoftmaxCrossEntropyLoss)
+    .INPUT(scores, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .INPUT(labels, TensorType({DT_INT32, DT_INT64}))
+    .OPTIONAL_INPUT(weights, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .ATTR(ignore_index, Int, 0)
+    .ATTR(reduction, String, "mean")
+    .OUTPUT(loss, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .OUTPUT(log_prop, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
+    .OP_END_FACTORY_REG(SoftmaxCrossEntropyLoss)
+
+/**
+* @brief Function axpy with softmax and dropoutdomask. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li x1: A mutable Tensor. The type only supports float16.
+* @li x2: A mutable Tensor. The type only supports float16.
+* @li mask: A mutable Tensor. Must meet all of the following rules:
+* shape of mask should be 1D.
+* dtype of mask should be uint8.
+* value of shape should meet the following algorithm:
+* value = (size(x) + 128 - 1) // 128 * 128. \n
+
+* @par Attributes:
+* @li alpha: An attribute used to scale the tensor. The type is float. \n
+* @li input_keep_prob: An attribute used to judge which units should be kept.
+* The type is float. \n
+* @li axis: A list of int. The dimension softmax would be performed on. Defaults
+* to "[-1]". \n
+
+* @par Outputs:
+* @li y1: A mutable Tensor. Has the same type as "x1". \n
+* @li y2: A mutable Tensor. Has the same type as "x1". \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(AxpyWithSoftmaxAndDropoutdomask)
+    .INPUT(x1, TensorType({DT_FLOAT16}))
+    .INPUT(x2, TensorType({DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y1, TensorType({DT_FLOAT16}))
+    .OUTPUT(y2, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(alpha, Float)
+    .REQUIRED_ATTR(input_keep_prob, Float)
+    .ATTR(axis, ListInt, {-1})
+    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropoutdomask)
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_
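The AxpyWithSoftmaxAndDropoutdomask comment above requires the 1D uint8 "mask" input to have length (size(x) + 128 - 1) // 128 * 128, i.e. the element count of "x" rounded up to the next multiple of 128. A small sketch of that arithmetic; the helper name and the checks are illustrative and not part of these headers.

    #include <cstdint>

    // Mask length rule from the operator comment:
    // value = (size(x) + 128 - 1) / 128 * 128 (round up to a multiple of 128).
    constexpr uint64_t AlignedMaskLen(uint64_t x_size) {
      return (x_size + 128 - 1) / 128 * 128;
    }

    static_assert(AlignedMaskLen(1) == 128, "a single element still needs one full 128-element block");
    static_assert(AlignedMaskLen(128) == 128, "exact multiples are unchanged");
    static_assert(AlignedMaskLen(129) == 256, "partial blocks round up");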
diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h
index 2fdf2aa3..1f30db61 100644
--- a/third_party/fwkacllib/inc/ops/quantize_ops.h
+++ b/third_party/fwkacllib/inc/ops/quantize_ops.h
@@ -115,13 +115,13 @@ REG_OP(AscendQuant)
 *@brief Dequantizes the input . \n
 
 *@par Inputs:
-*@li x: An tensor of type int32, specifying the input.
-*@li deq_scale: An tensor of type float16 or uint64, specifying the scaling ratio . \n
+* @li x: A tensor of type int32, specifying the input.
+* @li deq_scale: A tensor of type uint64, specifying the scaling ratio. \n
 
 *@par Attributes:
-*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
-*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
-*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n
+* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
+* @li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
+* @li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT". \n
 
 *@par Outputs:
 *y: The dequantized output tensor of type float16 or float32. \n
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index 6e30f954..b481bb29 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -690,7 +690,7 @@ REG_OP(ReduceMean)
 *@li keep_dims: A bool or NoneType.
 * - If true, retains reduced dimensions with length 1.
 * - If false, the rank of the tensor is reduced by 1 for each entry in axis.
-*@li keep_dims: A bool default True.
+*@li noop_with_empty_axes: A bool. Defaults to False.
 * - If true, same as tf.
 * - If false, when x's shape is [], reduce all dims, for onnx.
 *@par Outputs:
@@ -707,7 +707,7 @@ REG_OP(ReduceMeanD)
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(axes, ListInt)
     .ATTR(keep_dims, Bool, false)
-    .ATTR(noop_with_empty_axes, Bool, true)
+    .ATTR(noop_with_empty_axes, Bool, false)
     .OP_END_FACTORY_REG(ReduceMeanD)
 
 /**
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index 7feec412..b6a775ce 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -348,6 +348,117 @@ REG_OP(DynamicRNNV2)
     .OP_END_FACTORY_REG(DynamicRNNV2)
 
 /**
+* @brief: DynamicRNNV2Grad calculation.
+* @par Inputs:
+* twenty-one inputs:
+* @li x: A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li w_x: A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li w_h: A required 4D Tensor. Must be one of the following types: float16, float32.
+* @li y: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_h: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li init_c: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li h: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li c: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dy: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li i: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li j: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li f: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li o: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li tanhct: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length: A 1D Tensor. Must be one of the following types: int32.
+* @li wci: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li wcf: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li wco: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li mask: A 1D Tensor. Must be one of the following types: uint8. \n
+
+* @par Attributes:
+* @li cell_type: A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
+* @li direction: A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL".
+* Only UNIDIRECTIONAL is currently supported.
+* @li cell_depth: An integer identifying the cell depth in the op. Defaults to 1. Only 1 is currently supported.
+* @li use_peephole: A bool identifying whether to use peephole in the op. Defaults to false.
+* Only false is currently supported.
+* @li keep_prob: A float identifying the keep prob in the op. Defaults to 1. Only 1 is currently supported.
+* @li cell_clip: A float identifying the cell clip in the op. Defaults to -1. Only -1 is currently supported.
+* @li num_proj: An integer identifying the num projection in the op. Defaults to 0. Only 0 is currently supported.
+* @li time_major: A bool identifying the time major in the op. Defaults to true. Only true is currently supported.
+* @li activation: A string identifying the type of activation function in the op. Defaults to "tanh".
+* Only "tanh" is currently supported.
+* @li recurrent_activation: A string identifying the type of activation function in the op. Defaults to "sigmoid".
+* Only "sigmoid" is currently supported.
+* @li gate_order: A string identifying the gate order in the op. Supports "ijfo" and "ifco". Defaults to "ijfo".
+* Set "ijfo" for the TF operator LSTM, set "ifco" for the TF Keras/PyTorch LSTM.
+* @li stateful: A bool identifying the type of stateful in the op. Defaults to false. Only false is currently supported.
+* @li merge_mode: A string identifying the type of merge_mode in the op. Defaults to "concat".
+* Only "concat" is currently supported. \n
+
+* @par Outputs:
+* nine outputs:
+* @li dw_x: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dw_h: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li db: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dx: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dh_prev: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dc_prev: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwci: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwcf: A 4D Tensor. Must be one of the following types: float16, float32.
+* @li dwco: A 4D Tensor. Must be one of the following types: float16, float32.
+
+* @par Third-party framework compatibility:
+* Compatible with the TF operator LSTM or the TF Keras operator LSTM.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicRNNV2Grad)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(dw_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dw_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(db, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dc_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_OUTPUT(dwco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(cell_type, String, "LSTM")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(use_peephole, Bool, false)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(recurrent_activation, String, "sigmoid")
+    .ATTR(gate_order, String, "ijfo")
+    .ATTR(stateful, Bool, false)
+    .ATTR(merge_mode, String, "concat")
+    .OP_END_FACTORY_REG(DynamicRNNV2Grad)
+
+/**
 *@brief: DynamicRNNV3 calculation.
 *@par Inputs:
 *ten inputs:
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index 2cc8fd1d..88a61610 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -2534,32 +2534,56 @@ REG_OP(StridedSliceV3)
     .OP_END_FACTORY_REG(StridedSliceV3)
 
 /**
-*@brief MovingSumWithSigmoid.
+* @brief Sum the alpha according to the offset and ksize,
+  and multiply it by the sigmoid value of energy. \n
 
-*@par Inputs:
-*Four inputs, including:
+* @par Inputs:
+* Three inputs, including:
 * @li alpha: A Tensor. Must be one of the following types: float32, float16.
 * @li energy: A Tensor. Must be one of the following types: float32, float16.
-* @li beam_size: A Tensor of type int32.
-* @li frame_size: A Tensor of type int32. \n
+* @li offset: A Tensor of type int32. \n
 
 *@par Outputs:
-* y: A Tensor. Has the same type as "alpha". \n
+* y: A Tensor with the same type as "alpha". \n
 *
 * @par Attributes:
-* window_size: A int.
+* ksize: An int.
 *
 * @par Restrictions:
-* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(MovingSumWithSigmoid)
-    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .INPUT(energy, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .INPUT(beam_size, TensorType({DT_INT32}))
-    .INPUT(frame_size, TensorType({DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .REQUIRED_ATTR(window_size, Int)
+    .INPUT(alpha, TensorType::BasicType())
+    .INPUT(energy, TensorType::BasicType())
+    .INPUT(offset, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType::BasicType())
+    .REQUIRED_ATTR(ksize, Int)
     .OP_END_FACTORY_REG(MovingSumWithSigmoid)
+
+
+/**
+* @brief Sum "x1" and "x2" according to the offsets recorded in "seq_len1" and "seq_len2". \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li x1: A Tensor. Supports BasicType.
+* @li x2: A Tensor. Supports BasicType.
+* @li seq_len1: A Tensor. Supports int32.
+* @li seq_len2: A Tensor. Supports int32. \n
+
+* @par Outputs:
+* y: A Tensor with the same type as "x1". \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynSeqOuter)
+    .INPUT(x1, TensorType::BasicType())
+    .INPUT(x2, TensorType::BasicType())
+    .INPUT(seq_len1, TensorType({DT_INT32}))
+    .INPUT(seq_len2, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(DynSeqOuter)
 
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
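The REG_OP registrations added in this patch are what generate the ge::op:: wrapper classes used when composing graphs. Below is a rough usage sketch for the revised MovingSumWithSigmoid signature (three inputs "alpha", "energy", "offset" plus the "ksize" attribute), assuming the usual GE IR pattern in which REG_OP(X) yields a ge::op::X class with set_input_<name>() and set_attr_<name>() setters; the include paths, the Data placeholders, and the ksize value of 16 are illustrative, not taken from this patch.

    #include "graph/graph.h"
    #include "array_ops.h"       // ge::op::Data placeholder op (illustrative include path)
    #include "selection_ops.h"   // MovingSumWithSigmoid / DynSeqOuter registrations

    ge::Graph BuildMovingSumGraph() {
      // Placeholders for the three inputs of the revised signature.
      auto alpha  = ge::op::Data("alpha");
      auto energy = ge::op::Data("energy");
      auto offset = ge::op::Data("offset");

      // "ksize" replaces the old "window_size" attribute; 16 is an arbitrary example value.
      auto moving_sum = ge::op::MovingSumWithSigmoid("moving_sum_with_sigmoid")
                            .set_input_alpha(alpha)
                            .set_input_energy(energy)
                            .set_input_offset(offset)
                            .set_attr_ksize(16);

      ge::Graph graph("moving_sum_graph");
      graph.SetInputs({alpha, energy, offset}).SetOutputs({moving_sum});
      return graph;
    }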