
Pre Merge pull request !2113 from yanghaoran/r1.7

pull/2113/MERGE
yanghaoran (Gitee), 3 years ago
commit 0bcd6d108e
9 changed files with 242 additions and 36 deletions
  1. +1 -1    metadef
  2. +13 -14  third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
  3. +0 -0    third_party/fwkacllib/inc/aicpu/common/type_def.h
  4. +0 -0    third_party/fwkacllib/inc/aicpu/tsd/status.h
  5. +72 -0   third_party/fwkacllib/inc/ops/nn_norm_ops.h
  6. +5 -5    third_party/fwkacllib/inc/ops/quantize_ops.h
  7. +2 -2    third_party/fwkacllib/inc/ops/reduce_ops.h
  8. +111 -0  third_party/fwkacllib/inc/ops/rnn.h
  9. +38 -14  third_party/fwkacllib/inc/ops/selection_ops.h

+ 1  - 1   metadef

@@ -1 +1 @@
-Subproject commit 569f685a2e6107daf613daf98d4ef8e29bde6e86
+Subproject commit e0efffc740a79d49ba0553478b51d9d3481771cb

+ 13  - 14   third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h

@@ -14,8 +14,8 @@
* limitations under the License.
*/

-#ifndef AICPU_TASK_STRUCT_H_
-#define AICPU_TASK_STRUCT_H_
+#ifndef AICPU_TASK_STRUCT_H
+#define AICPU_TASK_STRUCT_H

#include <cstdint>

@@ -46,7 +46,7 @@ enum class AicpuExtInfoMsgType {
EXT_MODEL_ID_MSG_TYPE = 0,
};

-typedef struct tagAicpuConfigMsg {
+struct AicpuConfigMsg {
uint8_t msgType;
uint8_t reserved1;
uint16_t bufLen;
@@ -54,26 +54,25 @@ typedef struct tagAicpuConfigMsg {
uint64_t bufAddr;
uint32_t tsId;
uint32_t reserved2;
-} AicpuConfigMsg;
-
+};

-typedef struct tagAicpuModelIdInfo {
+struct AicpuModelIdInfo {
uint32_t modelId;
uint32_t extendModelId;
uint32_t extendInfo[13];
-} AicpuModelIdInfo;
+};

// 64 bytes
-typedef struct tagAicpuExtendInfo {
+struct AicpuExtendInfo {
uint8_t msgType;
uint8_t version;
uint8_t reserved[2];
union {
AicpuModelIdInfo modelIdMap;
};
-} AicpuExtendInfo;
+};

-typedef struct tagAicoreErrMsgInfo {
+struct AicoreErrMsgInfo {
uint8_t errType;
uint8_t version;
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
@@ -83,9 +82,9 @@ typedef struct tagAicoreErrMsgInfo {
uint32_t streamId;
uint64_t transactionId;
uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */
-} AicoreErrMsgInfo;
+};

-typedef struct tagAicpuErrMsgInfo {
+struct AicpuErrMsgInfo {
uint8_t errType;
uint8_t version;
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
@@ -96,10 +95,10 @@ typedef struct tagAicpuErrMsgInfo {
char opName[64]; /* op name str */
char errDesc[128]; /* err msg desc info */
uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */
-} AicpuErrMsgInfo;
+};
#pragma pack(pop)

} // namespace aicpu

-#endif // AICPU_TASK_STRUCT_H_
+#endif // AICPU_TASK_STRUCT_H

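The aicpu_task_struct.h change above drops the C-style typedef wrappers: in C++ a struct tag is already a type name, so "typedef struct tagX { ... } X;" and "struct X { ... };" give callers the same usable name and, with the same #pragma pack in effect, the same layout. A minimal sketch with made-up struct names (not from the header):

// Illustrative only: both spellings let callers declare "ExampleMsg m;" in C++;
// the second is the idiomatic C++ form the new header switches to.
#include <cstdint>

typedef struct tagExampleMsgOld {   // C-style spelling the old header used
  uint8_t msgType;
  uint16_t bufLen;
} ExampleMsgOld;

struct ExampleMsgNew {              // plain C++ spelling the new header uses
  uint8_t msgType;
  uint16_t bufLen;
};

static_assert(sizeof(ExampleMsgOld) == sizeof(ExampleMsgNew), "same layout");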

third_party/fwkacllib/inc/common/type_def.h → third_party/fwkacllib/inc/aicpu/common/type_def.h


third_party/fwkacllib/inc/tsd/status.h → third_party/fwkacllib/inc/aicpu/tsd/status.h


+ 72  - 0   third_party/fwkacllib/inc/ops/nn_norm_ops.h

@@ -1739,5 +1739,77 @@ REG_OP(DropoutWithMulsAndSoftmaxGrad)
.REQUIRED_ATTR(alpha, Float)
.ATTR(axes, ListInt, { -1 })
.OP_END_FACTORY_REG(DropoutWithMulsAndSoftmaxGrad)

/**
* @brief Loss function that measures the softmax cross entropy. \n

* @par Inputs:
* Three inputs, including:
* @li scores: A Tensor. Must be one of the following types: half, float32, double.
* A "batch_size * num_classes" matrix.
* @li labels: A Tensor. Must be one of the following types: "int32", "int64".
* @li weights: A manual rescaling weight given to each class.
* If given, it has to be a 1D Tensor assigning weight to each of the classes.
* Otherwise, it is treated as if having all ones. \n

* @par Attributes:
* ignore_index: Specifies a target value that is ignored and does not contribute to the input gradient.
* It's an optional value.
* reduction: A character string from "none", "mean", and "sum", specifying the gradient output mode. Defaults to "mean" . \n

* @par Outputs:
* @li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "scores".
* @li log_prop: A Tensor. Has the same type as "scores" . \n

* @par Third-party framework compatibility
* Compatible with the ONNX operator SoftmaxCrossEntropyLoss.
*/
REG_OP(SoftmaxCrossEntropyLoss)
.INPUT(scores, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
.INPUT(labels, TensorType({DT_INT32, DT_INT64}))
.OPTIONAL_INPUT(weights, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
.ATTR(ignore_index, Int, 0)
.ATTR(reduction, String, "mean")
.OUTPUT(loss, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
.OUTPUT(log_prop, TensorType({DT_DOUBLE,DT_FLOAT16,DT_FLOAT,DT_BFLOAT16}))
.OP_END_FACTORY_REG(SoftmaxCrossEntropyLoss)
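For readers unfamiliar with the op, a rough reference sketch of the per-sample loss the comment above describes (standard softmax-cross-entropy semantics, as in the ONNX operator). This is not the Ascend kernel; the function name and flat layout are ours:

// Rough reference only: loss[i] = -weights[labels[i]] * log_softmax(scores[i])[labels[i]],
// with samples whose label equals ignore_index contributing zero ("none" reduction).
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> SoftmaxCrossEntropyLossRef(const std::vector<std::vector<float>>& scores,  // [batch][num_classes]
                                              const std::vector<int>& labels,                 // [batch]
                                              const std::vector<float>& weights,              // [num_classes], all ones if absent
                                              int ignore_index) {
  std::vector<float> loss(scores.size(), 0.0f);
  for (std::size_t i = 0; i < scores.size(); ++i) {
    if (labels[i] == ignore_index) continue;                    // ignored target: no contribution
    const float max_v = *std::max_element(scores[i].begin(), scores[i].end());
    float sum_exp = 0.0f;
    for (float v : scores[i]) sum_exp += std::exp(v - max_v);   // numerically stable softmax denominator
    const float log_prob = scores[i][labels[i]] - max_v - std::log(sum_exp);
    loss[i] = -weights[labels[i]] * log_prob;
  }
  return loss;  // reduction == "mean" or "sum" would fold this vector into a scalar
}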

/**
* @brief Function axpy with softmax and dropoutdomask . \n

* @par Inputs:
* Three inputs, including:
* @li x1: A mutable Tensor. The type only supports float16.
* @li x2: A mutable Tensor. The type only supports float16.
* @li mask: A mutable Tensor. Must meet all of the following rules:
* shape of mask should be 1D.
* dtype of mask should be uint8.
* value of shape should meet the following algorithm:
* value = (size(x) + 128 - 1) // 128 * 128 . \n

* @par Attributes:
* @li alpha: An attribute used to scale the tensor. The type is float . \n
* @li input_keep_prob: An attribute used to judge which units should be kept.
* The type is float . \n
* @li axis: A list of int. The dimension softmax would be performed on. Defaults
* to "[-1]" . \n

* @par Outputs:
* y1: A mutable Tensor. Has the same type as "x1". \n
* y2: A mutable Tensor. Has the same type as "x1". \n

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(AxpyWithSoftmaxAndDropoutdomask)
.INPUT(x1, TensorType({DT_FLOAT16}))
.INPUT(x2, TensorType({DT_FLOAT16}))
.INPUT(mask, TensorType({DT_UINT8}))
.OUTPUT(y1, TensorType({DT_FLOAT16}))
.OUTPUT(y2, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(alpha, Float)
.REQUIRED_ATTR(input_keep_prob, Float)
.ATTR(axis, ListInt, {-1})
.OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropoutdomask)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_
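The mask-length rule quoted in the AxpyWithSoftmaxAndDropoutdomask comment above is simply "round size(x) up to a multiple of 128". A tiny sketch of that rule (helper name is ours, not from the header):

// value = (size(x) + 128 - 1) // 128 * 128, i.e. size(x) rounded up to a multiple of 128.
#include <cstdint>

constexpr int64_t AlignedMaskLen(int64_t x_size) {
  return (x_size + 128 - 1) / 128 * 128;
}

static_assert(AlignedMaskLen(1) == 128, "");
static_assert(AlignedMaskLen(128) == 128, "");
static_assert(AlignedMaskLen(129) == 256, "");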

+ 5  - 5   third_party/fwkacllib/inc/ops/quantize_ops.h

@@ -115,13 +115,13 @@ REG_OP(AscendQuant)
*@brief Dequantizes the input . \n

*@par Inputs:
-*@li x: An tensor of type int32, specifying the input.
-*@li deq_scale: An tensor of type float16 or uint64, specifying the scaling ratio . \n
+* @li x: An tensor of type int32, specifying the input.
+* @li deq_scale: An tensor of type uint64, specifying the scaling ratio . \n

*@par Attributes:
-*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
-*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
-*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n
+* @li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
+* @li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
+* @li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n

*@par Outputs:
*y: The dequantized output tensor of type float16 or float32. \n

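The quantize_ops.h hunk above only tightens the documentation (deq_scale is now described as uint64 only). As a reminder of what the dequantize op computes, here is a hedged per-element sketch: it ignores sqrt_mode and uses a plain float scale instead of the packed uint64 deq_scale, so treat it as an approximation of the documented behaviour, not the real kernel.

#include <algorithm>
#include <cstdint>

// Approximate per-element dequantize: scale the int32 value, optionally fuse a ReLU.
float DequantElem(int32_t x, float scale, bool relu_flag) {
  float y = static_cast<float>(x) * scale;
  if (relu_flag) y = std::max(y, 0.0f);
  return y;
}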

+ 2  - 2   third_party/fwkacllib/inc/ops/reduce_ops.h

@@ -690,7 +690,7 @@ REG_OP(ReduceMean)
*@li keep_dims: A bool or NoneType.
* - If true, retains reduced dimensions with length 1.
* - If false, the rank of the tensor is reduced by 1 for each entry in axis.
*@li keep_dims: A bool default True.
*@li noop_with_empty_axes: A bool default False.
* - If true, same as tf.
* - If false, when x's shape is [], reduce all dims, for onnx.
*@par Outputs:
@@ -707,7 +707,7 @@ REG_OP(ReduceMeanD)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(axes, ListInt)
.ATTR(keep_dims, Bool, false)
-.ATTR(noop_with_empty_axes, Bool, true)
+.ATTR(noop_with_empty_axes, Bool, false)
.OP_END_FACTORY_REG(ReduceMeanD)

/**

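The reduce_ops.h change flips the default of noop_with_empty_axes from true to false. The flag only matters when the axes list is empty; below is a hedged 1-D sketch of that case, following the ONNX ReduceMean convention the comment refers to (function names are ours):

#include <numeric>
#include <vector>

// Flat mean of every element.
float MeanOfAll(const std::vector<float>& x) {
  return std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size());
}

// Behaviour when "axes" is empty; per-axis reduction for non-empty axes is not shown.
std::vector<float> ReduceMeanEmptyAxes(const std::vector<float>& x, bool noop_with_empty_axes) {
  if (noop_with_empty_axes) return x;  // true: "same as tf", i.e. reduce over no axes (identity)
  return {MeanOfAll(x)};               // false (new default): reduce all dims, ONNX-style
}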

+ 111  - 0   third_party/fwkacllib/inc/ops/rnn.h

@@ -348,6 +348,117 @@ REG_OP(DynamicRNNV2)
.OP_END_FACTORY_REG(DynamicRNNV2)

/**
* @brief: DynamicRNNV2Grad calculation.
* @par Inputs:
* twenty-one inputs:
* @li x:A required 4D Tensor. Must be one of the following types: float16, float32.
* @li w_x:A required 4D Tensor. Must be one of the following types: float16, float32.
* @li w_h:A required 4D Tensor. Must be one of the following types: float16, float32.
* @li y:A 4D Tensor. Must be one of the following types: float16, float32.
* @li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
* @li init_c:A 4D Tensor. Must be one of the following types: float16, float32.
* @li h:A 4D Tensor. Must be one of the following types: float16, float32.
* @li c:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dh:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dc:A 4D Tensor. Must be one of the following types: float16, float32.
* @li i:A 4D Tensor. Must be one of the following types: float16, float32.
* @li j:A 4D Tensor. Must be one of the following types: float16, float32.
* @li f:A 4D Tensor. Must be one of the following types: float16, float32.
* @li o:A 4D Tensor. Must be one of the following types: float16, float32.
* @li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
* @li seq_length:A 1D Tensor. Must be one of the following types: int32.
* @li wci:A 4D Tensor. Must be one of the following types: float16, float32.
* @li wcf:A 4D Tensor. Must be one of the following types: float16, float32.
* @li wco:A 4D Tensor. Must be one of the following types: float16, float32.
* @li mask:A 1D Tensor. Must be one of the following types: int8. \n

* @par Attributes:
* @li cell_type: A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
* @li direction: A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL".
* Only UNIDIRECTIONAL is currently supported.
* @li cell_depth: An integer identifying the cell depth in the op. Defaults to 1. Only 1 is currently supported.
* @li use_peephole: A bool identifying whether to use peephole connections in the op. Defaults to false.
* Only false is currently supported.
* @li keep_prob: A float identifying the keep prob in the op. Defaults to 1. Only 1 is currently supported.
* @li cell_clip: A float identifying the cell clip in the op. Defaults to -1. Only -1 is currently supported.
* @li num_proj: An integer identifying the num projection in the op. Defaults to 0. Only 0 is currently supported.
* @li time_major: A bool identifying the time major in the op. Defaults to true. Only true is currently supported.
* @li activation: A string identifying the type of activation function in the op. Defaults to "tanh".
* Only "tanh" is currently supported.
* @li recurrent_activation: A string identifying the type of recurrent activation function in the op. Defaults to "sigmoid".
* Only "sigmoid" is currently supported.
* @li gate_order: A string identifying the gate order in the op. Supports "ijfo" and "ifco". Defaults to "ijfo".
* Set "ijfo" for the TF operator LSTM; set "ifco" for the TF Keras/PyTorch LSTM.
* @li stateful: A bool identifying whether the op is stateful. Defaults to false. Only false is currently supported.
* @li merge_mode: A string identifying the merge mode in the op. Defaults to "concat".
* Only "concat" is currently supported. \n

* @par Outputs:
* nine outputs:
* @li dw_x:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dw_h:A 4D Tensor. Must be one of the following types: float16, float32.
* @li db:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dx:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dwci:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dwcf:A 4D Tensor. Must be one of the following types: float16, float32.
* @li dwco:A 4D Tensor. Must be one of the following types: float16, float32.

* @par Third-party framework compatibility:
* Compatible with the TF operator LSTM or TF keras operator LSTM.

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicRNNV2Grad)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(w_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(w_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
.OUTPUT(dw_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dw_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(db, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dc_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_OUTPUT(dwci, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_OUTPUT(dwcf, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_OUTPUT(dwco, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(cell_type, String, "LSTM")
.ATTR(direction, String, "UNIDIRECTIONAL")
.ATTR(cell_depth, Int, 1)
.ATTR(use_peephole, Bool, false)
.ATTR(keep_prob, Float, 1.0)
.ATTR(cell_clip, Float, -1.0)
.ATTR(num_proj, Int, 0)
.ATTR(time_major, Bool, true)
.ATTR(activation, String, "tanh")
.ATTR(recurrent_activation, String, "sigmoid")
.ATTR(gate_order, String, "ijfo")
.ATTR(stateful, Bool, false)
.ATTR(merge_mode, String, "concat")
.OP_END_FACTORY_REG(DynamicRNNV2Grad)
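The gate_order attribute above decides which quarter of the fused gate dimension each LSTM gate occupies in the weight, bias and gradient tensors. A hedged sketch of that mapping as we read it ("ijfo" for the TF LSTM layout, "ifco" for Keras/PyTorch, where j and c both denote the cell-candidate gate); the helper is illustrative, not part of the header:

#include <string>

// i = input gate, j/c = cell candidate, f = forget gate, o = output gate.
// Returns which of the four fused blocks (0..3) holds the given gate, or -1 if unknown.
int GateBlockIndex(const std::string& gate_order, char gate) {
  const std::string order = (gate_order == "ifco") ? "ifco" : "ijfo";
  for (int idx = 0; idx < 4; ++idx) {
    const char g = order[idx];
    const bool same_gate = (g == gate) ||
                           (gate == 'c' && g == 'j') || (gate == 'j' && g == 'c');
    if (same_gate) return idx;  // j and c name the same (cell candidate) gate
  }
  return -1;
}
// Example: the forget gate 'f' is block 2 under "ijfo" but block 1 under "ifco".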

/**
*@brief: DynamicRNNV3 calculation.
*@par Inputs:
*ten inputs:


+ 38  - 14   third_party/fwkacllib/inc/ops/selection_ops.h

@@ -2534,32 +2534,56 @@ REG_OP(StridedSliceV3)
.OP_END_FACTORY_REG(StridedSliceV3)

/**
-*@brief MovingSumWithSigmoid.
+* @brief Sum the alpha according to the offset and ksize,
+* and multiply it with the sigmoid value of energy. \n

-*@par Inputs:
-*Four inputs, including:
+* @par Inputs:
+* Three inputs, including:
* @li alpha: A Tensor. Must be one of the following types: float32, float16.
* @li energy: A Tensor. Must be one of the following types: float32, float16.
-* @li beam_size: A Tensor of type int32.
-* @li frame_size: A Tensor of type int32. \n
+* @li offset: A Tensor of type int32. \n

*@par Outputs:
-* y: A Tensor. Has the same type as "alpha". \n
+* y: A Tensor with same type as "alpha". \n
+*
* @par Attributes:
-* window_size: A int.
+* ksize: An int.
+*
* @par Restrictions:
-* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(MovingSumWithSigmoid)
-.INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
-.INPUT(energy, TensorType({DT_FLOAT16, DT_FLOAT}))
-.INPUT(beam_size, TensorType({DT_INT32}))
-.INPUT(frame_size, TensorType({DT_INT32}))
-.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-.REQUIRED_ATTR(window_size, Int)
+.INPUT(alpha, TensorType::BasicType())
+.INPUT(energy, TensorType::BasicType())
+.INPUT(offset, TensorType({DT_INT32}))
+.OUTPUT(y, TensorType::BasicType())
+.REQUIRED_ATTR(ksize, Int)
.OP_END_FACTORY_REG(MovingSumWithSigmoid)
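A loose reading of the MovingSumWithSigmoid description above: a moving window sum over ksize values of alpha, multiplied element-wise by sigmoid(energy). The real op additionally uses the offset input to delimit batched sequences, which this 1-D sketch does not reproduce; names and indexing are assumptions.

#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> MovingSumWithSigmoidRef(const std::vector<float>& alpha,
                                           const std::vector<float>& energy,
                                           int ksize) {
  std::vector<float> y(alpha.size(), 0.0f);
  for (std::size_t i = 0; i < alpha.size(); ++i) {
    float window_sum = 0.0f;
    for (int k = 0; k < ksize && i + k < alpha.size(); ++k) window_sum += alpha[i + k];
    const float sig = 1.0f / (1.0f + std::exp(-energy[i]));
    y[i] = window_sum * sig;  // moving sum of alpha scaled by sigmoid(energy)
  }
  return y;
}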


/**
* @brief Sum X1 and X2 according to the offset recorded in seq_len1 and seq_len2. \n

* @par Inputs:
* Four inputs, including:
* @li x1: A Tensor. Support BasicType.
* @li x2: A Tensor. Support BasicType.
* @li seq_len1: A Tensor. Support int32.
* @li seq_len2: A Tensor. Support int32. \n

* @par Outputs:
* y: A Tensor with same type as "x1". \n

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynSeqOuter)
.INPUT(x1, TensorType::BasicType())
.INPUT(x2, TensorType::BasicType())
.INPUT(seq_len1, TensorType({DT_INT32}))
.INPUT(seq_len2, TensorType({DT_INT32}))
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(DynSeqOuter)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
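The DynSeqOuter comment above is terse, so the following is only one possible reading, not confirmed by the header: for each batch element, form all pairwise ("outer") sums between the valid prefixes of x1 and x2, whose lengths come from seq_len1 and seq_len2. Layout and names are assumptions.

#include <cstddef>
#include <vector>

std::vector<float> DynSeqOuterRef(const std::vector<std::vector<float>>& x1,  // [batch][len1]
                                  const std::vector<std::vector<float>>& x2,  // [batch][len2]
                                  const std::vector<int>& seq_len1,
                                  const std::vector<int>& seq_len2) {
  std::vector<float> y;
  for (std::size_t b = 0; b < x1.size(); ++b) {
    for (int i = 0; i < seq_len1[b]; ++i) {
      for (int j = 0; j < seq_len2[b]; ++j) {
        y.push_back(x1[b][i] + x2[b][j]);  // pairwise sum over the valid prefix of each sequence
      }
    }
  }
  return y;
}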
