
!1677 code_sync_0520_inc

From: @ding_fei_fei
Reviewed-by: @xsmq,@liucunwei
Signed-off-by: @liucunwei
tags/v1.3.0
mindspore-ci-bot (Gitee), 4 years ago
parent / commit c5be7f3198
18 changed files with 473 additions and 60 deletions
  1. inc/external/acl/acl_mdl.h: +13 -0
  2. inc/external/hccl/hccl.h: +16 -0
  3. metadef: +1 -1
  4. third_party/fwkacllib/inc/ops/array_ops.h: +23 -0
  5. third_party/fwkacllib/inc/ops/batch_ops.h: +2 -2
  6. third_party/fwkacllib/inc/ops/correlation.h: +52 -0
  7. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h: +0 -19
  8. third_party/fwkacllib/inc/ops/image_ops.h: +66 -0
  9. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h: +1 -1
  10. third_party/fwkacllib/inc/ops/nn_detect_ops.h: +0 -2
  11. third_party/fwkacllib/inc/ops/nn_norm_ops.h: +74 -1
  12. third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h: +57 -6
  13. third_party/fwkacllib/inc/ops/pad_ops.h: +6 -9
  14. third_party/fwkacllib/inc/ops/quantize_ops.h: +4 -4
  15. third_party/fwkacllib/inc/ops/rnn.h: +96 -1
  16. third_party/fwkacllib/inc/ops/transformation_ops.h: +46 -0
  17. third_party/fwkacllib/inc/runtime/kernel.h: +3 -3
  18. third_party/fwkacllib/inc/toolchain/slog.h: +13 -11

inc/external/acl/acl_mdl.h (+13, -0)

@@ -43,6 +43,7 @@ extern "C" {

#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
#define ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES "_datadump_original_op_names"

typedef struct aclmdlDataset aclmdlDataset;
typedef struct aclmdlDesc aclmdlDesc;
@@ -638,6 +639,18 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc,

/**
* @ingroup AscendCL
* @brief get attr value by op name
*
* @param modelDesc [IN] model description
* @param opName [IN] op name
* @param attr [IN] attr name
*
* @retval the attr value
*/
ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr);
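For orientation, a minimal usage sketch of the new query (not part of this change; it assumes a model already loaded via aclmdlLoadFromFile, the usual ACL_SUCCESS return code, and a hypothetical op name "conv1"):

    // Hedged sketch: read the recorded original op names for one op of a loaded model.
    aclmdlDesc *desc = aclmdlCreateDesc();
    if (aclmdlGetDesc(desc, modelId) == ACL_SUCCESS) {
        const char *origNames = aclmdlGetOpAttr(desc, "conv1", ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES);
        if (origNames != NULL) {
            // use origNames here; NULL is assumed to mean the op or attribute was not found
        }
    }
    (void)aclmdlDestroyDesc(desc);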

/**
* @ingroup AscendCL
* @brief get input name by index
*
* @param modelDesc [IN] model description


inc/external/hccl/hccl.h (+16, -0)

@@ -118,8 +118,24 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC
*/
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
aclrtStream stream);
/**
* @brief Get the rank size of this comm.
*
* @param comm A pointer identifying the communication resource based on.
* @param rankSize A pointer identifying the rank size.
* @return HcclResult
*/
extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);

/**
* @brief Get the rank id of this comm.
*
* @param comm A pointer identifying the communication resource based on.
* @param rank A pointer identifying the rank id.
* @return HcclResult
*/
extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
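A minimal sketch of how the two new queries are typically used together (not from this diff; "comm" is assumed to be a communicator created earlier, e.g. with HcclCommInitRootInfo):

    // Hedged sketch: query world size and local rank on an existing communicator.
    uint32_t rankSize = 0U;
    uint32_t rankId = 0U;
    if (HcclGetRankSize(comm, &rankSize) == HCCL_SUCCESS &&
        HcclGetRankId(comm, &rankId) == HCCL_SUCCESS) {
        // rankId lies in [0, rankSize); e.g. rank 0 could act as the broadcast root.
    }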
/**
* @brief Barrier operator.
*
* @param comm A pointer identifying the communication resource based on.


metadef (+1, -1)

@@ -1 +1 @@
Subproject commit 7cb171b9c511fec57ccc0ad746ef2126267fe18b
Subproject commit 7cbdf95765133b5a5b979c2231013f7c76c3d529

third_party/fwkacllib/inc/ops/array_ops.h (+23, -0)

@@ -1203,6 +1203,29 @@ REG_OP(Expand)
.OP_END_FACTORY_REG(Expand)

/**
*@brief Returns a tensor containing the indices of all non-zero elements of input. \n

*@par Inputs:
*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64.

*@par Attributes:
* transpose: the output tensor will be transposed if true. \n

*@par Outputs:
* y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
*Compatible with the PyTorch operator NonZero.
*/

REG_OP(NonZero)
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_INT64}))
.ATTR(transpose, Bool, false)
.OP_END_FACTORY_REG(NonZero)
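As an illustration of the registration above (example values, not from the diff): for x = [[1, 0, 2], [0, 3, 0]] the op returns the indices of the three non-zero elements, [[0, 0], [0, 2], [1, 1]] with shape [3, 2]; with transpose set to true the same indices are laid out as [[0, 0, 1], [0, 2, 1]] with shape [2, 3]. The default layout matches the PyTorch NonZero behaviour cited in the comment.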

/**
* @brief Expand the input tensor to a compatible shape. \n

* @par Inputs:


third_party/fwkacllib/inc/ops/batch_ops.h (+2, -2)

@@ -64,10 +64,10 @@ the same types as "x_tensors" . It's a dynamic output. \n
REG_OP(Batch)
.DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \
DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE}))
.OUTPUT(y_index, TensorType({ DT_INT64 }))
.OUTPUT(y_id, TensorType({ DT_INT64 }))
.DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \
DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL}))
.OUTPUT(y_index, TensorType({ DT_INT64 }))
.OUTPUT(y_id, TensorType({ DT_INT64 }))
.REQUIRED_ATTR(num_batch_threads, Int)
.REQUIRED_ATTR(max_batch_size, Int)
.ATTR(max_enqueued_batches, Int, 10)


third_party/fwkacllib/inc/ops/correlation.h (+52, -0)

@@ -0,0 +1,52 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file correlation.h
* \brief
*/
#ifndef GE_OP_CORRELATION_OPS_H
#define GE_OP_CORRELATION_OPS_H

#include "graph/operator_reg.h"

namespace ge {
/**
*@brief Computes a 2D Correlation given 4D "x" and "filter" tensors.
*
*@par Inputs:
* @li filter: A 4D tensor of filters.
* @li x: A 4D tensor of input images, batch number must equal to batch
* number of "filter", and channel must equal to channel of "filter".
*
*@par Attributes:
* @li groups: sets the correlation mode; must be 1 or equal to the number of channels.
*
*@par Outputs:
*y: A Tensor. Has the same type as "x".

*@par Third-party framework compatibility
* Compatible with caffe correlation custom operator.
*/
REG_OP(Correlation)
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
.ATTR(groups, Int, 1)
.OP_END_FACTORY_REG(Correlation)
} // namespace ge

#endif // GE_OP_CORRELATION_OPS_H

third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+0, -19)

@@ -3468,25 +3468,6 @@ REG_OP(AxpyV2)
.OP_END_FACTORY_REG(AxpyV2)

/**
* @brief Computes the result of x1 + x2.

* @par Inputs:
* @li x1: An ND tensor of type float16, float, int32.
* @li x2: An ND tensor of type float16, float, int32. \n

* @par Outputs:
* @li y: An ND tensor tensor with the same type as "x1". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Add.
*/
REG_OP(PtAdd)
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OP_END_FACTORY_REG(PtAdd)

/**
* @brief Computes the result of x1 * x2.

* @par Inputs:


third_party/fwkacllib/inc/ops/image_ops.h (+66, -0)

@@ -1484,6 +1484,55 @@ REG_OP(CombinedNonMaxSuppression)
.OP_END_FACTORY_REG(CombinedNonMaxSuppression)

/**
*@brief Resizes "images" with "offset" using bilinear interpolation. \n

*@par Inputs:
*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`.
*@li warp_offset: the resize offset, a 4-D float tensor of shape `[n, h, w, 2]`, where 2 means (x, y) for the offset point.

*@par Outputs:
*warp_img: A Tensor after resize. \n
*/
REG_OP(IMGWarp)
.INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
.INPUT(warp_offset, TensorType({DT_FLOAT32}))
.OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
.OP_END_FACTORY_REG(IMGWarp)

/**
*@brief Resizes "images" with "offset" using bilinear interpolation. \n

*@par Inputs:
*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`.
*@li map_offset: the resize offset, a 4-D float tensor of shape `[n, h, w, 2]`, where 2 means (x, y) for the resize point.

*@par Outputs:
*map_img: A Tensor after resize. \n
*/
REG_OP(Remap)
.INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
.INPUT(map_offset, TensorType({DT_FLOAT32}))
.OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
.OP_END_FACTORY_REG(Remap)
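Reading the Remap registration together with its comment, the intended semantics appear to be OpenCV-style remapping: for each output location (n, h, w), map_offset[n, h, w] holds an (x, y) source coordinate and map_img[n, h, w, c] is the bilinear sample of img at that coordinate. This reading is an interpretation of the doc comment above, not something stated elsewhere in the diff.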

/**
*@brief Resizes "images" with "offset" using bilinear interpolation. \n

*@par Inputs:
*@li img: input image, a 5-D tensor of shape `[n, 4, c, h, w]`,
* where 4 means input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bottom, w_right)].
*@li warp_index: the resize offset, a 4-D float tensor of shape `[n, 2, h, w]`, where 2 means (x, y) for the resize point.

*@par Outputs:
*warp_img: A Tensor after ResizeBilinear, a 4-D tensor of shape `[n, c, h, w]`. \n
*/
REG_OP(IMGWarpResize)
.INPUT(img, TensorType({DT_FLOAT32}))
.INPUT(warp_index, TensorType({DT_FLOAT32}))
.OUTPUT(warp_img, TensorType({DT_FLOAT32}))
.OP_END_FACTORY_REG(IMGWarpResize)

/**
*@brief Function spatial transformer . \n

*@par Inputs:
@@ -1802,5 +1851,22 @@ REG_OP(ImageUnfold)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(padding_mode, String, "zeros")
.OP_END_FACTORY_REG(ImageUnfold)
/**
*@brief This operation selects images into warp_images according to offsets.

*@par Inputs:
*@li images: 4-D Tensor with shape `[batch, height, width, 3]`.
*@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`.

*@par Outputs:
*warp_images: Returns 5-D Tensor with shape
`[batch, 4, new_height, new_width, 3]` and the same dtype as `images`.
*/
REG_OP(IMGWarpOffsets)
.INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
.INPUT(offsets, TensorType({DT_FLOAT, DT_INT32}))
.OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(IMGWarpOffsets)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_

third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+1, -1)

@@ -179,7 +179,7 @@ REG_OP(GEMM)
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n

*@par Inputs:
*Three inputs, including:
*Two inputs, including:
* @li x1: A matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
* @li x2: A matrix Tensor. Must be one of the following types: float16,


third_party/fwkacllib/inc/ops/nn_detect_ops.h (+0, -2)

@@ -1460,8 +1460,6 @@ REG_OP(DecodeBboxV2)
* @li y1: A Tensor. Must have the same type as x.
* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
*
*@attention Constraints:
* The upper limit of data on the direction axis is 7040.
*/
REG_OP(Sort)
.INPUT(x, TensorType({ DT_FLOAT16 }))


third_party/fwkacllib/inc/ops/nn_norm_ops.h (+74, -1)

@@ -55,7 +55,9 @@ REG_OP(LogSoftmaxGrad)
*Two inputs, including:
* @li features: A Tensor. Must be one of the following types: half, float32, double.
* A "batch_size * num_classes" matrix.
* @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes).
* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'.
* batch_size vector with values in [0, num_classes).
* This is the label for the given minibatch entry.


*@par Outputs:
@@ -639,6 +641,48 @@ REG_OP(LayerNormXBackprop)
.OP_END_FACTORY_REG(LayerNormXBackprop)

/**
*@brief LayerNormXBackpropV2 operator interface implementation
* calculating: dy, x, variance, mean, gamma
* pd_xl = data_dy*data_gamma
* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean)
* np.power((data_variance + EPSLON), (-1.5))),
* reduce_axis, keepdims=True)
* pd_mean = np.sum(((-1.0)*pd_xl
* np.power((data_variance + EPSLON), (-0.5))),
* reduce_axis, keepdims=True)
* + pd_var*(1.0/m)
* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True)
* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) +
* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m)
* res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSLON), (-0.5))

*@par Inputs:
*Five inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li variance: A Tensor. Must be one of the following types: float16, float32.
* @li mean: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*Two outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
* @li res_for_gamma: A Tensor. Must be one of the following types: float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(LayerNormXBackpropV2)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(LayerNormXBackpropV2)

/**
*@brief LayerNormBetaGammaBackprop operator interface implementation
* calculating: dy, x, variance, mean
* pd_xl = data_dy*data_gamma
@@ -682,6 +726,35 @@ REG_OP(LayerNormBetaGammaBackprop)
.OP_END_FACTORY_REG(LayerNormBetaGammaBackprop)

/**
*@brief LayerNormBetaGammaBackpropV2 operator interface implementation
* calculating: dy, x, variance, mean
* pd_gamma = np.sum((data_dy*res_for_gamma), param_axis, keepdims=True)
* pd_beta = np.sum(data_dy, param_axis, keepdims=True)

*@par Inputs:
*Three inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li variance: A Tensor. Must be one of the following types: float16, float32.
* @li mean: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*Two outputs, including:
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(LayerNormBetaGammaBackpropV2)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(res_for_gamma, TensorType({DT_FLOAT}))
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR(shape_gamma, ListInt)
.OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2)

/**
*@brief Return "output" according to the algorithm of dropout_do_mask:
* scale_x = x *(1 / keep_prob)
* output = select(mask == 1, scale_x, 0)


third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h (+57, -6)

@@ -536,13 +536,19 @@ REG_OP(Elu)
* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n

*@par Inputs:
*x: A float16, float32 or double, for the input data type . \n
*x: A float16, float32, for the input data type . \n

*@par Attributes:
*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n

*@par Attributes:
*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n

*@par Attributes:
*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n

*@par Outputs:
*y: A float16, float32 or double, for the normalized result . \n
*y: A float16, float32, for the normalized result . \n

*@attention Constraints:
*@li The input is of type float16 or float32 . \n
@@ -553,9 +559,11 @@ REG_OP(Elu)
*@li Compatible with ONNX's Celu operator
*/
REG_OP(Celu)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.ATTR(alpha, Float, 1.0)
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
.ATTR(alpha1, Float, 1.0)
.ATTR(alpha2, Float, 1.0)
.ATTR(alpha3, Float, 1.0)
.OP_END_FACTORY_REG(Celu)
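A quick numeric check of the formula quoted above, max(0, x) + min(0, alpha * (exp(x/alpha) - 1)), with the default alpha of 1.0: for x = 2.0 the result is 2.0, and for x = -1.0 it is exp(-1) - 1 ≈ -0.632, so the negative side saturates towards -alpha.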

/**
@@ -691,6 +699,25 @@ REG_OP(Mish)
.OP_END_FACTORY_REG(Mish)

/**
* @brief PyTorch mish_grad operator.
* @par Inputs:
* Three inputs, including:
* @li grad: A Tensor. Shape, data type and format are the same as "x".
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li tanhx: An optional Tensor. Shape, data type and format are the same as "x".
* @par Outputs:
* One output, including:
* @li x_grad: A Tensor. Shape, data type and format are the same as "x".
*/

REG_OP(MishGrad)
.INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
.INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 }))
.OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 }))
.OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
.OP_END_FACTORY_REG(MishGrad)
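For reference, Mish is x * tanh(softplus(x)), so the gradient this op produces is grad * (tanh(sp) + x * sigmoid(x) * (1 - tanh(sp)^2)) with sp = softplus(x). The optional tanhx input presumably carries a precomputed tanh(softplus(x)) so the kernel can reuse it; that last point is an assumption, not stated in the header.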

/**
* @brief pytorch hardtanh_backward operator.
*
* @par Inputs:
@@ -993,6 +1020,30 @@ REG_OP(HardSigmoidGrad)
.ATTR(beta, Float, 0.5)
.OP_END_FACTORY_REG(HardSigmoidGrad)

/**
* @brief Calculate the shrink function. \n

* @par Inputs:
* One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n
* @li bias: An optional float. Defaults to 0.0. \n

* @par Outputs:
* output_y: A Tensor with the same dtype and shape as "input_x". \n

* @par Third-party framework compatibility
* Compatible with the ONNX operator Shrink. \n
*/
REG_OP(Shrink)
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(lambd, Float, 0.5)
.ATTR(bias, Float, 0.0)
.OP_END_FACTORY_REG(Shrink)
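The ONNX Shrink referenced above maps x to x - bias when x > lambd, to x + bias when x < -lambd, and to 0 otherwise. With the defaults registered here (lambd = 0.5, bias = 0.0), an input of [-1.0, 0.2, 0.9] becomes [-1.0, 0.0, 0.9].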
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_

third_party/fwkacllib/inc/ops/pad_ops.h (+6, -9)

@@ -273,14 +273,11 @@ REG_OP(PadV3)
*@brief Pads a tensor.

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li paddings: A Tensor. Must be int32 type
* paddings is a required input tensor.

*@par Attributes:
* @li paddings: An required "vector<vector<int>>".
* For each dimension D of input, paddings[D, 0] indicates how many
* values to add before the contents of tensor in that dimension,
* and paddings[D, 1] indicates how many values to add after the
* contents of tensor in that dimension.
* @li constant_values: An optional int value for pad.
* @li mode: An optional string. Defaults to "constant"; indicates the padding mode,
* supporting "constant", "reflect" and "edge".
@@ -298,9 +295,9 @@ REG_OP(PadV3)
* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
*/
REG_OP(PadV3D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
.REQUIRED_ATTR(paddings, ListListInt)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(paddings, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(constant_values, Int, 0)
.ATTR(mode, String, "constant")
.ATTR(paddings_contiguous, Bool, true)
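As a worked example of the reworked signature (example shapes only, not from the diff): with x of shape [2, 3], a paddings input of [[0, 0], [1, 1]], mode "constant" and constant_values 0, the output y has shape [2, 5], with one zero column added before and after the last dimension.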


third_party/fwkacllib/inc/ops/quantize_ops.h (+4, -4)

@@ -214,7 +214,7 @@ REG_OP(AscendRequant)
*@brief Requantizes the input of int16 . \n

*@par Inputs:
*@li x: An NC1HWC0 tensor of type int16, specifying the input.
*@li x0: An NC1HWC0 tensor of type int16, specifying the input.
*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio.
*@li x1: An NC1HWC0 tensor of type int16 . \n

@@ -223,17 +223,17 @@ REG_OP(AscendRequant)
*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n

*@par Outputs:
*@li y: The dequantized output tensor of type int8 and with format NC1HWC0.
*@li y0: The dequantized output tensor of type int8 and with format NC1HWC0.
*@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(AscendRequantS16)
.INPUT(x, TensorType({DT_INT16}))
.INPUT(x0, TensorType({DT_INT16}))
.INPUT(req_scale, TensorType({DT_UINT64}))
.OPTIONAL_INPUT(x1, TensorType({DT_INT16}))
.OUTPUT(y, TensorType({DT_INT8}))
.OUTPUT(y0, TensorType({DT_INT8}))
.OUTPUT(y1, TensorType({DT_INT16}))
.ATTR(dual_output, Bool, false)
.ATTR(relu_flag, Bool, false)


third_party/fwkacllib/inc/ops/rnn.h (+96, -1)

@@ -33,6 +33,7 @@ namespace ge {
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n
*@li mask:A 1D Tensor. Must be one of the following types: uint8.

*@par Attributes:
*@li keep_prob:An integer identifying the keep prob in the op. Default to 1.
@@ -42,7 +43,6 @@ namespace ge {

*@par Outputs:
*seven outputs:
*@li mask:A 1D Tensor. Must be one of the following types: uint8.
*@li ct:A 4D Tensor. Must be one of the following types: float16, float32.
*@li ht:A 4D Tensor. Must be one of the following types: float16.
*@li it:A 4D Tensor. Must be one of the following types: float16, float32.
@@ -209,6 +209,7 @@ REG_OP(DynamicRNNGrad)
*@li time_major:An bool identifying the time major in the op. Default to true.
*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
*@li forget_bias:An float identifying the forget bias in the op. Default to 0.
*@li gate_order:A string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo".
*@li is_training:An bool identifying is training in the op. Default to true . \n

*@par Outputs:
@@ -253,10 +254,104 @@ REG_OP(DynamicRNN)
.ATTR(time_major, Bool, true)
.ATTR(activation, String, "tanh")
.ATTR(forget_bias, Float, 0.0)
.ATTR(gate_order, String, "ijfo")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(DynamicRNN)

/**
*@brief: DynamicRNNV2 calculation.
*@par Inputs:
*eleven inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32.
*The format must be FRACTAL_Z.
*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32.
*The format must be FRACTAL_Z.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND. \n

*@par Attributes:
*@li cell_type:A string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
*@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL".
*Only UNIDIRECTIONAL is currently supported.
*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
*@li use_peephole:A bool identifying whether to use peephole in the op. Default to false.
*@li keep_prob:A float identifying the keep prob in the op. Default to 1.
*@li cell_clip:A float identifying the cell clip in the op. Default to -1.
*@li num_proj:An integer identifying the num projection in the op. Default to 0.
*@li time_major:A bool identifying the time major in the op. Default to true.
*@li activation:A string identifying the type of activation function in the op. Default to "tanh".
*Only tanh is currently supported.
*@li recurrent_activation:A string identifying the type of recurrent activation function in the op. Default to "sigmoid".
*Support "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM.
*@li forget_bias:A float identifying the forget bias in the op. Default to 0.
*@li gate_order:A string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo".
*Set "ijfo" for the TF operator LSTM, and set "ifco" for TF Keras LSTM.
*@li stateful: A bool identifying whether the op is stateful. Default to false. Only false is currently supported.
*@li merge_mode: A string identifying the merge mode in the op. Default to "concat".
*Only "concat" is currently supported.
*@li is_training:A bool identifying whether the op is in training mode. Default to true . \n

*@par Outputs:
*eight outputs:
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*Return the last output_h.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*Return the last output_c.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@par Third-party framework compatibility:
* Compatible with the TF LSTM operator or the TF Keras LSTM operator.
*/

REG_OP(DynamicRNNV2)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(cell_type, String, "LSTM")
.ATTR(direction, String, "UNIDIRECTIONAL")
.ATTR(cell_depth, Int, 1)
.ATTR(use_peephole, Bool, false)
.ATTR(keep_prob, Float, 1.0)
.ATTR(cell_clip, Float, -1.0)
.ATTR(num_proj, Int, 0)
.ATTR(time_major, Bool, true)
.ATTR(activation, String, "tanh")
.ATTR(recurrent_activation, String, "sigmoid")
.ATTR(forget_bias, Float, 0.0)
.ATTR(gate_order, String, "ijfo")
.ATTR(stateful, Bool, false)
.ATTR(merge_mode, String, "concat")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(DynamicRNNV2)
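For orientation, a hedged sketch of driving the new op through the IR class that REG_OP generates; the set_input_*/set_attr_* accessor names follow the generated-accessor convention of graph/operator_reg.h and are an assumption here, as are the upstream operators x_data, w_input and w_hidden and the Keras-oriented attribute choices:

    // Hedged sketch: configure DynamicRNNV2 for a TF Keras style LSTM.
    ge::op::DynamicRNNV2 rnn("dynamic_rnn_v2");
    rnn.set_input_x(x_data)
       .set_input_weight_input(w_input)
       .set_input_weight_hidden(w_hidden)
       .set_attr_gate_order("ifco")                      // Keras weight layout, per the doc above
       .set_attr_recurrent_activation("hard_sigmoid");   // recommended above for TF Keras LSTM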

/**
*@brief: DynamicRNNV3 calculation.
*@par Inputs:
*ten inputs:


third_party/fwkacllib/inc/ops/transformation_ops.h (+46, -0)

@@ -743,6 +743,52 @@ REG_OP(Col2im)
.OP_END_FACTORY_REG(Col2im)

/**
* @brief Performs Im2col for each batch entry. \n

* @par Inputs:
* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth]. Must be one of the
* following types: float32, int8, float16. The input must have a data_format of
* either NHWC or NCHW.

* @par Attributes:
* @li ksizes: A required list or tuple. The size of the sliding window for each
* dimension of images.
* @li strides: An optional list or tuple. How far the centers of two consecutive
* patches are in the images. Defaults to "{1}".
* @li dilations: An optional list or tuple. Defaults to "{1}".
* This is the input stride, specifying how far two consecutive patch
* samples are in the input. Equivalent to extracting patches
* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *
* (dilations - 1), followed by subsampling them spatially by a factor of dilations.
* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
* @li padding_mode: An optional string. The type of padding algorithm to use,
* support "SAME", "VALID", "CALCULATED". Among the three modes, only "CALCULATED"
* uses the pads below. Defaults to "CALCULATED".
* @li pads: An optional list or tuple. The pad distance. Defaults to "{0}". \n

* @par Outputs:
* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *
* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols
* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols"
* are the dimensions of the output patches . \n

* @attention Constraints:
* "ksizes", "strides", "dilations" and "pads" are lists of integers . \n

* @par Third-party framework compatibility
* Compatible with the PyTorch Im2col operator.
*/
REG_OP(Im2col)
.INPUT(x, TensorType::RealNumberType())
.OUTPUT(y, TensorType::RealNumberType())
.REQUIRED_ATTR(ksizes, ListInt)
.ATTR(strides, ListInt, {1})
.ATTR(dilations, ListInt, {1})
.ATTR(padding_mode, String, "CALCULATED")
.ATTR(pads, ListInt, {0})
.OP_END_FACTORY_REG(Im2col)
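To make the dilation wording above concrete: with ksizes = {3, 3}, dilations = {2, 2} and strides = {1, 1}, the effective patch size is 3 + (3 - 1) * (2 - 1) = 5 in each spatial dimension, so each output position looks at a 5x5 window of the input but keeps only 3x3 samples from it, i.e. ksize_rows * ksize_cols * depth values per patch as described for y.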

/**
*@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine
matrices theta. \n



third_party/fwkacllib/inc/runtime/kernel.h (+3, -3)

@@ -186,9 +186,9 @@ typedef void (*rtCallback_t)(void *fnData);
#define RT_KERNEL_CUSTOM_AICPU (0x08)

// STARS topic scheduler sqe : topic_type
#define RT_KERNEL_DEVICE_FIRST (0X10)
#define RT_KERNEL_HOST_ONLY (0X20)
#define RT_KERNEL_HOST_FIRST (0X30)
#define RT_KERNEL_DEVICE_FIRST (0x10)
#define RT_KERNEL_HOST_ONLY (0x20)
#define RT_KERNEL_HOST_FIRST (0x40)

/**
* @ingroup rt_kernel


third_party/fwkacllib/inc/toolchain/slog.h (+13, -11)

@@ -17,6 +17,8 @@
#ifndef D_SYSLOG_H_
#define D_SYSLOG_H_

static const int TMP_LOG = 0;

#ifdef __cplusplus
#ifndef LOG_CPP
extern "C" {
@@ -261,7 +263,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
#define dlog_error(moduleId, fmt, ...) \
do { \
DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} while (0)
} while (TMP_LOG != 0)
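The TMP_LOG != 0 condition keeps the usual do { ... } while (0) single-statement behaviour (it is always false) while avoiding a literal constant in the loop condition; that rationale is inferred from the change, not stated in it. A small usage sketch, with a hypothetical module id:

    // Because the macro body is a do/while, it expands safely as a single
    // statement, even without braces around the if branch.
    if (ret != 0)
        dlog_error(RUNTIME, "kernel launch failed, ret = %d", ret);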

/**
* @ingroup slog
@@ -276,7 +278,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \
DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -291,7 +293,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \
DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -306,7 +308,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \
DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -318,7 +320,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
#define dlog_event(moduleId, fmt, ...) \
do { \
DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -334,7 +336,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
if(CheckLogLevel(moduleId, level) == 1) { \
DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -351,7 +353,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
if(CheckLogLevel(moduleId, level) == 1) { \
DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -369,7 +371,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
if(CheckLogLevel(moduleId, level) == 1) { \
DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -453,7 +455,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
if(CheckLogLevelForC(moduleId, level) == 1) { \
DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -470,7 +472,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
if(CheckLogLevelForC(moduleId, level) == 1) { \
DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog
@@ -488,7 +490,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
if(CheckLogLevelForC(moduleId, level) == 1) { \
DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)
} while (TMP_LOG != 0)

/**
* @ingroup slog

