diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 88f85cb3..2bf85e29 100644 --- a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -43,6 +43,7 @@ extern "C" { #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" +#define ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES "_datadump_original_op_names" typedef struct aclmdlDataset aclmdlDataset; typedef struct aclmdlDesc aclmdlDesc; @@ -638,6 +639,18 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, /** * @ingroup AscendCL + * @brief get attr value by op name + * + * @param modelDesc [IN] model description + * @param opName [IN] op name + * @param attr [IN] attr name + * + * @retval the attr value + */ +ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr); + +/** + * @ingroup AscendCL * @brief get input name by index * * @param modelDesc [IN] model description diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h index 9992a636..8261adc4 100644 --- a/inc/external/hccl/hccl.h +++ b/inc/external/hccl/hccl.h @@ -118,8 +118,24 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC */ extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, aclrtStream stream); +/** + * @brief Get the rank size of this comm. + * + * @param comm A pointer identifying the communication resource based on. + * @param rankSize A pointer identifying the rank size. + * @return HcclResult + */ +extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize); /** + * @brief Get the rank id of this comm. + * + * @param comm A pointer identifying the communication resource based on. + * @param rank A pointer identifying the rank id. + * @return HcclResult + */ +extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank); +/** * @brief Barrier operator. 
* * @param comm A pointer identifying the communication resource based on. diff --git a/metadef b/metadef index 7cb171b9..7cbdf957 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 7cb171b9c511fec57ccc0ad746ef2126267fe18b +Subproject commit 7cbdf95765133b5a5b979c2231013f7c76c3d529 diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 375802fc..b5f49dad 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1203,6 +1203,29 @@ REG_OP(Expand) .OP_END_FACTORY_REG(Expand) /** +*@Returns a tensor containing the indices of all non-zero elements of input. \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. + +*@par Attributes: +* transpose: the output tensor will be transposed if true. \n + +*@par Outputs: +* y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +*Compatible with the PyTorch operator NonZero. +*/ + +REG_OP(NonZero) + .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_INT64})) + .ATTR(transpose, Bool, false) + .OP_END_FACTORY_REG(NonZero) + +/** * @brief Expand the input tensor to a compatible shape. \n * @par Inputs: diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index 181bf694..ca4fe1db 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -64,10 +64,10 @@ the same types as "x_tensors" . It's a dynamic output. 
\n REG_OP(Batch) .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \ DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) - .OUTPUT(y_index, TensorType({ DT_INT64 })) - .OUTPUT(y_id, TensorType({ DT_INT64 })) .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL})) + .OUTPUT(y_index, TensorType({ DT_INT64 })) + .OUTPUT(y_id, TensorType({ DT_INT64 })) .REQUIRED_ATTR(num_batch_threads, Int) .REQUIRED_ATTR(max_batch_size, Int) .ATTR(max_enqueued_batches, Int, 10) diff --git a/third_party/fwkacllib/inc/ops/correlation.h b/third_party/fwkacllib/inc/ops/correlation.h new file mode 100644 index 00000000..caebba50 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/correlation.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file correlation.h + * \brief + */ +#ifndef GE_OP_CORRELATION_OPS_H +#define GE_OP_CORRELATION_OPS_H + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Computes a 2D Correlation given 4D "x" and "filter" tensors. +* +*@par Inputs: +* @li filter: A 4D tensor of filters. +* @li x: A 4D tensor of input images, batch number must equal to batch +* number of "filter", and channel must equal to channel of "filter". +* +*@par Attributes: +* @li groups: set correlation mode, must be 1 or channel. 
+* +*@par Outputs: +*y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with caffe correlation custom operator. +*/ +REG_OP(Correlation) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .ATTR(groups, Int, 1) + .OP_END_FACTORY_REG(Correlation) +} // namespace ge + +#endif // GE_OP_CORRELATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 8c87cfe8..7850536d 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -3468,25 +3468,6 @@ REG_OP(AxpyV2) .OP_END_FACTORY_REG(AxpyV2) /** -* @brief Computes the result of x1 + x2. - -* @par Inputs: -* @li x1: An ND tensor of type float16, float, int32. -* @li x2: An ND tensor of type float16, float, int32. \n - -* @par Outputs: -* @li y: An ND tensor tensor with the same type as "x1". \n - -* @par Third-party framework compatibility -* Compatible with the Pytorch operator Add. -*/ -REG_OP(PtAdd) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OP_END_FACTORY_REG(PtAdd) - -/** * @brief Computes the result of x1 * x2. * @par Inputs: diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index e6802c1e..71f1b9e1 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1484,6 +1484,55 @@ REG_OP(CombinedNonMaxSuppression) .OP_END_FACTORY_REG(CombinedNonMaxSuppression) /** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. 
+*@li warp_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for offset point. + +*@par Outputs: +*warp_img: A Tensor after resize. \n +*/ +REG_OP(IMGWarp) + .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .INPUT(warp_offset, TensorType({DT_FLOAT32})) + .OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .OP_END_FACTORY_REG(IMGWarp) + +/** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. +*@li map_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for resize point. + +*@par Outputs: +*map_img: A Tensor after resize. \n +*/ +REG_OP(Remap) + .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .INPUT(map_offset, TensorType({DT_FLOAT32})) + .OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .OP_END_FACTORY_REG(Remap) + +/** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 5-D tensor of shape `[n, 4, c, h, w]`, +and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bottom, w_right)]. +*@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point. + +*@par Outputs: +*remap_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n +*/ +REG_OP(IMGWarpResize) + .INPUT(img, TensorType({DT_FLOAT32})) + .INPUT(warp_index, TensorType({DT_FLOAT32})) + .OUTPUT(warp_img, TensorType({DT_FLOAT32})) + .OP_END_FACTORY_REG(IMGWarpResize) + +/** *@brief Function spatial transformer . \n *@par Inputs: @@ -1802,5 +1851,22 @@ REG_OP(ImageUnfold) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(padding_mode, String, "zeros") .OP_END_FACTORY_REG(ImageUnfold) + +/** +*@brief This operation select images to warp_images according to offsets. 
+ +*@par Inputs: +*@li images: 4-D Tensor with shape `[batch, height, width, 3]`. +*@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`. + +*@par Outputs: +*warp_images: Returns 5-D Tensor with shape +`[batch, 4, new_height, new_width, 3]` and the same dtype as `images`. +*/ +REG_OP(IMGWarpOffsets) + .INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT, DT_INT32})) + .OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(IMGWarpOffsets) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 083d4f9c..af02276b 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -179,7 +179,7 @@ REG_OP(GEMM) *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n *@par Inputs: -*Three inputs, including: +*Two inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. * @li x2: A matrix Tensor. Must be one of the following types: float16, diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index cc60f483..d4141e47 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -1460,8 +1460,6 @@ REG_OP(DecodeBboxV2) * @li y1: A Tensor. Must have the same type as x. * @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. * -*@attention Constraints: -* The upper limit of data on the direction axis is 7040. 
*/ REG_OP(Sort) .INPUT(x, TensorType({ DT_FLOAT16 })) diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 22023f46..796e1e61 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -55,7 +55,9 @@ REG_OP(LogSoftmaxGrad) *Two inputs, including: * @li features: A Tensor. Must be one of the following types: half, float32, double. * A "batch_size * num_classes" matrix. -* @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes). +* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'. +* batch_size vector with values in [0, num_classes). +* This is the label for the given minibatch entry. *@par Outputs: @@ -639,6 +641,48 @@ REG_OP(LayerNormXBackprop) .OP_END_FACTORY_REG(LayerNormXBackprop) /** +*@brief LayerNormXBackpropV2 operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSLON), (-0.5)) + +*@par Inputs: +*Five inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32 . 
\n + +*@par Outputs: +*Two outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li res_for_gamma: A Tensor. Must be one of the following types: float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LayerNormXBackpropV2) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(LayerNormXBackpropV2) + +/** *@brief LayerNormBetaGammaBackprop operator interface implementation * calculating: dy, x, variance, mean * pd_xl = data_dy*data_gamma @@ -682,6 +726,35 @@ REG_OP(LayerNormBetaGammaBackprop) .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) /** +*@brief LayerNormBetaGammaBackpropV2 operator interface implementation +* calculating: dy, x, variance, mean +* pd_gamma = np.sum((data_dy*res_for_gamma), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Three inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*Two outputs, including: +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(LayerNormBetaGammaBackpropV2) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(res_for_gamma, TensorType({DT_FLOAT})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(shape_gamma, ListInt) + .OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2) + +/** *@brief Return "output" according to the algorithm of dropout_do_mask: * scale_x = x *(1 / keep_prob) * output = select(mask == 1, scale_x, 0) diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index 4cbe4057..ca1c24eb 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -536,13 +536,19 @@ REG_OP(Elu) * max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n *@par Inputs: -*x: A float16, float32 or double, for the input data type . \n +*x: A float16, float32, for the input data type . \n *@par Attributes: -*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n +*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n + +*@par Attributes: +*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n + +*@par Attributes: +*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n *@par Outputs: -*y: A float16, float32 or double, for the normalized result . \n +*y: A float16, float32, for the normalized result . \n *@attention Constraints: *@li The input is of type float16 or float32 . 
\n @@ -553,9 +559,11 @@ REG_OP(Elu) *@li Compatible with ONNX's Celu operator */ REG_OP(Celu) - .INPUT(x, TensorType::FloatingDataType()) - .OUTPUT(y, TensorType::FloatingDataType()) - .ATTR(alpha, Float, 1.0) + .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) + .ATTR(alpha1, Float, 1.0) + .ATTR(alpha2, Float, 1.0) + .ATTR(alpha3, Float, 1.0) .OP_END_FACTORY_REG(Celu) /** @@ -691,6 +699,25 @@ REG_OP(Mish) .OP_END_FACTORY_REG(Mish) /** + * @brief: pytorch mish_grad operator. + * @par Inputs: + * three input, including: + * @li grad: A Tensor. shape, datatype and format is same as x + * @li x: A Tensor. Must be one of the following types: float16, float32 + * @li tanhx: A Tensor. shape, datatype and format is same as x + * @par Outputs: + * 1 output, including: + * @li x_grad: A Tensor. shape, datatype and format is same as x + */ + +REG_OP(MishGrad) + .INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .OP_END_FACTORY_REG(MishGrad) + +/** * @brief pytorch hardtanh_backward operator. * * @par Inputs: @@ -993,6 +1020,30 @@ REG_OP(HardSigmoidGrad) .ATTR(beta, Float, 0.5) .OP_END_FACTORY_REG(HardSigmoidGrad) +/** +* @brief Calculate the shrink function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n +* @li bias: An optional float. Defaults to 0.0. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the ONNX operator Shrink. 
\n +*/ +REG_OP(Shrink) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .ATTR(bias, Float, 0.0) + .OP_END_FACTORY_REG(Shrink) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 42da3828..bacbe40d 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -273,14 +273,11 @@ REG_OP(PadV3) *@brief Pads a tensor. *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li paddings: A Tensor. Must be int32 type +* paddings is a required input tensor. *@par Attributes: -* @li paddings: An required "vector>". -* For each dimension D of input, paddings[D, 0] indicates how many -* values to add before the contents of tensor in that dimension, -* and paddings[D, 1] indicates how many values to add after the -* contents of tensor in that dimension. * @li constant_values: An optional int value for pad. * @li mode: An optional string, Defaults to "constant", indicates paddings mode, * support "constant", "reflect", "edge" @@ -298,9 +295,9 @@ REG_OP(PadV3) * Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead. 
*/ REG_OP(PadV3D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) - .REQUIRED_ATTR(paddings, ListListInt) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(paddings, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(constant_values, Int, 0) .ATTR(mode, String, "constant") .ATTR(paddings_contiguous, Bool, true) diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index eafc8cc4..d6eda1e6 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -214,7 +214,7 @@ REG_OP(AscendRequant) *@brief Requantizes the input of int16 . \n *@par Inputs: -*@li x: An NC1HWC0 tensor of type int16, specifying the input. +*@li x0: An NC1HWC0 tensor of type int16, specifying the input. *@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. *@li x1: An NC1HWC0 tensor of type int16 . \n @@ -223,17 +223,17 @@ REG_OP(AscendRequant) *@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n *@par Outputs: -*@li y: The dequantized output tensor of type int8 and with format NC1HWC0. +*@li y0: The dequantized output tensor of type int8 and with format NC1HWC0. *@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. 
*/ REG_OP(AscendRequantS16) - .INPUT(x, TensorType({DT_INT16})) + .INPUT(x0, TensorType({DT_INT16})) .INPUT(req_scale, TensorType({DT_UINT64})) .OPTIONAL_INPUT(x1, TensorType({DT_INT16})) - .OUTPUT(y, TensorType({DT_INT8})) + .OUTPUT(y0, TensorType({DT_INT8})) .OUTPUT(y1, TensorType({DT_INT16})) .ATTR(dual_output, Bool, false) .ATTR(relu_flag, Bool, false) diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index d671a531..9c6a7d1b 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -33,6 +33,7 @@ namespace ge { *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. *@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n +*@li mask:A 1D Tensor. Must be one of the following types: uint8. *@par Attributes: *@li keep_prob:An integer identifying the keep prob in the op. Default to 1. @@ -42,7 +43,6 @@ namespace ge { *@par Outputs: *seven outputs: -*@li mask:A 1D Tensor. Must be one of the following types: uint8. *@li ct:A 4D Tensor. Must be one of the following types: float16, float32. *@li ht:A 4D Tensor. Must be one of the following types: float16. *@li it:A 4D Tensor. Must be one of the following types: float16, float32. @@ -209,6 +209,7 @@ REG_OP(DynamicRNNGrad) *@li time_major:An bool identifying the time major in the op. Default to true. *@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. *@li forget_bias:An float identifying the forget bias in the op. Default to 0. +*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo". *@li is_training:An bool identifying is training in the op. Default to true . 
\n *@par Outputs: @@ -253,10 +254,104 @@ REG_OP(DynamicRNN) .ATTR(time_major, Bool, true) .ATTR(activation, String, "tanh") .ATTR(forget_bias, Float, 0.0) + .ATTR(gate_order, String, "ijfo") .ATTR(is_training, Bool, true) .OP_END_FACTORY_REG(DynamicRNN) /** +*@brief: DynamicRNNV2 calculation. +*@par Inputs: +*ten inputs: +*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32. +*The format must be FRACTAL_Z. +*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32. +*The format must be FRACTAL_Z. +*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. +*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n + +*@par Attributes: +*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". +*Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. 
+*@li use_peephole:An bool identifying if use peephole in the op. Default to false. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". +*Only tanh is currently supported. +*@li recurrent_activation:An string identifying the type of activation function in the op. Default to "sigmoid". +*Support "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM. +*@li forget_bias:An float identifying the forget bias in the op. Default to 0. +*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo". +*Set "ijfo" for TF operator LSTM, Set "ifco" for TF Keras LSTM. +*@li stateful: An bool identifying the type of stateful in the op. Default to false. Only false is currently supported. +*@li merge_mode: An string identifying the type of merge_mode in the op. Default to "concat". +*Only "concat" is currently supported. +*@li is_training:An bool identifying is training in the op. Default to true . \n + +*@par Outputs: +*eight outputs: +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*Return the last output_h. +*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*Return the last output_c. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. 
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@par Third-party framework compatibility: +* Compatible with the TF operator LSTM or TF keras operator LSTM. +*/ + +REG_OP(DynamicRNNV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(cell_type, String, "LSTM") + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(use_peephole, Bool, false) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(recurrent_activation, String, "sigmoid") + .ATTR(forget_bias, Float, 0.0) + .ATTR(gate_order, String, "ijfo") + .ATTR(stateful, Bool, false) + .ATTR(merge_mode, String, "concat") + .ATTR(is_training, Bool, true) + 
.OP_END_FACTORY_REG(DynamicRNNV2) + +/** *@brief: DynamicRNNV3 calculation. *@par Inputs: *ten inputs: diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 48a094f7..2e8ffd9c 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -743,6 +743,52 @@ REG_OP(Col2im) .OP_END_FACTORY_REG(Col2im) /** +* @brief Performs Im2col for each batch entry. \n + +* @par Inputs: +* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the +* following types:float32, int8, float16. The inputs must have data_format with +* one of follows:NHWC, NCHW. + +* @par Attributes: +* @li ksizes: A required list or tuple. The size of the sliding window for each +* dimension of images. +* @li strides: A optional list or tuple. How far the centers of two consecutive +* patches are in the images. Defaults to "{1}". +* @li dilations: A optional list or tuple. Defaults to "{1}". +* This is the input stride, specifying how far two consecutive patch +* samples are in the input. Equivalent to extracting patches +* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) * +* (dilations - 1), followed by subsampling them spatially by a factor of dilations. +* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. +* @li padding_mode: A optional String. The type of padding algorithm to use, +* support "SAME", "VALID", "CALCULATED". Among the three modes, only the "CALCULATED" +* means to use the pads below. Defaults to "CALCULATED". +* @li pads: A optional list or tuple. The pad distance. Defaults to "{0}". \n + +* @par Outputs: +* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * +* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols +* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols" +* are the dimensions of the output patches . 
\n + +* @attention Constraints: +* "ksizes", "strides", "dilations" and "pads" are lists of integers . \n + +* @par Third-party framework compatibility +* Compatible with Pytorch Im2col operator. +*/ +REG_OP(Im2col) + .INPUT(x, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksizes, ListInt) + .ATTR(strides, ListInt, {1}) + .ATTR(dilations, ListInt, {1}) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0}) + .OP_END_FACTORY_REG(Im2col) + +/** *@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine matrices theta. \n diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 338e8854..c79ee7a5 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -186,9 +186,9 @@ typedef void (*rtCallback_t)(void *fnData); #define RT_KERNEL_CUSTOM_AICPU (0x08) // STARS topic scheduler sqe : topic_type -#define RT_KERNEL_DEVICE_FIRST (0X10) -#define RT_KERNEL_HOST_ONLY (0X20) -#define RT_KERNEL_HOST_FIRST (0X30) +#define RT_KERNEL_DEVICE_FIRST (0x10) +#define RT_KERNEL_HOST_ONLY (0x20) +#define RT_KERNEL_HOST_FIRST (0x40) /** * @ingroup rt_kernel diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index c6be6b79..cc7c83ca 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -17,6 +17,8 @@ #ifndef D_SYSLOG_H_ #define D_SYSLOG_H_ +static const int TMP_LOG = 0; + #ifdef __cplusplus #ifndef LOG_CPP extern "C" { @@ -261,7 +263,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); #define dlog_error(moduleId, fmt, ...) 
\ do { \ DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -276,7 +278,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \ DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -291,7 +293,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -306,7 +308,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -318,7 +320,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); #define dlog_event(moduleId, fmt, ...) 
\ do { \ DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -334,7 +336,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); if(CheckLogLevel(moduleId, level) == 1) { \ DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -351,7 +353,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); if(CheckLogLevel(moduleId, level) == 1) { \ DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -369,7 +371,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); if(CheckLogLevel(moduleId, level) == 1) { \ DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -453,7 +455,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); if(CheckLogLevelForC(moduleId, level) == 1) { \ DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -470,7 +472,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); if(CheckLogLevelForC(moduleId, level) == 1) { \ DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog @@ -488,7 +490,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); if(CheckLogLevelForC(moduleId, level) == 1) { \ DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ } \ - } while (0) + } while (TMP_LOG != 0) /** * @ingroup slog