From: @ding_fei_fei Reviewed-by: @xsmq, @liucunwei Signed-off-by: @liucunwei Tag: tags/v1.3.0
@@ -43,6 +43,7 @@ extern "C" { | |||
#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" | |||
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" | |||
#define ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES "_datadump_original_op_names" | |||
typedef struct aclmdlDataset aclmdlDataset; | |||
typedef struct aclmdlDesc aclmdlDesc; | |||
@@ -638,6 +639,18 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, | |||
/** | |||
* @ingroup AscendCL | |||
* @brief get attr value by op name | |||
* | |||
* @param modelDesc [IN] model description | |||
* @param opName [IN] op name | |||
* @param attr [IN] attr name | |||
* | |||
* @retval the attr value | |||
*/ | |||
ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr); | |||
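A minimal usage sketch for the new query, assuming the model has already been loaded (so a valid modelId exists); the op name "conv1" and the attribute key are only placeholders, and the success-code name differs across CANN versions:

#include <cstdio>
#include "acl/acl_mdl.h"

void PrintOpAttr(uint32_t modelId) {
    aclmdlDesc *desc = aclmdlCreateDesc();
    aclError ret = aclmdlGetDesc(desc, modelId);
    if (ret == ACL_SUCCESS) {  // ACL_ERROR_NONE on older toolkits
        // Both the op name and the attribute key below are illustrative placeholders.
        const char *value = aclmdlGetOpAttr(desc, "conv1", ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES);
        if (value != nullptr) {
            printf("op attr: %s\n", value);
        }
    }
    (void)aclmdlDestroyDesc(desc);
}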
/** | |||
* @ingroup AscendCL | |||
* @brief get input name by index | |||
* | |||
* @param modelDesc [IN] model description | |||
@@ -118,8 +118,24 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC | |||
*/ | |||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||
aclrtStream stream); | |||
/**
* @brief Get the rank size of this comm.
*
* @param comm A pointer identifying the communication resource.
* @param rankSize A pointer identifying the rank size.
* @return HcclResult
*/
extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
/**
* @brief Get the rank id of this comm.
*
* @param comm A pointer identifying the communication resource.
* @param rank A pointer identifying the rank id.
* @return HcclResult
*/
extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
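A short sketch of how the two new queries fit together; "comm" is assumed to have been created earlier (for example via HcclCommInitRootInfo), and the error handling is only indicative:

#include "hccl/hccl.h"

void QueryRankInfo(HcclComm comm) {
    uint32_t rankSize = 0;
    uint32_t rankId = 0;
    if (HcclGetRankSize(comm, &rankSize) != HCCL_SUCCESS ||
        HcclGetRankId(comm, &rankId) != HCCL_SUCCESS) {
        return;  // handle the failure as appropriate
    }
    // Example use: let rank 0 act as the root of a later broadcast.
    bool isRoot = (rankId == 0 && rankSize > 1);
    (void)isRoot;
}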
/** | |||
* @brief Barrier operator. | |||
* | |||
* @param comm A pointer identifying the communication resource.
@@ -1 +1 @@ | |||
Subproject commit 7cb171b9c511fec57ccc0ad746ef2126267fe18b | |||
Subproject commit 7cbdf95765133b5a5b979c2231013f7c76c3d529 |
@@ -1203,6 +1203,29 @@ REG_OP(Expand) | |||
.OP_END_FACTORY_REG(Expand) | |||
/** | |||
*@brief Returns a tensor containing the indices of all non-zero elements of the input. \n
*@par Inputs:
*@li x: A Tensor. Must be one of the following types: double, float32, float16, int8, uint8,
* int16, uint16, int32, uint32, int64, uint64, bool.
*@par Attributes:
* transpose: An optional bool. The output tensor will be transposed if true. Defaults to false. \n
*@par Outputs:
* y: A Tensor of type int64 containing the indices of the non-zero elements . \n
*@par Third-party framework compatibility | |||
*Compatible with the PyTorch operator NonZero. | |||
*/ | |||
REG_OP(NonZero) | |||
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ | |||
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) | |||
.OUTPUT(y, TensorType({DT_INT64})) | |||
.ATTR(transpose, Bool, false) | |||
.OP_END_FACTORY_REG(NonZero) | |||
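For intuition, a host-side reference of what NonZero computes on a 2-D input; the [num_nonzero, rank] layout for transpose == false is an assumption based on the PyTorch-compatibility note, with transpose == true giving the swapped [rank, num_nonzero] form:

#include <array>
#include <cstdint>
#include <vector>

// Collect one (row, col) coordinate per non-zero element of a row-major 2-D input.
std::vector<std::array<int64_t, 2>> NonZeroRef(const std::vector<float> &x,
                                               int64_t rows, int64_t cols) {
    std::vector<std::array<int64_t, 2>> indices;
    for (int64_t r = 0; r < rows; ++r) {
        for (int64_t c = 0; c < cols; ++c) {
            if (x[static_cast<size_t>(r * cols + c)] != 0.0f) {
                indices.push_back({r, c});
            }
        }
    }
    return indices;
}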
/** | |||
* @brief Expand the input tensor to a compatible shape. \n | |||
* @par Inputs: | |||
@@ -64,10 +64,10 @@ the same types as "x_tensors" . It's a dynamic output. \n | |||
REG_OP(Batch) | |||
.DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \ | |||
DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) | |||
.OUTPUT(y_index, TensorType({ DT_INT64 })) | |||
.OUTPUT(y_id, TensorType({ DT_INT64 })) | |||
.DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ | |||
DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL})) | |||
.OUTPUT(y_index, TensorType({ DT_INT64 })) | |||
.OUTPUT(y_id, TensorType({ DT_INT64 })) | |||
.REQUIRED_ATTR(num_batch_threads, Int) | |||
.REQUIRED_ATTR(max_batch_size, Int) | |||
.ATTR(max_enqueued_batches, Int, 10) | |||
@@ -0,0 +1,52 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/*! | |||
* \file correlation.h | |||
* \brief | |||
*/ | |||
#ifndef GE_OP_CORRELATION_OPS_H | |||
#define GE_OP_CORRELATION_OPS_H | |||
#include "graph/operator_reg.h" | |||
namespace ge { | |||
/** | |||
*@brief Computes a 2D Correlation given 4D "x" and "filter" tensors. | |||
* | |||
*@par Inputs: | |||
* @li filter: A 4D tensor of filters. | |||
* @li x: A 4D tensor of input images. Its batch number must equal the batch
* number of "filter", and its channel count must equal that of "filter".
*
*@par Attributes:
* @li groups: Sets the correlation mode. Must be 1 or equal to the channel count.
* | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". | |||
*@par Third-party framework compatibility | |||
* Compatible with caffe correlation custom operator. | |||
*/ | |||
REG_OP(Correlation) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) | |||
.ATTR(groups, Int, 1) | |||
.OP_END_FACTORY_REG(Correlation) | |||
} // namespace ge | |||
#endif // GE_OP_CORRELATION_OPS_H
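A sketch of how the operator could be instantiated once this header is included; the set_input_*/set_attr_* setters follow the usual REG_OP code-generation pattern, and the surrounding graph construction (Data nodes, graph build) is assumed to exist elsewhere:

#include "correlation.h"

// x_node and filter_node are assumed to be previously built ge::Operator instances.
ge::op::Correlation BuildCorrelation(ge::Operator &x_node, ge::Operator &filter_node) {
    auto corr = ge::op::Correlation("correlation_1");
    corr.set_input_x(x_node)
        .set_input_filter(filter_node)
        .set_attr_groups(1);  // 1 = correlate across all channels together
    return corr;
}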
@@ -3468,25 +3468,6 @@ REG_OP(AxpyV2) | |||
.OP_END_FACTORY_REG(AxpyV2) | |||
/** | |||
* @brief Computes the result of x1 + x2. | |||
* @par Inputs: | |||
* @li x1: An ND tensor of type float16, float, int32. | |||
* @li x2: An ND tensor of type float16, float, int32. \n | |||
* @par Outputs: | |||
* @li y: An ND tensor tensor with the same type as "x1". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Add. | |||
*/ | |||
REG_OP(PtAdd) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OP_END_FACTORY_REG(PtAdd) | |||
/** | |||
* @brief Computes the result of x1 * x2. | |||
* @par Inputs: | |||
@@ -1484,6 +1484,55 @@ REG_OP(CombinedNonMaxSuppression) | |||
.OP_END_FACTORY_REG(CombinedNonMaxSuppression) | |||
/** | |||
*@brief Resizes "images" with "offset" using bilinear interpolation. \n | |||
*@par Inputs: | |||
*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. | |||
*@li warp_offset: the resize offset. A 4-D float tensor of shape `[n, h, w, 2]`, where the 2 channels are the (x, y) offset of each point.
*@par Outputs: | |||
*warp_img: A Tensor after resize. \n | |||
*/ | |||
REG_OP(IMGWarp) | |||
.INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||
.INPUT(warp_offset, TensorType({DT_FLOAT32})) | |||
.OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||
.OP_END_FACTORY_REG(IMGWarp) | |||
/** | |||
*@brief Resizes "images" with "offset" using bilinear interpolation. \n | |||
*@par Inputs: | |||
*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. | |||
*@li map_offset: the resize offset. A 4-D float tensor of shape `[n, h, w, 2]`, where the 2 channels are the (x, y) position to sample for each point.
*@par Outputs: | |||
*map_img: A Tensor after resize. \n | |||
*/ | |||
REG_OP(Remap) | |||
.INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||
.INPUT(map_offset, TensorType({DT_FLOAT32})) | |||
.OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||
.OP_END_FACTORY_REG(Remap) | |||
/** | |||
*@brief Resizes "images" with "offset" using bilinear interpolation. \n | |||
*@par Inputs: | |||
*@li img: input image. A 5-D tensor of shape `[n, 4, c, h, w]`, where the 4 holds the four
* neighbouring pixels [(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bottom, w_right)].
*@li warp_index: the resize offset. A 4-D float tensor of shape `[n, 2, h, w]`, where the 2 channels are the (x, y) position to sample for each point.
*@par Outputs:
*warp_img: A Tensor after bilinear resizing. A 4-D tensor of shape `[n, c, h, w]`. \n
*/ | |||
REG_OP(IMGWarpResize) | |||
.INPUT(img, TensorType({DT_FLOAT32})) | |||
.INPUT(warp_index, TensorType({DT_FLOAT32})) | |||
.OUTPUT(warp_img, TensorType({DT_FLOAT32})) | |||
.OP_END_FACTORY_REG(IMGWarpResize) | |||
/** | |||
*@brief Function spatial transformer . \n | |||
*@par Inputs: | |||
@@ -1802,5 +1851,22 @@ REG_OP(ImageUnfold) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(padding_mode, String, "zeros") | |||
.OP_END_FACTORY_REG(ImageUnfold) | |||
/** | |||
*@brief This operation selects pixels from "images" into "warp_images" according to "offsets".
*@par Inputs: | |||
*@li images: 4-D Tensor with shape `[batch, height, width, 3]`. | |||
*@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`. | |||
*@par Outputs: | |||
*warp_images: Returns 5-D Tensor with shape | |||
`[batch, 4, new_height, new_width, 3]` and the same dtype as `images`. | |||
*/ | |||
REG_OP(IMGWarpOffsets) | |||
.INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(offsets, TensorType({DT_FLOAT, DT_INT32})) | |||
.OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(IMGWarpOffsets) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ |
@@ -179,7 +179,7 @@ REG_OP(GEMM) | |||
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | |||
*@par Inputs: | |||
*Three inputs, including: | |||
*Two inputs, including: | |||
* @li x1: A matrix Tensor. Must be one of the following types: float16, | |||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | |||
* @li x2: A matrix Tensor. Must be one of the following types: float16, | |||
@@ -1460,8 +1460,6 @@ REG_OP(DecodeBboxV2) | |||
* @li y1: A Tensor. Must have the same type as x. | |||
* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. | |||
* | |||
*@attention Constraints: | |||
* The upper limit of data on the direction axis is 7040. | |||
*/ | |||
REG_OP(Sort) | |||
.INPUT(x, TensorType({ DT_FLOAT16 })) | |||
@@ -55,7 +55,9 @@ REG_OP(LogSoftmaxGrad) | |||
*Two inputs, including: | |||
* @li features: A Tensor. Must be one of the following types: half, float32, double. | |||
* A "batch_size * num_classes" matrix. | |||
* @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes). | |||
* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'. | |||
* batch_size vector with values in [0, num_classes). | |||
* This is the label for the given minibatch entry. | |||
*@par Outputs: | |||
@@ -639,6 +641,48 @@ REG_OP(LayerNormXBackprop) | |||
.OP_END_FACTORY_REG(LayerNormXBackprop) | |||
/** | |||
*@brief LayerNormXBackpropV2 operator interface implementation | |||
* calculating: dy, x, variance, mean, gamma
*   pd_xl = data_dy * data_gamma
*   pd_var = np.sum(((-0.5) * pd_xl * (data_x - data_mean)
*            * np.power((data_variance + EPSLON), (-1.5))),
*            reduce_axis, keepdims=True)
*   pd_mean = np.sum(((-1.0) * pd_xl
*             * np.power((data_variance + EPSLON), (-0.5))),
*             reduce_axis, keepdims=True)
*             + pd_var * (1.0/m)
*             * np.sum(((-2.0) * (data_x - data_mean)), reduce_axis, keepdims=True)
*   pd_x = pd_xl * np.power((data_variance + EPSLON), (-0.5)) +
*          pd_var * (2.0/m) * (data_x - data_mean) + pd_mean * (1.0/m)
*   res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSLON), (-0.5))
*@par Inputs: | |||
*Five inputs, including: | |||
* @li dy: A Tensor. Must be one of the following types: float16, float32. | |||
* @li x: A Tensor. Must be one of the following types: float16, float32. | |||
* @li variance: A Tensor. Must be one of the following types: float16, float32. | |||
* @li mean: A Tensor. Must be one of the following types: float16, float32. | |||
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n | |||
*@par Outputs: | |||
*Two outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32. | |||
* @li res_for_gamma: A Tensor. Must be one of the following types: float32. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(LayerNormXBackpropV2) | |||
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(LayerNormXBackpropV2) | |||
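The formulas above, written out as a plain scalar reference over a single reduction group of length m (variable names and EPSLON follow the comment; this is a readability aid, not the kernel implementation):

#include <cmath>
#include <vector>

void LayerNormXBackpropV2Ref(const std::vector<float> &dy, const std::vector<float> &x,
                             float variance, float mean, const std::vector<float> &gamma,
                             std::vector<float> &pd_x, std::vector<float> &res_for_gamma,
                             float EPSLON = 1e-12f) {
    const size_t m = x.size();
    const float rstd = std::pow(variance + EPSLON, -0.5f);  // (variance + EPSLON)^(-0.5)
    std::vector<float> pd_xl(m);
    float pd_var = 0.0f, pd_mean = 0.0f, sum_xc = 0.0f;
    for (size_t i = 0; i < m; ++i) {
        pd_xl[i] = dy[i] * gamma[i];
        pd_var += -0.5f * pd_xl[i] * (x[i] - mean) * std::pow(variance + EPSLON, -1.5f);
        pd_mean += -1.0f * pd_xl[i] * rstd;
        sum_xc += -2.0f * (x[i] - mean);
    }
    pd_mean += pd_var * (1.0f / m) * sum_xc;
    pd_x.resize(m);
    res_for_gamma.resize(m);
    for (size_t i = 0; i < m; ++i) {
        pd_x[i] = pd_xl[i] * rstd + pd_var * (2.0f / m) * (x[i] - mean) + pd_mean * (1.0f / m);
        res_for_gamma[i] = (x[i] - mean) * rstd;
    }
}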
/** | |||
*@brief LayerNormBetaGammaBackprop operator interface implementation | |||
* calculating: dy, x, variance, mean | |||
* pd_xl = data_dy*data_gamma | |||
@@ -682,6 +726,35 @@ REG_OP(LayerNormBetaGammaBackprop) | |||
.OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) | |||
/** | |||
*@brief LayerNormBetaGammaBackpropV2 operator interface implementation | |||
*  calculating: dy, res_for_gamma
*    pd_gamma = np.sum((data_dy * res_for_gamma), param_axis, keepdims=True)
*    pd_beta = np.sum(data_dy, param_axis, keepdims=True)
*@par Inputs:
*Two inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li res_for_gamma: A Tensor. Must be one of the following types: float32 . \n
*@par Outputs:
*Two outputs, including:
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(LayerNormBetaGammaBackpropV2) | |||
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(res_for_gamma, TensorType({DT_FLOAT})) | |||
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(shape_gamma, ListInt) | |||
.OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2) | |||
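Likewise, the two reductions above as a scalar reference over a flattened [rows, cols] input, where the reduction (param_axis) runs over the rows; a sketch only:

#include <vector>

void LayerNormBetaGammaBackpropV2Ref(const std::vector<float> &dy,
                                     const std::vector<float> &res_for_gamma,
                                     size_t rows, size_t cols,
                                     std::vector<float> &pd_gamma, std::vector<float> &pd_beta) {
    pd_gamma.assign(cols, 0.0f);
    pd_beta.assign(cols, 0.0f);
    for (size_t r = 0; r < rows; ++r) {
        for (size_t c = 0; c < cols; ++c) {
            pd_gamma[c] += dy[r * cols + c] * res_for_gamma[r * cols + c];  // np.sum(data_dy * res_for_gamma)
            pd_beta[c]  += dy[r * cols + c];                                // np.sum(data_dy)
        }
    }
}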
/** | |||
*@brief Return "output" according to the algorithm of dropout_do_mask: | |||
* scale_x = x *(1 / keep_prob) | |||
* output = select(mask == 1, scale_x, 0) | |||
@@ -536,13 +536,19 @@ REG_OP(Elu) | |||
* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n | |||
*@par Inputs: | |||
*x: A float16, float32 or double, for the input data type . \n | |||
*x: A float16, float32, for the input data type . \n | |||
*@par Attributes: | |||
*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n | |||
*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n | |||
*@par Attributes: | |||
*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n | |||
*@par Attributes: | |||
*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n | |||
*@par Outputs: | |||
*y: A float16, float32 or double, for the normalized result . \n | |||
*y: A float16, float32, for the normalized result . \n | |||
*@attention Constraints: | |||
*@li The input is of type float16 or float32 . \n | |||
@@ -553,9 +559,11 @@ REG_OP(Elu) | |||
*@li Compatible with ONNX's Celu operator | |||
*/ | |||
REG_OP(Celu) | |||
.INPUT(x, TensorType::FloatingDataType()) | |||
.OUTPUT(y, TensorType::FloatingDataType()) | |||
.ATTR(alpha, Float, 1.0) | |||
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.ATTR(alpha1, Float, 1.0) | |||
.ATTR(alpha2, Float, 1.0) | |||
.ATTR(alpha3, Float, 1.0) | |||
.OP_END_FACTORY_REG(Celu) | |||
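For reference, the single-alpha CELU formula quoted above as a scalar helper; how the kernel combines alpha1/alpha2/alpha3 is not spelled out in this header, so only the quoted form is shown:

#include <algorithm>
#include <cmath>

// max(0, x) + min(0, alpha * (exp(x / alpha) - 1)), as stated in the description.
inline float CeluRef(float x, float alpha = 1.0f) {
    return std::max(0.0f, x) + std::min(0.0f, alpha * (std::exp(x / alpha) - 1.0f));
}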
/** | |||
@@ -691,6 +699,25 @@ REG_OP(Mish) | |||
.OP_END_FACTORY_REG(Mish) | |||
/** | |||
* @brief PyTorch mish_grad operator.
* @par Inputs:
* Three inputs, including:
* @li grad: A Tensor. Shape, data type and format are the same as "x".
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li tanhx: An optional Tensor. Shape, data type and format are the same as "x".
* @par Outputs:
* One output, including:
* @li x_grad: A Tensor. Shape, data type and format are the same as "x".
*/ | |||
REG_OP(MishGrad) | |||
.INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 })) | |||
.INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 })) | |||
.OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 })) | |||
.OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 })) | |||
.OP_END_FACTORY_REG(MishGrad) | |||
/** | |||
* @brief pytorch hardtanh_backward operator. | |||
* | |||
* @par Inputs: | |||
@@ -993,6 +1020,30 @@ REG_OP(HardSigmoidGrad) | |||
.ATTR(beta, Float, 0.5) | |||
.OP_END_FACTORY_REG(HardSigmoidGrad) | |||
/** | |||
* @brief Calculate the shrink function. \n | |||
* @par Inputs: | |||
* One input, including:
* @li input_x: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @par Attributes: | |||
* @li lambd: An optional float. Defaults to 0.5. \n | |||
* @li bias: An optional float. Defaults to 0.0. \n | |||
* @par Outputs: | |||
* output_y: A Tensor with the same dtype and shape as "input_x". \n
* @par Third-party framework compatibility | |||
* Compatible with the ONNX operator Shrink. \n | |||
*/ | |||
REG_OP(Shrink) | |||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(lambd, Float, 0.5) | |||
.ATTR(bias, Float, 0.0) | |||
.OP_END_FACTORY_REG(Shrink) | |||
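A scalar sketch of the ONNX Shrink definition this operator claims compatibility with: y = x - bias when x > lambd, y = x + bias when x < -lambd, and 0 otherwise.

inline float ShrinkRef(float x, float lambd = 0.5f, float bias = 0.0f) {
    if (x > lambd) {
        return x - bias;
    }
    if (x < -lambd) {
        return x + bias;
    }
    return 0.0f;
}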
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ |
@@ -273,14 +273,11 @@ REG_OP(PadV3) | |||
*@brief Pads a tensor. | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. | |||
* @li x: A Tensor. Must be one of the following types: float16, float32. | |||
* @li paddings: A Tensor. Must be int32 type | |||
* paddings is a required input tensor. | |||
*@par Attributes: | |||
* @li paddings: An required "vector<vector<int>>". | |||
* For each dimension D of input, paddings[D, 0] indicates how many | |||
* values to add before the contents of tensor in that dimension, | |||
* and paddings[D, 1] indicates how many values to add after the | |||
* contents of tensor in that dimension. | |||
* @li constant_values: An optional int value for pad. | |||
* @li mode: An optional string, Defaults to "constant", indicates paddings mode, | |||
* support "constant", "reflect", "edge" | |||
@@ -298,9 +295,9 @@ REG_OP(PadV3) | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead. | |||
*/ | |||
REG_OP(PadV3D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) | |||
.REQUIRED_ATTR(paddings, ListListInt) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(paddings, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(constant_values, Int, 0) | |||
.ATTR(mode, String, "constant") | |||
.ATTR(paddings_contiguous, Bool, true) | |||
@@ -214,7 +214,7 @@ REG_OP(AscendRequant) | |||
*@brief Requantizes the input of int16 . \n | |||
*@par Inputs: | |||
*@li x: An NC1HWC0 tensor of type int16, specifying the input. | |||
*@li x0: An NC1HWC0 tensor of type int16, specifying the input. | |||
*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. | |||
*@li x1: An NC1HWC0 tensor of type int16 . \n | |||
@@ -223,17 +223,17 @@ REG_OP(AscendRequant) | |||
*@li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
*@par Outputs: | |||
*@li y: The dequantized output tensor of type int8 and with format NC1HWC0. | |||
*@li y0: The dequantized output tensor of type int8 and with format NC1HWC0. | |||
*@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n | |||
*@par Third-party framework compatibility | |||
* It is a custom operator. It has no corresponding operator in Caffe. | |||
*/ | |||
REG_OP(AscendRequantS16) | |||
.INPUT(x, TensorType({DT_INT16})) | |||
.INPUT(x0, TensorType({DT_INT16})) | |||
.INPUT(req_scale, TensorType({DT_UINT64})) | |||
.OPTIONAL_INPUT(x1, TensorType({DT_INT16})) | |||
.OUTPUT(y, TensorType({DT_INT8})) | |||
.OUTPUT(y0, TensorType({DT_INT8})) | |||
.OUTPUT(y1, TensorType({DT_INT16})) | |||
.ATTR(dual_output, Bool, false) | |||
.ATTR(relu_flag, Bool, false) | |||
@@ -33,6 +33,7 @@ namespace ge { | |||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||
*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n | |||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. | |||
*@par Attributes: | |||
*@li keep_prob:An integer identifying the keep prob in the op. Default to 1. | |||
@@ -42,7 +43,6 @@ namespace ge { | |||
*@par Outputs: | |||
*seven outputs: | |||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. | |||
*@li ct:A 4D Tensor. Must be one of the following types: float16, float32. | |||
*@li ht:A 4D Tensor. Must be one of the following types: float16. | |||
*@li it:A 4D Tensor. Must be one of the following types: float16, float32. | |||
@@ -209,6 +209,7 @@ REG_OP(DynamicRNNGrad) | |||
*@li time_major:A bool identifying the time major in the op. Defaults to true.
*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported.
*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0.
*@li gate_order:A string identifying the gate order in the op. Supports "ijfo" and "ifjo". Defaults to "ijfo".
*@li is_training:A bool identifying whether the op is in training mode. Defaults to true . \n
*@par Outputs: | |||
@@ -253,10 +254,104 @@ REG_OP(DynamicRNN) | |||
.ATTR(time_major, Bool, true) | |||
.ATTR(activation, String, "tanh") | |||
.ATTR(forget_bias, Float, 0.0) | |||
.ATTR(gate_order, String, "ijfo") | |||
.ATTR(is_training, Bool, true) | |||
.OP_END_FACTORY_REG(DynamicRNN) | |||
/** | |||
*@brief: DynamicRNNV2 calculation. | |||
*@par Inputs: | |||
*eleven inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32. | |||
*The format must be FRACTAL_Z. | |||
*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32. | |||
*The format must be FRACTAL_Z. | |||
*@li b:An optional 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
*@par Attributes: | |||
*@li cell_type:A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL".
*Only UNIDIRECTIONAL is currently supported.
*@li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
*@li use_peephole:A bool identifying whether to use peephole connections in the op. Defaults to false.
*@li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
*@li num_proj:An integer identifying the num projection in the op. Defaults to 0.
*@li time_major:A bool identifying the time major in the op. Defaults to true.
*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh".
*Only tanh is currently supported.
*@li recurrent_activation:A string identifying the type of recurrent activation function in the op. Defaults to "sigmoid".
*Supports "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM.
*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0.
*@li gate_order:A string identifying the gate order in the op. Supports "ijfo" and "ifco". Defaults to "ijfo".
*Set "ijfo" for the TF operator LSTM, and "ifco" for TF Keras LSTM.
*@li stateful: A bool identifying whether the op is stateful. Defaults to false. Only false is currently supported.
*@li merge_mode: A string identifying the merge_mode in the op. Defaults to "concat".
*Only "concat" is currently supported.
*@li is_training:A bool identifying whether the op is in training mode. Defaults to true . \n
*@par Outputs: | |||
*eight outputs: | |||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*Return the last output_h. | |||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*Return the last output_c. | |||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@par Third-party framework compatibility: | |||
* Compatible with the TF operator LSTM or TF keras operator LSTM. | |||
*/ | |||
REG_OP(DynamicRNNV2) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(cell_type, String, "LSTM") | |||
.ATTR(direction, String, "UNIDIRECTIONAL") | |||
.ATTR(cell_depth, Int, 1) | |||
.ATTR(use_peephole, Bool, false) | |||
.ATTR(keep_prob, Float, 1.0) | |||
.ATTR(cell_clip, Float, -1.0) | |||
.ATTR(num_proj, Int, 0) | |||
.ATTR(time_major, Bool, true) | |||
.ATTR(activation, String, "tanh") | |||
.ATTR(recurrent_activation, String, "sigmoid") | |||
.ATTR(forget_bias, Float, 0.0) | |||
.ATTR(gate_order, String, "ijfo") | |||
.ATTR(stateful, Bool, false) | |||
.ATTR(merge_mode, String, "concat") | |||
.ATTR(is_training, Bool, true) | |||
.OP_END_FACTORY_REG(DynamicRNNV2) | |||
/** | |||
*@brief: DynamicRNNV3 calculation. | |||
*@par Inputs: | |||
*ten inputs: | |||
@@ -743,6 +743,52 @@ REG_OP(Col2im) | |||
.OP_END_FACTORY_REG(Col2im) | |||
/** | |||
* @brief Performs Im2col for each batch entry. \n | |||
* @par Inputs: | |||
* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth]. Must be one of the
* following types: float32, int8, float16. The input must have a data_format of
* either NHWC or NCHW.
* @par Attributes: | |||
* @li ksizes: A required list or tuple. The size of the sliding window for each | |||
* dimension of images. | |||
* @li strides: An optional list or tuple. How far the centers of two consecutive
* patches are in the images. Defaults to "{1}".
* @li dilations: An optional list or tuple. Defaults to "{1}".
* This is the input stride, specifying how far two consecutive patch
* samples are in the input. Equivalent to extracting patches
* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *
* (dilations - 1), followed by subsampling them spatially by a factor of dilations.
* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
* @li padding_mode: An optional string. The type of padding algorithm to use,
* support "SAME", "VALID", "CALCULATED". Among the three modes, only "CALCULATED"
* means to use the pads below. Defaults to "CALCULATED".
* @li pads: An optional list or tuple. The pad distance. Defaults to "{0}". \n
* @par Outputs: | |||
* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * | |||
* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols | |||
* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols" | |||
* are the dimensions of the output patches . \n | |||
* @attention Constraints: | |||
* "ksizes", "strides", "dilations" and "pads" are lists of integers . \n | |||
* @par Third-party framework compatibility | |||
* Compatible with Pytorch Im2col operator. | |||
*/ | |||
REG_OP(Im2col) | |||
.INPUT(x, TensorType::RealNumberType()) | |||
.OUTPUT(y, TensorType::RealNumberType()) | |||
.REQUIRED_ATTR(ksizes, ListInt) | |||
.ATTR(strides, ListInt, {1}) | |||
.ATTR(dilations, ListInt, {1}) | |||
.ATTR(padding_mode, String, "CALCULATED") | |||
.ATTR(pads, ListInt, {0}) | |||
.OP_END_FACTORY_REG(Im2col) | |||
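The effective-patch-size arithmetic described above, as a small helper for one spatial dimension; the SAME/VALID output formulas follow the usual TensorFlow conventions and are an assumption, only the "CALCULATED" branch with explicit pads is stated by the documentation:

#include <string>

// ksize_eff = ksize + (ksize - 1) * (dilation - 1), as described for "dilations" above.
inline int Im2colOutDim(int in, int ksize, int stride, int dilation,
                        const std::string &padding_mode, int pad_before = 0, int pad_after = 0) {
    const int ksize_eff = ksize + (ksize - 1) * (dilation - 1);
    if (padding_mode == "SAME") {
        return (in + stride - 1) / stride;                          // assumed TF-style SAME
    }
    if (padding_mode == "VALID") {
        return (in - ksize_eff) / stride + 1;                       // assumed TF-style VALID
    }
    return (in + pad_before + pad_after - ksize_eff) / stride + 1;  // "CALCULATED": explicit pads
}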
/** | |||
*@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine | |||
matrices theta. \n | |||
@@ -186,9 +186,9 @@ typedef void (*rtCallback_t)(void *fnData); | |||
#define RT_KERNEL_CUSTOM_AICPU (0x08) | |||
// STARS topic scheduler sqe : topic_type | |||
#define RT_KERNEL_DEVICE_FIRST (0X10) | |||
#define RT_KERNEL_HOST_ONLY (0X20) | |||
#define RT_KERNEL_HOST_FIRST (0X30) | |||
#define RT_KERNEL_DEVICE_FIRST (0x10) | |||
#define RT_KERNEL_HOST_ONLY (0x20) | |||
#define RT_KERNEL_HOST_FIRST (0x40) | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -17,6 +17,8 @@ | |||
#ifndef D_SYSLOG_H_ | |||
#define D_SYSLOG_H_ | |||
static const int TMP_LOG = 0; | |||
#ifdef __cplusplus | |||
#ifndef LOG_CPP | |||
extern "C" { | |||
@@ -261,7 +263,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
#define dlog_error(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
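A usage sketch for the reworked macro; the module id below is a placeholder, real call sites pass one of the module-id enums defined elsewhere in slog.h:

static const int MY_MODULE_ID = 0;  // placeholder for a real slog module id

void ReportFailure(int ret) {
    if (ret != 0) {
        // Expands to DlogErrorInner(MY_MODULE_ID, "[%s:%d]...", __FILE__, __LINE__, ret).
        dlog_error(MY_MODULE_ID, "operation failed, ret = %d", ret);
    }
}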
/** | |||
* @ingroup slog | |||
@@ -276,7 +278,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \ | |||
DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -291,7 +293,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ | |||
DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -306,7 +308,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ | |||
DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -318,7 +320,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
#define dlog_event(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -334,7 +336,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
if(CheckLogLevel(moduleId, level) == 1) { \ | |||
DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -351,7 +353,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
if(CheckLogLevel(moduleId, level) == 1) { \ | |||
DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -369,7 +371,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
if(CheckLogLevel(moduleId, level) == 1) { \ | |||
DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -453,7 +455,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); | |||
if(CheckLogLevelForC(moduleId, level) == 1) { \ | |||
DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -470,7 +472,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); | |||
if(CheckLogLevelForC(moduleId, level) == 1) { \ | |||
DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||
@@ -488,7 +490,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); | |||
if(CheckLogLevelForC(moduleId, level) == 1) { \ | |||
DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
} while (TMP_LOG != 0) | |||
/** | |||
* @ingroup slog | |||