From 0e43f8c1d52e60fba16aad837f90ec3ad08fcffc Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 29 Jul 2021 09:50:53 +0800 Subject: [PATCH] update headers --- inc/external/acl/acl.h | 4 +- inc/external/acl/acl_base.h | 2 + inc/external/acl/acl_mdl.h | 16 +- inc/external/acl/acl_op.h | 28 + inc/external/acl/acl_op_compiler.h | 6 +- inc/external/acl/acl_prof.h | 37 ++ inc/external/acl/acl_rt.h | 18 + inc/external/acl/ops/acl_dvpp.h | 109 +++- inc/external/ge/ge_ir_build.h | 28 +- inc/external/hccl/hccl.h | 27 + inc/framework/executor/ge_executor.h | 138 ++-- inc/framework/ge_runtime/task_info.h | 253 ++++++-- third_party/fwkacllib/inc/ops/array_ops.h | 68 +- third_party/fwkacllib/inc/ops/control_flow_ops.h | 2 +- third_party/fwkacllib/inc/ops/ctc_ops.h | 8 +- third_party/fwkacllib/inc/ops/data_flow_ops.h | 41 +- .../fwkacllib/inc/ops/elewise_calculation_ops.h | 166 ++--- third_party/fwkacllib/inc/ops/functional_ops.h | 3 - third_party/fwkacllib/inc/ops/image_ops.h | 311 ++++++++- third_party/fwkacllib/inc/ops/linalg_ops.h | 17 +- third_party/fwkacllib/inc/ops/list_ops.h | 64 +- third_party/fwkacllib/inc/ops/lookup_ops.h | 4 +- third_party/fwkacllib/inc/ops/math_ops.h | 135 ++-- .../fwkacllib/inc/ops/matrix_calculation_ops.h | 232 +++++-- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 699 ++++++++++----------- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 153 +++-- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 175 ++---- third_party/fwkacllib/inc/ops/nn_ops.h | 28 +- third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 97 ++- third_party/fwkacllib/inc/ops/nn_training_ops.h | 11 +- third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 85 +-- third_party/fwkacllib/inc/ops/pad_ops.h | 22 +- third_party/fwkacllib/inc/ops/parsing_ops.h | 135 ++-- third_party/fwkacllib/inc/ops/quantize_ops.h | 10 +- third_party/fwkacllib/inc/ops/ragged_array_ops.h | 9 +- .../fwkacllib/inc/ops/ragged_conversion_ops.h | 3 +- third_party/fwkacllib/inc/ops/ragged_math_ops.h | 6 +- third_party/fwkacllib/inc/ops/random_ops.h | 143 ++++- third_party/fwkacllib/inc/ops/reduce_ops.h | 107 +++- .../fwkacllib/inc/ops/resource_variable_ops.h | 28 +- third_party/fwkacllib/inc/ops/rnn.h | 18 +- third_party/fwkacllib/inc/ops/rpn_ops.h | 6 +- third_party/fwkacllib/inc/ops/sdca_ops.h | 14 +- third_party/fwkacllib/inc/ops/selection_ops.h | 163 +++-- third_party/fwkacllib/inc/ops/sparse_ops.h | 43 +- third_party/fwkacllib/inc/ops/spectral_ops.h | 20 +- .../fwkacllib/inc/ops/split_combination_ops.h | 9 +- third_party/fwkacllib/inc/ops/state_ops.h | 2 +- .../fwkacllib/inc/ops/stateful_random_ops.h | 31 +- third_party/fwkacllib/inc/ops/string_ops.h | 50 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 17 +- third_party/fwkacllib/inc/runtime/base.h | 4 +- third_party/fwkacllib/inc/runtime/config.h | 15 +- third_party/fwkacllib/inc/runtime/context.h | 4 +- third_party/fwkacllib/inc/runtime/dev.h | 22 +- third_party/fwkacllib/inc/runtime/dvfsprofile.h | 4 +- third_party/fwkacllib/inc/runtime/event.h | 14 +- third_party/fwkacllib/inc/runtime/kernel.h | 4 +- third_party/fwkacllib/inc/runtime/mem.h | 4 +- third_party/fwkacllib/inc/runtime/rt_ffts.h | 73 ++- third_party/fwkacllib/inc/runtime/rt_model.h | 4 +- third_party/fwkacllib/inc/runtime/rt_stars.h | 8 +- third_party/fwkacllib/inc/runtime/stream.h | 4 +- third_party/fwkacllib/inc/toolchain/prof_acl_api.h | 32 + .../fwkacllib/inc/toolchain/prof_callback.h | 12 + 65 files changed, 2582 insertions(+), 1423 deletions(-) mode change 100755 => 100644 
third_party/fwkacllib/inc/runtime/rt_ffts.h diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h index 8d261201..a5194472 100644 --- a/inc/external/acl/acl.h +++ b/inc/external/acl/acl.h @@ -25,9 +25,9 @@ extern "C" { #endif -// Current version is 1.0.0 +// Current version is 1.1.0 #define ACL_MAJOR_VERSION 1 -#define ACL_MINOR_VERSION 0 +#define ACL_MINOR_VERSION 1 #define ACL_PATCH_VERSION 0 /** diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index 64d4bd81..90da8b8f 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -150,6 +150,8 @@ typedef enum { ACL_DOUBLE = 11, ACL_BOOL = 12, ACL_STRING = 13, + ACL_COMPLEX64 = 16, + ACL_COMPLEX128 = 17 } aclDataType; typedef enum { diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 2bf85e29..522dbd38 100644 --- a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -297,9 +297,21 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, /** * @ingroup AscendCL + * @brief Get aclTensorDesc from aclmdlDataset + * + * @param dataset [IN] aclmdlDataset pointer; + * @param index [IN] index of tensorDesc + * + * @retval Get address of aclTensorDesc when executed successfully. + * @retval Failure return NULL + */ +ACL_FUNC_VISIBILITY aclTensorDesc *aclmdlGetDatasetTensorDesc(const aclmdlDataset *dataset, size_t index); + +/** + * @ingroup AscendCL * @brief Get the number of aclDataBuffer in aclmdlDataset * - * @param dataset [IN] aclmdlDataset poiter + * @param dataset [IN] aclmdlDataset pointer * * @retval the number of aclDataBuffer */ @@ -309,7 +321,7 @@ ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *datas * @ingroup AscendCL * @brief Get the aclDataBuffer in aclmdlDataset by index * - * @param dataset [IN] aclmdlDataset poiter + * @param dataset [IN] aclmdlDataset pointer * @param index [IN] the index of aclDataBuffer * * @retval Get successfully, return the address of aclDataBuffer diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h index d2e59bfb..f340b6bc 100644 --- a/inc/external/acl/acl_op.h +++ b/inc/external/acl/acl_op.h @@ -137,6 +137,34 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att /** * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is aclDataType + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrDataType(aclopAttr *attr, const char *attrName, aclDataType attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of aclDataType + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values. false if attrValue is 0, true otherwise. + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListDataType(aclopAttr *attr, const char *attrName, int numValues, + const aclDataType values[]); + +/** + * @ingroup AscendCL * @brief set an attribute. 
the type of the attribute is list of bools
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h
index d9d1b3da..b64b2bad 100644
--- a/inc/external/acl/acl_op_compiler.h
+++ b/inc/external/acl/acl_op_compiler.h
@@ -86,9 +86,9 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
-  const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
-  int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
-  aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
+  const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
+  int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
+  aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
 
 /**
 * @ingroup AscendCL
diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h
index 3784d8c6..a93374b0 100644
--- a/inc/external/acl/acl_prof.h
+++ b/inc/external/acl/acl_prof.h
@@ -40,13 +40,20 @@ typedef enum {
 ACL_AICORE_MEMORY_BANDWIDTH = 2,
 ACL_AICORE_L0B_AND_WIDTH = 3,
 ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
+ ACL_AICORE_MEMORY_UB = 5,
 ACL_AICORE_NONE = 0xFF
 } aclprofAicoreMetrics;
 
+typedef enum {
+  ACL_STEP_START = 0,  // step start
+  ACL_STEP_END = 1     // step end
+} aclprofStepTag;
+
 typedef struct aclprofConfig aclprofConfig;
 typedef struct aclprofStopConfig aclprofStopConfig;
 typedef struct aclprofAicoreEvents aclprofAicoreEvents;
 typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;
+typedef struct aclprofStepInfo aclprofStepInfo;
 
 /**
 * @ingroup AscendCL
@@ -322,6 +329,36 @@ ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opI
 */
 ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);
 
+/**
+ * @ingroup AscendCL
+ * @brief get the timestamp of a step start or step end
+ *
+ * @param stepInfo [IN] pointer to stepInfo data
+ * @param tag [IN] start or end flag
+ * @param stream [IN] stream info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclprofGetStepTimestamp(aclprofStepInfo *stepInfo, aclprofStepTag tag, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create pointer to aclprofStepInfo data
+ *
+ * @retval aclprofStepInfo pointer
+ */
+ACL_FUNC_VISIBILITY aclprofStepInfo *aclprofCreateStepInfo();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy aclprofStepInfo pointer
+ *
+ * @retval void
+ */
+ACL_FUNC_VISIBILITY void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h
index 5ee70724..50dbc34d 100644
--- a/inc/external/acl/acl_rt.h
+++ b/inc/external/acl/acl_rt.h
@@ -44,6 +44,12 @@ typedef enum aclrtEventStatus {
 ACL_EVENT_STATUS_RESERVED = 2,
 } aclrtEventStatus;
 
+typedef enum aclrtEventWaitStatus {
+  ACL_EVENT_WAIT_STATUS_COMPLETE = 0,
+  ACL_EVENT_WAIT_STATUS_NOT_READY = 1,
+  ACL_EVENT_WAIT_STATUS_RESERVED = 0xffff,
+} aclrtEventWaitStatus;
+
 typedef enum aclrtCallbackBlockType {
 ACL_CALLBACK_NO_BLOCK,
 ACL_CALLBACK_BLOCK,
@@ -501,6 +507,18 @@ ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus
 
 /**
 * @ingroup AscendCL
 * @brief Queries
an event's wait-status
+ *
+ * @param event [IN] event to query
+ * @param status [OUT] event wait-status
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtQueryEventWaitStatus(aclrtEvent event, aclrtEventWaitStatus *status);
+
/**
 * @ingroup AscendCL
 * @brief Block Host Running, wait event to be complete
 *
 * @param event [IN] event to wait
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h
index dcaa3936..5418ebd3 100644
--- a/inc/external/acl/ops/acl_dvpp.h
+++ b/inc/external/acl/ops/acl_dvpp.h
@@ -158,6 +158,20 @@ enum acldvppJpegFormat {
 ACL_JPEG_CSS_UNKNOWN = 1000
 };
 
+enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0 };
+
+enum aclvdecChannelDescParamType { ACL_VDEC_CSC_MATRIX_UINT32 = 0 };
+
+// Csc Matrix can be used both for acldvppChannelDescParamType and aclvdecChannelDescParamType
+enum acldvppCscMatrix {
+  ACL_DVPP_CSC_MATRIX_BT601_WIDE = 0,
+  ACL_DVPP_CSC_MATRIX_BT601_NARROW,
+  ACL_DVPP_CSC_MATRIX_BT709_WIDE,
+  ACL_DVPP_CSC_MATRIX_BT709_NARROW,
+  ACL_DVPP_CSC_MATRIX_BT2020_WIDE,
+  ACL_DVPP_CSC_MATRIX_BT2020_NARROW
+};
+
 /**
 * @ingroup AscendCL
 * @brief alloc device memory for dvpp.
@@ -1910,9 +1924,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc
 * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
 */
 ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
-  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
-  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
-  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
+  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
+  acldvppResizeConfig *resizeConfig, aclrtStream stream);
 
 /**
 * @ingroup AscendCL
@@ -2557,10 +2571,93 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
 * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
 */
 ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
-  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
-  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
-  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
+  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
+  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+/**
+ * @ingroup AscendCL
+ * @brief set param for dvpp channel desc
+ *
+ * @par Function
+ * set attribute in dvpp channelDesc for specified type
+ *
+ * @param channelDesc [OUT] the channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length of param
+ * @param param [IN] pointer to param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppGetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescParam(acldvppChannelDesc *channelDesc,
+                                                        acldvppChannelDescParamType paramType, size_t length,
+                                                        const void *param);
+
+/**
+ * @ingroup AscendCL
+ * @brief get param of dvpp channel desc
+ *
+ * @par Function
+ * get attribute value in dvpp channelDesc for specified type
+ *
+ * @param channelDesc [IN] the channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length allocated for output param
+ * @param paramRetSize [OUT] mem length of output param
+ * @param param [OUT] pointer to output param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppSetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppGetChannelDescParam(const acldvppChannelDesc *channelDesc,
+                                                        acldvppChannelDescParamType paramType, size_t length,
+                                                        size_t *paramRetSize, void *param);
+/**
+ * @ingroup AscendCL
+ * @brief set param for vdec channel desc
+ *
+ * @par Function
+ * set attribute in channelDesc for specified type
+ *
+ * @param channelDesc [OUT] the vdec channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length of param
+ * @param param [IN] pointer to param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecGetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescParam(aclvdecChannelDesc *channelDesc,
+                                                        aclvdecChannelDescParamType paramType, size_t length,
+                                                        const void *param);
+/**
+ * @ingroup AscendCL
+ * @brief get param of vdec channel desc
+ *
+ * @par Function
+ * get attribute value in channelDesc for specified type
+ *
+ * @param channelDesc [IN] the vdec channel description
+ * @param paramType [IN] specified param type
+ * @param length [IN] mem length allocated for output param
+ * @param paramRetSize [OUT] mem length of output param
+ * @param param [OUT] pointer to output param
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecSetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecGetChannelDescParam(const aclvdecChannelDesc *channelDesc,
+                                                        aclvdecChannelDescParamType paramType, size_t length,
+                                                        size_t *paramRetSize, void *param);
 #ifdef __cplusplus
 }
 #endif
diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h
index 04e059a1..729685a9 100644
--- a/inc/external/ge/ge_ir_build.h
+++ b/inc/external/ge/ge_ir_build.h
@@ -1,18 +1,18 @@
 /**
-* Copyright 2020 Huawei Technologies Co., Ltd
-
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-
-* http://www.apache.org/licenses/LICENSE-2.0
-
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef INC_EXTERNAL_GE_IR_BUILD_H_
 #define INC_EXTERNAL_GE_IR_BUILD_H_
diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h
index 8261adc4..c24b5374 100644
--- a/inc/external/hccl/hccl.h
+++ b/inc/external/hccl/hccl.h
@@ -145,6 +145,33 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
 extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
 
 /**
+ * @brief Send operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param count An integer(u64) identifying the number of elements to be sent.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param destRank An integer identifying the destination rank.
+ * @param comm A pointer identifying the communication resource.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, uint32_t destRank, HcclComm comm,
+                           aclrtStream stream);
+/**
+ * @brief Receive operator.
+ *
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param count An integer(u64) identifying the number of elements to be received.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param srcRank An integer identifying the source rank.
+ * @param comm A pointer identifying the communication resource.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, uint32_t srcRank, HcclComm comm,
+                           aclrtStream stream);
+
+/**
 * @brief Destroy HCCL comm
 *
 * @param comm A pointer identifying the communication resource targeting
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index fcca561c..ce7c82ac 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -50,14 +50,30 @@ class GE_FUNC_VISIBILITY GeExecutor {
 public:
 GeExecutor();
 ~GeExecutor() = default;
- ge::Status Initialize();
- ge::Status Finalize();
- ge::Status UnloadModel(uint32_t modelId);
+ Status Initialize();
+ Status Finalize();
+
+ ///
+ /// @ingroup ge
+ /// @brief Initialize global execute environment.
+ /// @param [in] options: environment variables.
+ /// @return init result
+ ///
+ static Status Initialize(const std::map<std::string, std::string> &options);
+
+ ///
+ /// @ingroup ge
+ /// @brief Finalize global execute environment.
+ /// @return execute result
+ ///
+ static Status FinalizeEx();
+
+ Status UnloadModel(uint32_t modelId);
 
 // Get input and output descriptor
- ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
-                             std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false);
+ Status GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc, std::vector<TensorDesc> &output_desc,
+                         bool new_model_desc = false);
 
 ///
 /// @ingroup ge
@@ -68,7 +84,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario
 /// @return execute result
 ///
- ge::Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size);
+ Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size);
 
 ///
 /// @ingroup ge
@@ -80,8 +96,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario
 /// @return execute result
 ///
- ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
-                                uint64_t image_width);
+ Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
+                            uint64_t image_width);
 
 ///
 /// @ingroup ge
@@ -93,8 +109,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] dynamic_dims: array of dynamic dimensions
 /// @return execute result
 ///
- ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
-                           const std::vector<uint64_t> &dynamic_dims);
+ Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
+                       const std::vector<uint64_t> &dynamic_dims);
 
 ///
 /// @ingroup ge
@@ -104,8 +120,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] cur_dynamic_dims: current dynamic dims
 /// @return execute result
 ///
- ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
-                              std::vector<uint64_t> &cur_dynamic_dims);
+ Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
+                          std::vector<uint64_t> &cur_dynamic_dims);
 
 ///
 /// @ingroup ge
@@ -115,8 +131,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] dynamic_type
 /// @return execute result
 ///
- ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
-                                int32_t &dynamic_type);
+ Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
 
 ///
 /// @ingroup ge
@@ -125,7 +140,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] batch_info
 /// @return execute result
 ///
- ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
+ Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
 
 ///
 /// @ingroup ge
@@ -134,9 +149,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] user_designate_shape_order
 /// @return execute result
 ///
- ge::Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
+ Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
 
- ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);
+ Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);
 
 ///
 /// @ingroup ge
@@ -148,22 +163,22 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp
 /// @return execute result
 ///
- ge::Status SetDynamicAippData(uint32_t model_id, void
*dynamic_input_addr, uint64_t length,
-                               const std::vector<kAippDynamicBatchPara> &aippBatchPara,
-                               const kAippDynamicPara &aippParms);
+ Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
+                           const std::vector<kAippDynamicBatchPara> &aipp_batch_para,
+                           const kAippDynamicPara &aippParms);
 
- ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
+ Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
 
- ge::Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name,
-                      std::string &attr_value);
+ Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name,
+                  std::string &attr_value);
 
- ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
+ Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
 
- ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
+ Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
 
- ge::Status CommandHandle(const ge::Command &command);
+ Status CommandHandle(const Command &command);
 
- ge::Status SetDump(const DumpConfig &dump_config);
+ Status SetDump(const DumpConfig &dump_config);
 
 ///
 /// @ingroup ge
@@ -173,7 +188,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @return SUCCESS
 /// @return FAILED
 ///
- ge::Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size);
+ Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size);
 
 ///
 /// @ingroup ge
@@ -182,7 +197,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] ModelData &model_data: Offline model memory data
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status LoadDataFromFile(const std::string &path, ge::ModelData &model_data);
+ Status LoadDataFromFile(const std::string &path, ModelData &model_data);
 
 ///
 /// @ingroup ge
@@ -195,8 +210,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] uint32_t &model_id: Corresponding identification after model loading
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status LoadModelFromData(uint32_t &model_id, const ge::ModelData &model_data, void *dev_ptr, size_t mem_size,
-                              void *weight_ptr, size_t weight_size);
+ Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size,
+                          void *weight_ptr, size_t weight_size);
 
 ///
 /// @ingroup ge
@@ -207,9 +222,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [in] input_queue_ids: input queue ids created by user.
 /// @param [in] output_queue_ids: output queue ids created by user.
 /// @return: 0 for success / others for fail
 ///
- ge::Status LoadModelWithQ(uint32_t &model_id, const ge::ModelData &model_data,
-                           const std::vector<uint32_t> &input_queue_ids,
-                           const std::vector<uint32_t> &output_queue_ids);
+ Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids,
+                       const std::vector<uint32_t> &output_queue_ids);
 
 ///
 /// @ingroup ge
@@ -221,8 +235,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] domi::OutputData *output_data: Model output data
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data,
-                      ge::RunModelData &output_data, bool async_mode = false);
+ Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data,
+                  bool async_mode = false);
 
 ///
 /// @ingroup ge
@@ -236,9 +250,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
-                      const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
-                      std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
+ Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data,
+                  const std::vector<GeTensorDesc> &input_desc, RunModelData &run_output_data,
+                  std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
 
 ///
 /// @ingroup ge
@@ -248,7 +262,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] size_t &weight_size Weight memory space size
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size);
+ Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size);
 
 ///
 /// @ingroup ge
@@ -259,39 +273,39 @@ class GE_FUNC_VISIBILITY GeExecutor {
 /// @param [out] size_t &weight_size Weight memory space size
 /// @return SUCCESS handle successfully / others handle failed
 ///
- ge::Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size);
+ Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size);
 
- static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream,
-                                SingleOp **single_op);
+ static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream,
+                            SingleOp **single_op);
 
- static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream,
-                                  SingleOp **single_op, const uint64_t model_id);
+ static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream,
+                              SingleOp **single_op, const uint64_t model_id);
 
- static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
-                                std::vector<DataBuffer> &outputs);
+ static Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
+                            std::vector<DataBuffer> &outputs);
 
- static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
-                                       DynamicSingleOp **single_op);
+ static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream,
+                                   DynamicSingleOp **single_op);
 
- static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
-                                         DynamicSingleOp
**single_op, const uint64_t model_id); + static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream, + DynamicSingleOp **single_op, const uint64_t model_id); - static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, - const std::vector &inputs, std::vector &output_desc, - std::vector &outputs); + static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, + const std::vector &inputs, std::vector &output_desc, + std::vector &outputs); - static ge::Status ReleaseSingleOpResource(void *stream); + static Status ReleaseSingleOpResource(void *stream); - static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + static Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); - ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); - ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); - ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, - std::vector &output_dims); - ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); + Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); + Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); + Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, + std::vector &output_dims); + Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); private: - static bool isInit_; + static std::atomic_bool is_inited_; }; } // namespace ge diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h index 4530bff7..abc4783d 100644 --- a/inc/framework/ge_runtime/task_info.h +++ b/inc/framework/ge_runtime/task_info.h @@ -50,10 +50,18 @@ enum TaskInfoType { class TaskInfo { public: virtual ~TaskInfo() {} - uint32_t stream_id() const { return stream_id_; } - TaskInfoType type() const { return type_; } - std::string op_name() const { return op_name_; } - bool dump_flag() const { return dump_flag_; } + uint32_t stream_id() const { + return stream_id_; + } + TaskInfoType type() const { + return type_; + } + std::string op_name() const { + return op_name_; + } + bool dump_flag() const { + return dump_flag_; + } protected: TaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, bool dump_flag) @@ -84,15 +92,33 @@ class CceTaskInfo : public TaskInfo { is_flowtable_(is_flowtable) {} ~CceTaskInfo() override {} - cce::ccOpContext cc_context() const { return ctx_; } - std::string stub_func() const { return stub_func_; } - uint32_t block_dim() const { return block_dim_; } - const std::vector &args() const { return args_; } - uint32_t args_size() const { return args_size_; } - const std::vector &sm_desc() const { return sm_desc_; } - const std::vector &flow_table() const { return flow_table_; } - const std::vector &args_offset() const { return args_offset_; } - bool is_flowtable() const { return is_flowtable_; } + cce::ccOpContext cc_context() const { + return ctx_; + } + std::string stub_func() const { + return stub_func_; + } + uint32_t block_dim() const { + return block_dim_; + } + const std::vector &args() const { + return args_; + } + uint32_t args_size() const { + return args_size_; + } + const std::vector &sm_desc() const { + return sm_desc_; + } + const std::vector &flow_table() const { + return 
flow_table_; + } + const std::vector &args_offset() const { + return args_offset_; + } + bool is_flowtable() const { + return is_flowtable_; + } private: cce::ccOpContext ctx_; @@ -126,17 +152,39 @@ class TbeTaskInfo : public TaskInfo { workspace_addrs_(workspace_addrs) {} ~TbeTaskInfo() override {} - const std::string &stub_func() const { return stub_func_; } - uint32_t block_dim() const { return block_dim_; } - const std::vector &args() const { return args_; } - uint32_t args_size() const { return args_size_; } - const std::vector &sm_desc() const { return sm_desc_; } - void *binary() const { return binary_; } - uint32_t binary_size() const { return binary_size_; } - const std::vector &meta_data() const { return meta_data_; } - const std::vector &input_data_addrs() const { return input_data_addrs_; } - const std::vector &output_data_addrs() const { return output_data_addrs_; } - const std::vector &workspace_addrs() const { return workspace_addrs_; } + const std::string &stub_func() const { + return stub_func_; + } + uint32_t block_dim() const { + return block_dim_; + } + const std::vector &args() const { + return args_; + } + uint32_t args_size() const { + return args_size_; + } + const std::vector &sm_desc() const { + return sm_desc_; + } + void *binary() const { + return binary_; + } + uint32_t binary_size() const { + return binary_size_; + } + const std::vector &meta_data() const { + return meta_data_; + } + const std::vector &input_data_addrs() const { + return input_data_addrs_; + } + const std::vector &output_data_addrs() const { + return output_data_addrs_; + } + const std::vector &workspace_addrs() const { + return workspace_addrs_; + } void SetBinary(void *binary, uint32_t binary_size) { binary_ = binary; @@ -171,12 +219,24 @@ class AicpuTaskInfo : public TaskInfo { output_data_addrs_(output_data_addrs) {} ~AicpuTaskInfo() override {} - const std::string &so_name() const { return so_name_; } - const std::string &kernel_name() const { return kernel_name_; } - const std::string &node_def() const { return node_def_; } - const std::vector &input_data_addrs() const { return input_data_addrs_; } - const std::vector &output_data_addrs() const { return output_data_addrs_; } - const std::string &ext_info() const { return ext_info_; } + const std::string &so_name() const { + return so_name_; + } + const std::string &kernel_name() const { + return kernel_name_; + } + const std::string &node_def() const { + return node_def_; + } + const std::vector &input_data_addrs() const { + return input_data_addrs_; + } + const std::vector &output_data_addrs() const { + return output_data_addrs_; + } + const std::string &ext_info() const { + return ext_info_; + } private: std::string so_name_; @@ -192,7 +252,9 @@ class LabelSetTaskInfo : public TaskInfo { LabelSetTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SET, false), label_id_(label_id) {} ~LabelSetTaskInfo() override {} - uint32_t label_id() const { return label_id_; } + uint32_t label_id() const { + return label_id_; + } private: uint32_t label_id_; @@ -203,7 +265,9 @@ class LabelGotoTaskInfo : public TaskInfo { LabelGotoTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_GOTO, false), label_id_(label_id) {} ~LabelGotoTaskInfo() override {} - uint32_t label_id() const { return label_id_; } + uint32_t label_id() const { + return label_id_; + } private: uint32_t label_id_; @@ -218,9 +282,15 @@ 
class LabelSwitchTaskInfo : public TaskInfo { label_list_(label_list), cond_(cond) {} ~LabelSwitchTaskInfo() override {} - uint32_t label_size() const { return label_size_; } - const std::vector &label_list() const { return label_list_; } - void *cond() const { return cond_; } + uint32_t label_size() const { + return label_size_; + } + const std::vector &label_list() const { + return label_list_; + } + void *cond() const { + return cond_; + } private: uint32_t label_size_; @@ -230,7 +300,9 @@ class LabelSwitchTaskInfo : public TaskInfo { class EventTaskInfo : public TaskInfo { public: - uint32_t event_id() const { return event_id_; } + uint32_t event_id() const { + return event_id_; + } protected: EventTaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, uint32_t event_id) @@ -271,14 +343,13 @@ class FusionEndTaskInfo : public TaskInfo { class HcclTaskInfo : public TaskInfo { public: HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, - void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, + void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num, const std::vector &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), hccl_type_(hccl_type), input_data_addr_(input_data_addr), output_data_addr_(output_data_addr), - workspace_addr_(workspace_addr), workspace_size_(workspace_size), hccl_stream_num_(hccl_stream_num), private_def_(private_def), @@ -290,25 +361,47 @@ class HcclTaskInfo : public TaskInfo { group_(group) {} ~HcclTaskInfo() override {} - const std::string &hccl_type() const { return hccl_type_; } - void *input_data_addr() const { return input_data_addr_; } - void *output_data_addr() const { return output_data_addr_; } - void *workspace_addr() const { return workspace_addr_; } - int64_t workspace_size() const { return workspace_size_; } - int64_t hccl_stream_num() const { return hccl_stream_num_; } - const std::vector &private_def() const { return private_def_; } - void *ops_kernel_store() const { return ops_kernel_store_; } - int32_t count() const { return count_; } - int64_t root_id() const { return root_id_; } - int64_t op_type() const { return op_type_; } - int64_t data_type() const { return data_type_; } - const std::string &group() const { return group_; } + const std::string &hccl_type() const { + return hccl_type_; + } + void *input_data_addr() const { + return input_data_addr_; + } + void *output_data_addr() const { + return output_data_addr_; + } + int64_t workspace_size() const { + return workspace_size_; + } + int64_t hccl_stream_num() const { + return hccl_stream_num_; + } + const std::vector &private_def() const { + return private_def_; + } + void *ops_kernel_store() const { + return ops_kernel_store_; + } + int32_t count() const { + return count_; + } + int64_t root_id() const { + return root_id_; + } + int64_t op_type() const { + return op_type_; + } + int64_t data_type() const { + return data_type_; + } + const std::string &group() const { + return group_; + } private: std::string hccl_type_; void *input_data_addr_; void *output_data_addr_; - void *workspace_addr_; int64_t workspace_size_; int64_t hccl_stream_num_; std::vector private_def_; @@ -329,9 +422,15 @@ class ProfilerTraceTaskInfo : public TaskInfo { flat_(flat) {} ~ProfilerTraceTaskInfo() override {} - uint64_t 
log_id() const { return log_id_; } - bool notify() const { return notify_; } - uint32_t flat() const { return flat_; } + uint64_t log_id() const { + return log_id_; + } + bool notify() const { + return notify_; + } + uint32_t flat() const { + return flat_; + } private: uint64_t log_id_; @@ -351,11 +450,21 @@ class MemcpyAsyncTaskInfo : public TaskInfo { kind_(kind) {} ~MemcpyAsyncTaskInfo() override {} - void *dst() const { return dst_; } - uint64_t dst_max() const { return dst_max_; } - void *src() const { return src_; } - uint64_t count() const { return count_; } - uint32_t kind() const { return kind_; } + void *dst() const { + return dst_; + } + uint64_t dst_max() const { + return dst_max_; + } + void *src() const { + return src_; + } + uint64_t count() const { + return count_; + } + uint32_t kind() const { + return kind_; + } private: void *dst_; @@ -377,11 +486,21 @@ class StreamSwitchTaskInfo : public TaskInfo { data_type_(data_type) {} ~StreamSwitchTaskInfo() override {} - int64_t true_stream_id() const { return true_stream_id_; } - void *input_addr() const { return input_addr_; } - void *value_addr() const { return value_addr_; } - int64_t cond() const { return cond_; } - int64_t data_type() const { return data_type_; } + int64_t true_stream_id() const { + return true_stream_id_; + } + void *input_addr() const { + return input_addr_; + } + void *value_addr() const { + return value_addr_; + } + int64_t cond() const { + return cond_; + } + int64_t data_type() const { + return data_type_; + } private: int64_t true_stream_id_; @@ -397,7 +516,9 @@ class StreamActiveTaskInfo : public TaskInfo { : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_ACTIVE, false), active_stream_id_(active_stream_id) {} ~StreamActiveTaskInfo() override {} - uint32_t active_stream_id() const { return active_stream_id_; } + uint32_t active_stream_id() const { + return active_stream_id_; + } private: uint32_t active_stream_id_; diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index fd35b546..450c893e 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -35,7 +35,7 @@ namespace ge { * @li values:A `Tensor`. Must have the same type as `sorted_x`. \n *@par Attributes: -*@li out_type:An optional `DType` from: `int32, int64`. +*out_type:An optional `DType` from: `int32, int64`. Defaults to `int32`. \n *@par Outputs: @@ -504,7 +504,7 @@ REG_OP(Constant) *x: A tensor. \n *@par Outputs: -*y: A tensor. \n +*y: A copy of input tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Snapshot. @@ -684,7 +684,9 @@ REG_OP(ExpandDims) *@par Inputs: *@li x: Original tensor. -*@li axis: List of ints. \n + +*@par Attributes: +*@li axes: List of ints indicating the dimensions to be inserted. \n *@par Outputs: *y: Reshape tensor with same data as input. \n @@ -755,10 +757,10 @@ REG_OP(Squeeze) *@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n *@par Inputs: -*x: A tensor. \n +*x: A Tensor of type float32, float16, int8, int16, uint16, uint8, int32, int64, uint32, uint64, bool, double. \n *@par Outputs: -*y: A tensor. The rank of input tensor. \n +*y: A tensor. The rank of input tensor. Type is int32. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Rank. 
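
The HcclSend/HcclRecv pair added to inc/external/hccl/hccl.h above is meant to be called as a matched pair on the two ranks involved. The following is a minimal usage sketch, not part of the patch: it assumes the communicator and stream already exist (created elsewhere, e.g. with HcclCommInitClusterInfo and aclrtCreateStream, neither of which this patch touches), that devBuf is device memory holding count float32 elements, and that HCCL_DATA_TYPE_FP32 is the matching HcclDataType enumerator.

    #include "acl/acl_rt.h"
    #include "hccl/hccl.h"

    // Hedged sketch: rank 0 sends `count` float32 elements to rank 1.
    // Both calls are asynchronous on `stream`, so the receiver must
    // synchronize the stream before reading devBuf.
    void SendRecvSketch(HcclComm comm, aclrtStream stream, void *devBuf,
                        uint64_t count, uint32_t myRank) {
      if (myRank == 0) {
        (void)HcclSend(devBuf, count, HCCL_DATA_TYPE_FP32, 1U /* destRank */, comm, stream);
      } else if (myRank == 1) {
        (void)HcclRecv(devBuf, count, HCCL_DATA_TYPE_FP32, 0U /* srcRank */, comm, stream);
      }
      (void)aclrtSynchronizeStream(stream);
    }

The send and the receive must agree on count and dataType; a mismatched pair is the most common error with point-to-point APIs of this shape.
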
@@ -848,7 +850,6 @@ REG_OP(PlaceHolder)
 *x: A tensor. \n
 
 *@par Attributes:
-*@li dtype: data type of tensor.
 *@li shape: tensor shape. \n
 
 *@par Outputs:
@@ -867,13 +868,13 @@ REG_OP(PlaceholderWithDefault)
 *@brief Reads and returns the value of the input variable tensor. \n
 
 *@par Inputs:
-*x: A tensor. \n
+*x: A tensor must have numeric type. \n
 
 *@par Attributes:
 *dtype: An optional int32 or int64. The output data type. Defaults to int32. \n
 
 *@par Outputs:
-*y: A tensor. \n
+*y: A tensor must have numeric type. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ReadVariableOp.
@@ -1134,10 +1135,10 @@
 This is an M-length vector.
 This is an R-length vector
 
 *@par Attributes:
-*@li normalize: boolean (if true, edit distances are normalized by length of truth). \n
+*normalize: boolean (if true, edit distances are normalized by length of truth). \n
 
 *@par Outputs:
-*@li output: A dense float tensor with rank R - 1. \n
+*output: A dense float tensor with rank R - 1. \n
 
 *@par Third-party framework compatibility
 * Compatible with TensorFlow EditDistance operator.
@@ -1154,18 +1155,17 @@ REG_OP(EditDistance)
 .OP_END_FACTORY_REG(EditDistance)
 
 /**
-* @brief sort_v2.
+* @brief sort the input tensor without returning the indices.
 
 * @par Inputs:
-* @li x: An ND tensor of type float16.
+* x: An ND tensor of type float16.
 
 * @par Attributes:
-
 * @li axis: An optional int. The dimension to sort along. This value defaults to -1.
 * @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False.
 
 * @par Outputs:
-* @li y: An ND tensor of type float16.
+* y: An ND tensor of type float16.
 
 * @attention Constraints:
 * @li Axis should select the last dim.
@@ -1206,7 +1206,7 @@ REG_OP(Expand)
 *@brief Returns a tensor containing the indices of all non-zero elements of input. \n
 
 *@par Inputs:
-*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
+*x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
 
 *@par Attributes:
 * transpose: the output tensor will be transposed if true. \n
@@ -1230,15 +1230,15 @@ REG_OP(NonZero)
 
 * @par Inputs:
 * One inputs, including:
-* @li x: A Tensor. Must be one of the following types:
+* x: A Tensor. Must be one of the following types:
 * float16, float32, int32, int8 ,uint8. \n
 
 * @par Attributes:
-* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n
+* shape: A required listInt to specify the shape that the input tensor expanded to. \n
 
 * @par Outputs:
-* @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n
+* y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n
 
 * @par Third-party framework compatibility
 * Compatible with the ONNX operator Expand.
@@ -1249,6 +1249,38 @@ REG_OP(ExpandD)
 .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
 .REQUIRED_ATTR(shape, ListInt)
 .OP_END_FACTORY_REG(ExpandD)
+
+/**
+*@brief Finds unique elements in a 1D tensor. \n
+
+*@par Inputs:
+*x: 1D tensor. Must be one of the following types:
+* float16, float32, double, int64, int32, int16, uint16, int8 ,uint8. \n
+
+*@par Attributes:
+*@li return_inverse: Whether to also return the indices for where elements in the original
+* input ended up in the returned unique list.
+*@li return_counts: Whether to also return the counts for each unique element.
+
+*@par Outputs:
+*@li y1: The output list of unique scalar elements. Has the same type as "x".
+*@li y2: Representing the indices for where elements in the original input map to in the output.
+*@li y3: Representing the number of occurrences for each unique value or tensor. \n

+* @par Third-party framework compatibility
+* Compatible with the torch operator _unique2.
+*/
+
+REG_OP(UniqueWithCountsAndSorting)
+    .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
+    .OUTPUT(y1, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
+    .OUTPUT(y2, TensorType({ DT_INT32, DT_INT64 }))
+    .OUTPUT(y3, TensorType({ DT_INT32, DT_INT64 }))
+    .ATTR(return_inverse, Bool, false)
+    .ATTR(return_counts, Bool, false)
+    .OP_END_FACTORY_REG(UniqueWithCountsAndSorting)
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h
index e5bd3534..cd993599 100644
--- a/third_party/fwkacllib/inc/ops/control_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h
@@ -96,7 +96,7 @@ REG_OP(RefMerge)
 * Otherwise, the data is forwarded to "output_false" . \n
 
 *@par Inputs:
- *@li data: The tensor to be forwarded. \n
+ *@li data: The tensor to be forwarded.
 * Must be one of the following types: float16, float32, float64,
 * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
 *@li pred: A boolean scalar. The output port that will receive data . \n
diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h
index e907b828..bbc610ff 100644
--- a/third_party/fwkacllib/inc/ops/ctc_ops.h
+++ b/third_party/fwkacllib/inc/ops/ctc_ops.h
@@ -74,7 +74,7 @@ REG_OP(CTCLoss)
 *@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n
 
 *@par Attributes:
-*@li merge_repeated: If True, merge repeated classes in output. \n
+* merge_repeated: If True, merge repeated classes in output. \n
 
 *@par Outputs:
 *@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`,
@@ -108,6 +108,8 @@ REG_OP(CTCGreedyDecoder)
 
 *@par Attributes:
 *@li merge_repeated: If True, merge repeated classes in output. \n
+*@li beam_width: A scalar >= 0 (beam search beam width).
+*@li top_paths: A scalar >= 0, <= beam_width (controls output size).
 
 *@par Outputs:
 *@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j,
@@ -162,7 +164,7 @@ REG_OP(CTCBeamSearchDecoder)
 * Compatible with Pytorch CTCLoss operator.
 
 *@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*The length of Label should be in [4, 1000].
 */
 REG_OP(CTCLossV2)
 .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE}))
@@ -203,7 +205,7 @@ REG_OP(CTCLossV2)
 * Compatible with Pytorch CTCLoss operator.
 
 *@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*The limit of Label's length is 1K.
 */
 REG_OP(CTCLossV2Grad)
 .INPUT(grad_out, TensorType({DT_FLOAT, DT_DOUBLE}))
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h
index 6021f4e3..32454d27 100644
--- a/third_party/fwkacllib/inc/ops/data_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h
@@ -1201,6 +1201,8 @@ REG_OP(TensorArraySize)
 *@brief A queue implementation that dequeues elements in a random order. \n
 
 *@par Attributes:
+*@li component_types: A list of fully-defined TensorType objects with
+the same length as shapes, or None.
 *@li shapes: (Optional.)
A list of fully-defined TensorShape objects with the same length as dtypes, or None. *@li capacity: An integer. The upper bound on the number of elements that may @@ -1281,6 +1283,7 @@ The length of this attr must be either 0 or the same as the length of elements are not constrained, and only one element may be dequeued at a time. *@li container: An optional string. Defaults to "". If non-empty, this queue is placed in the given container. Otherwise, a default container is used. +*@li capacity:An integer. The upper bound on the number of elements that may be stored in this queue. *@li shared_name: An optional string. Defaults to "". If non-empty, this queue will be shared under the given name across multiple sessions. \n @@ -1435,7 +1438,7 @@ REG_OP(OrderedMapClear) *@par Inputs: *Including: -* @li resource: A Tensor of type DT_RESOURCE. +* resource: A Tensor of type DT_RESOURCE. *@par Outputs: *handle: A Tensor of type DT_STRING ref. \n @@ -1526,7 +1529,7 @@ REG_OP(OrderedMapPeek) *@par Inputs: *Including: -* @li indices: A Tensor of type DT_INT32. \n +* indices: A Tensor of type DT_INT32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". @@ -2332,6 +2335,40 @@ REG_OP(CacheAllIndexToLocal) .OP_END_FACTORY_REG(CacheAllIndexToLocal) /** +*@brief LRUCacheV2, aicore LRUCache. +*@par Inputs: +*index_list: exchange index list +*data: host data +*cache: gm cache +*tag: cache's tag +*is_last_call: if is last call write all cache to data +*@par Outputs: +*data: output data +*cache: gm cache +*tag: cache's tag +*index_offset_list: index_offset_list +*not_in_cache_index_list: output not in cache's index_list +*not_in_cache_number: scalar +*@par Attributes: +*pre_route_count: types of all outputs +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LRUCacheV2) + .INPUT(index_list, TensorType::BasicType()) + .INPUT(data, TensorType::BasicType()) + .INPUT(cache, TensorType::BasicType()) + .INPUT(tag, TensorType::BasicType()) + .INPUT(is_last_call, TensorType::BasicType()) + .OUTPUT(data, TensorType::BasicType()) + .OUTPUT(cache, TensorType::BasicType()) + .OUTPUT(tag, TensorType::BasicType()) + .OUTPUT(index_offset_list, TensorType::BasicType()) + .OUTPUT(not_in_cache_index_list, TensorType::BasicType()) + .OUTPUT(not_in_cache_number, TensorType::BasicType()) + .REQUIRED_ATTR(pre_route_count, Int) + .OP_END_FACTORY_REG(LRUCacheV2) + +/** *@brief DynamicGetNext, dynamic get next data *@par Inputs: *x: the iterator, all types are available diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index f61e2939..b4299026 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -624,9 +624,9 @@ REG_OP(Log1p) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... 
*DN<=1000000,n<=8 @@ -2066,9 +2066,9 @@ REG_OP(FloorDiv) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 @@ -2200,9 +2200,9 @@ REG_OP(Tan) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 @@ -2467,11 +2467,11 @@ REG_OP(Eltwise) *@par Inputs: *One inputs, including: - * @li input_x: A tensor. Must be one of the following types: + * input_x: A tensor. Must be one of the following types: * float16, float32. \n *@par Outputs: - *y: A Tensor with the same type and shape of input_x's. \n + *output_y: A Tensor with the same type and shape of input_x's. \n *@par Third-party framework compatibility *Compatible with the Pytorch operator Erfinv. \n @@ -3154,13 +3154,13 @@ REG_OP(FusedMulAddNL2loss) *@brief Tests whether the input exceeds a threshold. \n *@par Inputs: -*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n +* x: A Tensor with any format. Must be one of the following types: float16, float32. \n *@par Attributes: -*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n +* threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n *@par Outputs: -*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. +* y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. *@par Third-party framework compatibility * Compatible with the Caffe operator Threshold. */ @@ -3175,7 +3175,7 @@ REG_OP(FusedMulAddNL2loss) *@brief Returns the index number corresponding to the maximum value entered. \n *@par Inputs: -*@li x: A tensor. Must be one of the following types: float16, float32. \n +*x: A tensor. Must be one of the following types: float16, float32. \n *@par Attributes: *@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000 @@ -3203,12 +3203,11 @@ REG_OP(ArgMaxWithK) *@brief Multiply tensor with scale. \n *@par Inputs: -*Five inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. \n +*One input, including: +*x: A Tensor. Must be one of the following types:int32,int16, float16, float32. *@par Outputs: -*@li y: A Tensor. Has the same type and shape as "x1". \n +*y: A Tensor. Has the same type and shape as "x1". 
\n *@par Third-party framework compatibility: * Compatible with the Pytorch operator muls. @@ -3223,12 +3222,11 @@ REG_OP(Muls) *@brief Fill tensor with scale. \n *@par Inputs: -*Five inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. \n +*One input, including: +*x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. *@par Outputs: -*@li y: A Tensor. Has the same type and shape as "x1". \n +*y: A Tensor. Has the same type and shape as "x1". \n *@par Third-party framework compatibility: * Compatible with the Pytorch operator fills. @@ -3378,7 +3376,7 @@ REG_OP(TensorMove) *@par Inputs: *One inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n *@par Outputs: *output_x: A Tensor. Has the same type as "x". \n @@ -3397,7 +3395,7 @@ REG_OP(TensorRedirect) * multiply the result by the scalar value and add it to tensor x1 * @par Inputs: -* Three inputs, including: +* Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: * float16, float32. * @li x1: A mutable input Tensor of the same type as x1. @@ -3406,7 +3404,7 @@ REG_OP(TensorRedirect) * float16, float32, int32. \n * @par Outputs: -* @li y: A mutable Tensor. Has the same type as "x1". \n +* y: A mutable Tensor. Has the same type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcdiv. @@ -3420,12 +3418,12 @@ REG_OP(Addcdiv) .OP_END_FACTORY_REG(Addcdiv) /** -* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, -* multiply the result by the scalar value and add it to tensor input_data +* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, +* multiply the result by the scalar value and add it to tensor input_data * @par Inputs: -* Three inputs, including: +* Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: * float16, float32, int8, int32, uint8. * @li x1: A mutable input Tensor of the same type as x1. @@ -3433,7 +3431,7 @@ REG_OP(Addcdiv) * @li value: A tensor which includes only one element of the same type as x1. \n * @par Outputs: -* @li y: A mutable output Tensor. Has the same type as "x1". \n +* y: A mutable output Tensor. Has the same type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcmul. @@ -3455,7 +3453,7 @@ REG_OP(Addcmul) * @li alpha: A scalar tensor of type float16, float32. \n * @par Outputs: -* @li y: An ND tensor tensor with the same shape and type as "x1". \n +* y: An ND tensor tensor with the same shape and type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Axpy. @@ -3468,25 +3466,6 @@ REG_OP(AxpyV2) .OP_END_FACTORY_REG(AxpyV2) /** -* @brief Computes the result of x1 - x2. - -* @par Inputs: -* @li x1: An ND tensor of type float16, float, int32. -* @li x2: An ND tensor of type float16, float, int32. \n - -* @par Outputs: -* @li y: An ND tensor tensor with the same type as "x1". \n - -* @par Third-party framework compatibility -* Compatible with the Pytorch operator Sub. 
-*/ -REG_OP(PtSub) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OP_END_FACTORY_REG(PtSub) - -/** * @brief Add the partial values of two tensors in format NC1HWC0. * @par Inputs: @@ -3502,7 +3481,7 @@ REG_OP(PtSub) * the difference between C1 and offset in "x1" and "x2". \n * @par Outputs: -* @li y: A Tensor of the same type as "x1", and the same shape as "x1", +* y: A Tensor of the same type as "x1", and the same shape as "x1", * except for the C1 value. Record the result after adding. \n */ REG_OP(StrideAdd) @@ -3523,7 +3502,7 @@ REG_OP(StrideAdd) * @li input_y: A Tensor. the second tensor. \n * @par Outputs: -* @li output_z: A Tensor. Bool type, compare result of the two inputs. \n +*output_z: A Tensor. Bool type, compare result of the two inputs. \n * @par Third-party framework compatibility * Compatible with the Pytorch equal operator. \n @@ -3535,21 +3514,21 @@ REG_OP(TensorEqual) .OP_END_FACTORY_REG(TensorEqual) /** - * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). - * All inputs and outputs must have the same data type. This operator supports multidirectional + * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). + * All inputs and outputs must have the same data type. This operator supports multidirectional * (i.e., Numpy-style) broadcasting - * - * @par inputs + * + * @par Inputs: * one input including: - * @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 - * - * @par output + * x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 + * + * @par Outputs: * one output including: - * @li y:A Tensor of the same type as x - * + * y:A Tensor of the same type as x + * */ REG_OP(MaxN) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) .OP_END_FACTORY_REG(MaxN) @@ -3634,16 +3613,16 @@ REG_OP(DataCompare) *which Hardmax will be performed.The output tensor has the same shape and contains the Hardmax values of the *corresponding input. * -*@par inputs +*@par Inputs: *one input including: -*@li x: input A Tensor.Must be one of the following types:float32,float16 +*x: input A Tensor.Must be one of the following types:float32,float16 * *@par Attributes: -*@li axis:A required int attribute that decides which dimension will be used to cal the hard_max +*axis:A required int attribute that decides which dimension will be used to cal the hard_max * -*@par output: +*@par Outputs: *one output including: -*@li y:A Tensor of the same type as x +*y:A Tensor of the same type as x * */ REG_OP(HardMax) @@ -3661,7 +3640,7 @@ REG_OP(HardMax) * @li input_y: A Tensor. the second tensor must be 1d. \n * @par Outputs: -* @li output: A Tensor. Result of the two inputs, must be 1d. \n +* output: A Tensor. Result of the two inputs, must be 1d. \n * @par Third-party framework compatibility * Compatible with the Pytorch dot operator. 
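*
*@par Example:
* An editorial sketch of the Dot semantics described above (assuming both
* inputs are 1-D tensors of equal length n; illustrative only, not part of
* the original header):
*   // float acc = 0.0f;
*   // for (int64_t i = 0; i < n; ++i) acc += input_x[i] * input_y[i];
*   // output[0] = acc;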
\n
@@ -3671,7 +3650,7 @@ REG_OP(Dot)
  .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
  .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
  .OP_END_FACTORY_REG(Dot)
-
+
/**
*@brief Returns a new tensor with boolean elements representing \n
*if each element of input is “close” to the corresponding element of other \n
@@ -3719,7 +3698,7 @@ REG_OP(IsClose)
*
*@attention Constraints:
*@li indices: only support int32,and shape same to "updates"
-*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li y:A Tensor, the type and shape is same to "var" \n
*@par Third-party framework compatibility
@@ -3754,7 +3733,7 @@ REG_OP(ArgMaxGrad)
*@attention Constraints:
*@li indices: only support int32,and shape same to "updates"
-*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li y:A Tensor, the type and shape is same to "var" \n
*@par Third-party framework compatibility
@@ -3805,15 +3784,15 @@ REG_OP(AddMatMatElements)
*@par Inputs:
*Two inputs, including:
-* @li input_x1: A tensor. Must be the following types:
-* float32. \n
+* @li input_x1: A tensor. Must be one of the following types: float32.
+* @li input_x2: A tensor. Must be one of the following types: float32. \n
-*@par Inputs:
-*@li input_x2: A tensor. Must of the following types:
-* float32. \n
+* @par Attributes:
+* @li dim: The type is Int and the default value is 1.
+* @li eps: The type is Float and the default value is 1e-8. \n
*@par Outputs:
-*@li output_y: A Tensor with the same type of input_x's. \n
+* output_y: A Tensor with the same type as "input_x1". \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator CosineSimilarity. \n
@@ -3826,6 +3805,45 @@ REG_OP(CosineSimilarity)
  .ATTR(eps, Float, 1e-8)
  .OP_END_FACTORY_REG(CosineSimilarity)

+/**
+*@brief Computes the Adam optimizer update. \n
+
+*@par Inputs:
+*Eleven inputs, including:
+* @li var: A Tensor. Support float16/float32.\n
+* @li m: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li v: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li lr: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li beta1: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li beta2: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li epsilon: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li grad: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+
+*@par Outputs:
+*Three outputs, including:
+* @li var: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li m: A Tensor. Datatype and shape are same as exp_avg.\n
+* @li v: A Tensor.
Datatype and shape are same as exp_avg.\n +*/ +REG_OP(ApplyAdamV2) + .INPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(lr, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(beta1, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(beta2, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(epsilon, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(grad, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OP_END_FACTORY_REG(ApplyAdamV2) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index b09ac058..7cfe39c4 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -163,9 +163,6 @@ REG_OP(Case) * if it is not a scalar, non-empty means True and empty means False. *@li body: A subgraph takes 'input' and returns a another list of tensors . \n - *@par Attributes: - *parallel_iterations: An optional int, default as 10 . \n - *@par Outputs: *output: The output tensors returned by "body". Has the same type as "input" . \n diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 6909345a..2327e76e 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -28,7 +28,7 @@ namespace ge { *@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . \n *@par Inputs: -*@li contents:A Tensor of type string. 0-D. The GIF-encoded image. \n +*contents:A Tensor of type string. 0-D. The GIF-encoded image. \n *@par Outputs: *image:A Tensor of type uint8. \n @@ -128,8 +128,8 @@ crops from the input image tensor and resizes them using bilinear sampling or nearest neighbor sampling to a common output size specified by crop_size . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: -*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, +*Input x must be a 4-D tensor. Inputs include: +*@li x:A Tensor. Must be one of the following types:uint8, uint16, int8, int16, int32, int64, float16, float, double. A 4-D tensor of shape [batch, image_height, image_width, depth]. The format must be NHWC. *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. @@ -266,8 +266,9 @@ depth] containing the original image size. Both image_height and image_width need to be positive . \n *@par Attributes: -method: A string specifying the interpolation method. Only 'bilinear' is -supported for now . \n +*@li method: A string specifying the interpolation method. Only 'bilinear' is +supported for now . +*@li T: output of type \n *@par Outputs: *y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format @@ -585,9 +586,11 @@ REG_OP(ResizeNearestNeighborV2GradD) channels], The image tensor that was resized . \n *@par Attributes: -*align_corners: An optional bool. Defaults to False. If true, the centers of +*@li align_corners: An optional bool. Defaults to False. If true, the centers of the 4 corner pixels of the input and grad tensors are aligned. 
Defaults to -false . \n +false . +*@li half_pixel_centers: indicates if the offset coordinates are normalized. Defaults +to false . \n *@par Outputs: *y: A Tensor. Has the same type as original_image . \n @@ -617,9 +620,10 @@ REG_OP(ResizeBilinearV2Grad) size for the images . \n *@par Attributes: -*align_corners: If true, the centers of the 4 corner pixels of the input and +* @li align_corners: If true, the centers of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. -Defaults to false . \n +Defaults to false . +* @li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: *y: 4-D with shape [batch, new_height, new_width, channels] . \n @@ -684,6 +688,9 @@ be non-negative. In the case of 0, the cropped area does not need to overlap any of the bounding boxes supplied . *@li aspect_ratio_range: The cropped area of the image must have an aspect ratio = width / height within this range. +*@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The +cropped area of the image must contain a fraction of the supplied image +within this range. *@li max_attempts: Number of attempts at generating a cropped region of the image of the specified constraints. After max_attempts failures, return the entire image. @@ -740,6 +747,9 @@ generator is seeded by the given seed. Otherwise, it is seeded by a random seed. *@li seed2: A second seed to avoid seed collision. *@li aspect_ratio_range: The cropped area of the image must have an aspect ratio = width / height within this range. +*@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The +cropped area of the image must contain a fraction of the supplied image +within this range. *@li max_attempts: Number of attempts at generating a cropped region of the image of the specified constraints. After max_attempts failures, return the entire image. @@ -787,9 +797,10 @@ REG_OP(SampleDistortedBoundingBoxExt2) The new size for the images . \n *@par Attributes: -*align_corners: If true, the centers of the 4 corner pixels of the input and +*@li align_corners: If true, the centers of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. Defaults to false . \n +*@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: *y: 4-D with shape [batch, new_height, new_width, channels] . \n @@ -999,10 +1010,6 @@ deciding whether boxes overlap too. *@li score_threshold: A 0-D float tensor representing the threshold for deciding when to remove boxes based on score . \n -*@par Attributes: -*pad_to_max_output_size: If true, the output selected_indices is padded -to be of length max_output_size. Defaults to false . \n - *@par Outputs: *selected_indices: A 1-D integer tensor of shape [M] representing the selected indices from the boxes tensor, where M <= max_output_size . \n @@ -1094,8 +1101,8 @@ REG_OP(EncodePng) *contents: 0-D. PNG-decoded image . *@par Attributes: -*channels: graph channels \n -*dtype: type of image +*@li channels: graph channels \n +*@li dtype: type of image *@par Outputs: *image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] @@ -1116,10 +1123,10 @@ REG_OP(DecodePng) *@brief Bmp-decode an image. \n *@par Inputs: -*@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n +*contents: A Tensor of type string. 0-D. The BMP-encoded image. \n *@par Attributes: -*@li channels: Decode the desired number of color channels of the image. 
\n +*channels: Decode the desired number of color channels of the image. \n *@par Outputs: *image: A Tensor dtype of uint8. @@ -1253,6 +1260,7 @@ REG_OP(KeepRatioResizeBilinear) No default value. *@li align_corners: An optional bool. If "true", the centers of the corner pixels of the input and output tensors are aligned. Defaults to "false" . \n +*@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: *y: A Tensor with the same type and format as input "images" . \n @@ -1381,6 +1389,7 @@ REG_OP(NonMaxSuppressionV5) *@li scale: A `Tensor` of type `float32`. *@li translation: A `Tensor` of type `float32` . \n +*@par Attributes: *@li kernel_type: type is string, default lanczos3 *@li antialias: type is bool, default true \n @@ -1411,6 +1420,7 @@ REG_OP(ScaleAndTranslate) *@li scale: A `Tensor` of type `float32`. *@li translation: A `Tensor` of type `float32` . \n +*@par Attributes: *@li kernel_type: type is string, default lanczos3 *@li antialias: type is bool, default true @@ -1460,9 +1470,10 @@ if they fall beyond [0, 1]. If false, do not do clipping and output the box coordinates as it is. If not specified, defaults to true . \n *@par Outputs: -*nmsed_boxes:type is float -*nmsed_scores:type is float -*nmsed_classes:type is float \n +*@li nmsed_boxes:type is float +*@li nmsed_scores:type is float +*@li nmsed_classes:type is float +*@li valid_detections:type is INT32 \n *@par Third-party framework compatibility * Compatible with tensorflow CombinedNonMaxSuppression operator. @@ -1508,6 +1519,9 @@ REG_OP(IMGWarp) *@par Outputs: *map_img: A Tensor after resize. \n + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(Remap) .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) @@ -1524,7 +1538,7 @@ and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bott *@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point. *@par Outputs: -*remap_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n +*warp_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n */ REG_OP(IMGWarpResize) .INPUT(img, TensorType({DT_FLOAT32})) @@ -1559,6 +1573,39 @@ REG_OP(SpatialTransformerD) .OP_END_FACTORY_REG(SpatialTransformerD) /** +*@brief Function spatial transformer . \n + +*@par Inputs: +*@li x: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64. +*@li theta: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64, + auxiliary coefficients . \n + +*@par Attributes: +*@li output_size: A tuple output size. +*@li default_theta: A tuple default theta +*@li use_default_theta: List use default theta + +*@par Outputs: +*y: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64, + should be same shape and type as x. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(SpatialTransformer) + .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16, + DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) + .OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8, + DT_UINT16,DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16, + DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) + .ATTR(output_size, ListInt, {-1, -1}) + .ATTR(default_theta, ListFloat, {}) + .ATTR(align_corners, Bool, false) + .ATTR(use_default_theta, ListInt, {}) + .OP_END_FACTORY_REG(SpatialTransformer) + +/** * @brief Resize the input tensor. \n currently, only support resize image tensor using nearest neighbor and linear interpolation. @@ -1623,7 +1670,7 @@ REG_OP(Resize) *@brief Function parse image from string to int. \n *@par Inputs: -*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n +* contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n *@par Attributes: *@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. @@ -1668,7 +1715,7 @@ REG_OP(DenseImageWarp) *@par Inputs: *One inputs, including: -* @li x: A tensor. Must be one of the following types: +* x: A tensor. Must be one of the following types: * float16, float32. \n *@par Attributes: @@ -1713,7 +1760,7 @@ REG_OP(ResizeD) *@par Inputs: *One inputs, including: -* @li grads: A tensor. Must be one of the following types: +* grads: A tensor. Must be one of the following types: * float16, float32. \n *@par Attributes: @@ -1762,8 +1809,8 @@ REG_OP(ResizeGradD) *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n *@par Outputs: -*grad_image: Returns 4-D with the same shape and dtype as `image`. -*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n +*@li grad_image: Returns 4-D with the same shape and dtype as `image`. +*@li grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n */ REG_OP(DenseImageWarpGrad) .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -1817,12 +1864,12 @@ REG_OP(GridSampler2D) *@li assist: Assist matrix, a 4-D tensor of type float16. *@par Attributes: -*@li align_corners: An optional bool. If "true", the centers of the corner +*align_corners: An optional bool. If "true", the centers of the corner pixels of the input and output tensors are aligned. Defaults to "false" . *@par Outputs: -*diff: Returns 4-D Tensor with the same shape and dtype as `grid`. -*position: Returns 4-D Tensor with the same shape as `grid`. +*@li diff: Returns 4-D Tensor with the same shape and dtype as `grid`. +*@li position: Returns 4-D Tensor with the same shape as `grid`. */ REG_OP(GridUnnormal) .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1840,10 +1887,13 @@ REG_OP(GridUnnormal) *@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`. *@par Attributes: -*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . +*padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . *@par Outputs: *y: Returns 4-D Tensor with the same dtype as `x`. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/
REG_OP(ImageUnfold)
  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1936,5 +1986,204 @@ REG_OP(GridSampler3DGrad)
  .ATTR(align_corners, Bool, false)
  .OP_END_FACTORY_REG(GridSampler3DGrad)

+/**
+*@brief Upsample the 3-D data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: An optional listInt. Defaults to none.
+ contains 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size'
+ should match the number of spatial dimensions of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, contains 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should match the number of spatial dimensions of input 'x'. One of 'scales' and
+ 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest3d)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .ATTR(output_size, ListInt, {})
+  .ATTR(scales, ListFloat, {})
+  .OP_END_FACTORY_REG(UpsampleNearest3d)
+
+/**
+*@brief Upsample the 3-D data with the trilinear interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: An optional listInt. Defaults to none.
+ contains 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' should
+ match the number of spatial dimensions of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, contains 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should match the number of spatial dimensions of input 'x'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+*@li align_corners: An optional bool. Defaults to false.
+ If true, the input and output tensors are aligned by the center points of their corner pixels, preserving the
+ values at the corner pixels. If false, the input and output tensors are aligned by the corner points of their
+ corner pixels, and the interpolation use edge value padding for out of boundary values. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleTrilinear3d)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .ATTR(output_size, ListInt, {})
+  .ATTR(scales, ListFloat, {})
+  .ATTR(align_corners, Bool, false)
+  .OP_END_FACTORY_REG(UpsampleTrilinear3d)
+
+/**
+*@brief Upsample the 3-D gradient data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
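+*
+*@par Example:
+* An editorial sketch of the output_size/scales relationship shared by the
+* upsample ops above (inferred from the size checks listed below; illustrative
+* only, not part of the original header):
+*   // for each spatial dim i in {D, H, W}:
+*   //   out_dim[i] = output_size.empty() ? floor(in_dim[i] * scales[i])
+*   //                                    : output_size[i];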
+
+*@par Attributes:
+*@li input_size: A required listInt.
+ contains 5 elements: [min_batch, channels, depth, height, width]. Must:
+ input_size[0] == grad_output_tensor_size[0]
+ input_size[1] == grad_output_tensor_size[1]. \n
+*@li output_size: An optional listInt. Defaults to none.
+ contains 3 elements: depth, height, width. The number of elements of 'output_size' should
+ match the number of spatial dimensions of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
+ grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
+ grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
+ grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, contains 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should match the number of spatial dimensions of input 'grad_output'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input grad_output, shape depends on the attribute 'input_size'. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(UpsampleNearest3dGrad)
+  .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .REQUIRED_ATTR(input_size, ListInt)
+  .ATTR(output_size, ListInt, {})
+  .ATTR(scales, ListFloat, {})
+  .OP_END_FACTORY_REG(UpsampleNearest3dGrad)
+
+/**
+*@brief Upsample the 3-D gradient data with the trilinear interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+*grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li input_size: A required listInt.
+ contains 5 elements: [min_batch, channels, depth, height, width]. Must:
+ input_size[0] == grad_output_tensor_size[0]
+ input_size[1] == grad_output_tensor_size[1]. \n
+*@li output_size: An optional listInt. Defaults to none.
+ contains 3 elements: depth, height, width. The number of elements of 'output_size' should
+ match the number of spatial dimensions of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
+ grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
+ grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
+ grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
+*@li scales: An optional listFloat. Defaults to none.
+ The scale array along each dimension, contains 3 elements: scale_depth, scale_height, scale_width.
+ The number of elements of 'scales' should match the number of spatial dimensions of input 'grad_output'.
+ One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A Tensor whose shape depends on 'input_size' and output_size/scales. Must be one of the following
+ types: float16, float32, float64. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(UpsampleTrilinear3dGrad)
+  .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .REQUIRED_ATTR(input_size, ListInt)
+  .ATTR(output_size, ListInt, {})
+  .ATTR(scales, ListFloat, {})
+  .ATTR(align_corners, Bool, false)
+  .OP_END_FACTORY_REG(UpsampleTrilinear3dGrad)
+
+
+/**
+*@brief Upsample the 1-D data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*x: A 3-D input tensor [N, C, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: A required listInt containing output_width.
+*@li scales: An optional listFloat containing scale_width. Defaults to empty. \n
+
+*@par Outputs:
+*y: A 3-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest1d)
+  .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .REQUIRED_ATTR(output_size, ListInt)
+  .ATTR(scales, ListFloat, {})
+  .OP_END_FACTORY_REG(UpsampleNearest1d)
+
+/**
+*@brief Upsample the 1-D gradient data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*grad_output: A 3-D input tensor [N, C, W]. Must be one of the following types:
+* float16, float32, float64. \n
+
+*@par Attributes:
+*@li output_size: A required listInt containing output_width.
+*@li scales: An optional listFloat containing scale_width. Defaults to empty.
+*@li input_size: A required listInt containing the shape of the original input, [min_batch, channels, width]. \n
+
+*@par Outputs:
+*y: A 3-D tensor. Has the same type as input grad_output, shape depends on the attribute 'input_size'. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+
+REG_OP(UpsampleNearest1dGrad)
+  .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+  .REQUIRED_ATTR(input_size, ListInt)
+  .REQUIRED_ATTR(output_size, ListInt)
+  .ATTR(scales, ListFloat, {})
+  .OP_END_FACTORY_REG(UpsampleNearest1dGrad)
} // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h
index 69c77bf6..f6cc8694 100644
--- a/third_party/fwkacllib/inc/ops/linalg_ops.h
+++ b/third_party/fwkacllib/inc/ops/linalg_ops.h
@@ -347,6 +347,9 @@ REG_OP(SelfAdjointEig)
  .OP_END_FACTORY_REG(SelfAdjointEig)

/**
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+
*@brief Computes the sign and the log of the absolute value of the determinant
of one or more square matrices . \n
@@ -382,9 +385,10 @@ REG_OP(Slogdet)
*x:Tensor of shape [..., M, N]. Let P be the minimum of M and N . \n
*@par Attributes:
-*compute_uv:If True then left and right singular vectors will be computed and
+*@li compute_uv: If True then left and right singular vectors will be computed and
returned in u and v, respectively. Otherwise, only the singular values will
-be computed, which can be significantly faster . \n
+be computed, which can be significantly faster .
+*@li full_matrices: If true, compute full-sized u and v; otherwise, compute only the leading P singular vectors. \n
*@par Outputs:
*@li sigma:Singular values. Shape is [..., P]. The values are sorted in
@@ -427,6 +431,9 @@ denotes the lower triangular factor `L` with unit diagonal.
*@li p: upper triangular part denotes the upper triangular factor `U`.Permutation of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n +*@par Attributes: +*output_idx_type: An optional DType from: int32, int64. + *@par Third-party framework compatibility * Compatible with TensorFlow Lu operator. */ @@ -467,6 +474,12 @@ left-hand side . \n *@par Outputs: y: Tensor of shape `[..., M, K]` containing the solutions \n +*@par Attributes: +*partial_pivoting: Whether to perform partial pivoting. `True` by default. +Partial pivoting makes the procedure more stable, but slower. Partial +pivoting is unnecessary in some cases, including diagonally dominant and +symmetric positive definite matrices + *@par Third-party framework compatibility * Compatible with TensorFlow TridiagonalSolve operator. */ diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h index a1b622e9..0aa94e73 100644 --- a/third_party/fwkacllib/inc/ops/list_ops.h +++ b/third_party/fwkacllib/inc/ops/list_ops.h @@ -35,10 +35,10 @@ namespace ge { *@li max_num_elements: The maximum number of elements. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li handle: An empty tensor list . \n +*handle: An empty tensor list . \n *@par Third-party framework compatibility. *Compatible with tensorflow EmptyTensorList operator. @@ -59,10 +59,10 @@ and the other elements of the given list in `input_handle`. \n *@li tensor: The tensor to put on the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle:A list with the elements of old list followed by tensor. \n +*output_handle:A list with the elements of old list followed by tensor. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListPushBack operator. @@ -86,7 +86,7 @@ list with all but that element. \n *@li element_shape: A shape compatible with that of elements in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: *@li output_handle:A list with the elements of the old list followed by tensor. @@ -110,10 +110,10 @@ REG_OP(TensorListPopBack) *@brief The number of tensors in the input tensor list. \n *@par Inputs: -*@li input_handle: The input list. \n +*input_handle: The input list. \n *@par Outputs: -*@li length:The number of tensors in the list. \n +*length:The number of tensors in the list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListLength operator. @@ -127,13 +127,13 @@ REG_OP(TensorListLength) *@brief The shape of elements in the input tensor list. \n *@par Inputs: -*@li input_handle: The input list. \n +*input_handle: The input list. \n *@par Attributes: -*@li shape_type: The type of shape in the list. \n +*shape_type: The type of shape in the list. \n *@par Outputs: -*@li element_shape:A shape compatible with that of elements in the list. \n +*element_shape:A shape compatible with that of elements in the list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListElementShape operator. @@ -156,7 +156,7 @@ REG_OP(TensorListElementShape) *@li shape_type: The type of shape in the list. \n *@par Outputs: -*@li handle: An output tensor list . \n +*handle: An output tensor list . 
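*
*@par Example:
* An editorial usage sketch for the TensorList ops above (TensorFlow-style
* pseudo-code; names and shapes are illustrative, not part of the original
* header):
*   // handle = TensorListReserve(element_shape=[2], num_elements=4)
*   // handle = TensorListSetItem(handle, index=0, item=t)   // t: float[2]
*   // item   = TensorListGetItem(handle, index=0, element_shape=[2])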
\n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListReserve operator. @@ -178,10 +178,10 @@ REG_OP(TensorListReserve) *@li element_shape: A shape compatible with that of elements in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li item: An output tensor value of index position . \n +*item: An output tensor value of index position . \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListGetItem operator. @@ -206,10 +206,10 @@ REG_OP(TensorListGetItem) *@li item: The element to be assigned to that position. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: An output tensor list . \n +*output_handle: An output tensor list . \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListSetItem operator. @@ -233,10 +233,10 @@ REG_OP(TensorListSetItem) *@li tensor: The tensor push into tensor list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handles: The output tensor lists. \n +*output_handles: The output tensor lists. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListPushBackBatch operator. @@ -263,7 +263,7 @@ REG_OP(TensorListPushBackBatch) *@li num_elements: The number of elements in the list. \n *@par Outputs: -*@li tensor: The tensor of list. \n +*tensor: The tensor of list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListStack operator. @@ -293,7 +293,7 @@ the leading dim of input_handle.element_shape or the element_shape input arg is not already set. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: *@li tensor: The concated result. @@ -324,10 +324,10 @@ REG_OP(TensorListConcatV2) *@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: The list. \n +*output_handle: The list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListSplit operator. @@ -351,10 +351,10 @@ REG_OP(TensorListSplit) *@li element_shape: The shape of elements in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: An output tensor list . \n +*output_handle: An output tensor list . \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListFromTensor operator. @@ -377,7 +377,7 @@ REG_OP(TensorListFromTensor) *@li size: size of the output list. \n *@par Outputs: -*@li output_handle: The output tensor list. \n +*output_handle: The output tensor list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListResize operator. @@ -397,10 +397,10 @@ REG_OP(TensorListResize) *@li element_shape: The shape of elements in the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li values: The tensor. \n +*values: The tensor. 
\n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListGather operator. @@ -429,10 +429,10 @@ the largest index in indices. If -1, the list is just large enough to include the largest index in indices. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: The TensorList. \n +*output_handle: The TensorList. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListScatterV2 operator. @@ -458,10 +458,10 @@ REG_OP(TensorListScatterV2) *@li indices: The indices used to index into the list. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output_handle: The TensorList. \n +*output_handle: The TensorList. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListScatterIntoExistingList operator. @@ -485,10 +485,10 @@ REG_OP(TensorListScatterIntoExistingList) *@li input_b: The input tensor list B. \n *@par Attributes: -*@li element_dtype: The type of elements in the list. \n +*element_dtype: The type of elements in the list. \n *@par Outputs: -*@li output: The output list. \n +*output: The output list. \n *@par Third-party framework compatibility. *Compatible with tensorflow TensorListConcatLists operator. diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index 5d928e5a..b1fc254f 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -77,8 +77,8 @@ REG_OP(LookupTableInsert) *handle: A Tensor of type resource. Handle to the table . \n *@par Attributes: -*@li Tkeys: A DType. -*@li Tvalues: A DType . \n +*@li Tkeys: A DType of keys. +*@li Tvalues: A DType of values. *@par Outputs: *@li keys: A Tensor of type Tkeys. diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 319bcf70..6eb418d8 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -227,10 +227,10 @@ REG_OP(Bucketize) *@par Inputs: *One inputs, including: -* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n +*input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n *@par Outputs: -*y: A tensor with the same type and shape of input_x \n +*output_y: A tensor with the same type and shape of input_x \n *@par Third-party framework compatibility *Compatible with the Pytorch operator Trunc. \n @@ -298,7 +298,7 @@ REG_OP(SparseSegmentMean) *@par Inputs: *The input grad must have be type float or double. Inputs include: -*@li grad: A Tensor. Must be one of the following types: float, double. +*@li x: A Tensor. Must be one of the following types: float, double. gradient propagated to the SparseSegmentMean op. *@li indices: A Tensor. Must be one of the following types: int32, int64. indices passed to the corresponding SparseSegmentMean op. @@ -365,6 +365,7 @@ REG_OP(InitData) component of an element of this dataset. *@li output_shapes: A nested structure of TensorShape objects corresponding to each component of an element of this dataset. +*@li output_num:output of nums. *@li channel_name: A string. Default "" . \n *@par Outputs: @@ -538,11 +539,11 @@ REG_OP(NextAfter) *@par Inputs: *One inputs, including: -* @li input_x: A tensor. 
Must be one of the following types:
+* input_x: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Attributes:
-*@li p: An optional float.Defaults to 2. \n
+*p: An optional float. Defaults to 2. \n
*@par Outputs:
*y: A Tensor with the same type and shape of input_x's. \n
@@ -560,10 +561,10 @@ REG_OP(Pdist)
*@brief Compute element-wise finiteness, return a boolean tensor.
*@par Inputs:
- *x:A Tensor.
+ *x:A Tensor of type float16, float32, double.
*@par Outputs:
- *y:A Tensor. Has the same shape as x.
+ *y:A Tensor. Has the same shape as x. Returns which elements of x are finite.
*@par Third-party framework compatibility.
*Compatible with tensorflow IsFinite operator.
@@ -577,10 +578,10 @@ REG_OP(IsFinite)
*@brief Compute element-wise infiniteness, return a boolean tensor.
*@par Inputs:
- *x:A Tensor.
+ *x:A Tensor of type float16, float32, double.
*@par Outputs:
- *y:A Tensor. Has the same shape as x.
+ *y:A Tensor. Has the same shape as x. Returns which elements of x are infinite.
*@par Third-party framework compatibility.
*Compatible with tensorflow IsInf operator.
@@ -594,7 +595,11 @@ REG_OP(IsInf)
*@brief Computes the complex absolute value of a tensor.
*@par Inputs:
- *x:A Tensor.
+ *x: A tensor of complex numbers. This operation returns a tensor of type
+ float or double that is the absolute value of each element in x .
+
+* @par Attributes:
+* Tout: the type of the output.
*@par Outputs:
*y:A tensor of type `float` or `double` that is the absolute value of each element in `x`.
@@ -612,10 +617,10 @@ REG_OP(ComplexAbs)
*@brief Returns which elements of x are NaN.
*@par Inputs:
- *x:A Tensor.
+ *x:A Tensor of type float16, float32, double.
*@par Outputs:
- *y:A Tensor. Has the same shape as x.
+ *y:A Tensor. Has the same shape as x. Returns which elements of x are NaN.
*@par Third-party framework compatibility.
*Compatible with tensorflow IsNan operator.
@@ -629,7 +634,10 @@ REG_OP(IsNan)
*@brief Returns the real part of a complex number.
*@par Inputs:
- *input:A Tensor.
+ *input:A Tensor. Must have numeric type.
+
+ *@par Attributes:
+ *Tout: Type of outputs. \n
*@par Outputs:
*output:A Tensor. Has the same shape as input.
@@ -670,7 +678,8 @@ REG_OP(Conj)
*@li weight: A Tensor dtype of float32 . \n
*@par Attributes:
-*reduction: An optional attribute. Defaults to "mean" . \n
+*@li reduction: An optional attribute. Defaults to "mean" .
+*@li ignore_index: An optional attribute. Defaults to -100 . \n
*@par Outputs:
*@li y: A Tensor dtype of float32.
@@ -700,7 +709,8 @@ REG_OP(NLLLoss)
*@li total_weight:A Tensor dtype of float32 . \n
*@par Attributes:
-*reduction: An optional attribute. Defaults to "mean" . \n
+*@li reduction: An optional attribute. Defaults to "mean" .
+*@li ignore_index: An optional attribute. Defaults to -100 . \n
*@par Outputs:
*x_grad: A Tensor. Must be the following type: float32 . \n
@@ -720,24 +730,24 @@ REG_OP(NLLLossGrad)
  .OP_END_FACTORY_REG(NLLLossGrad)

/**
-*@brief The ifmr . \n
+*@brief IFMR (Input Feature Map Reconstruction). \n
*@par Inputs:
-*@li data:A Tensor of feature map
-*@li data_min:A Tensor of min value of feature map.
-*@li data_max:A Tensor of max value of feature map.
-*@li cumsum:A Tensor of cumsum bin of data . \n
+*@li data: A Tensor of feature map.
+*@li data_min: A Tensor of min value of feature map.
+*@li data_max: A Tensor of max value of feature map.
+*@li cumsum: A Tensor of cumsum bin of data . \n
*@par Attributes:
-*min_percentile: min init percentile.
-*max_percentile: max init percentile.
-*search_range: search range.
-*search_step: step size of searching.
-*with_offset: whether using offset . \n
+*@li min_percentile: min init percentile.
+*@li max_percentile: max init percentile.
+*@li search_range: search range.
+*@li search_step: step size of searching.
+*@li with_offset: whether using offset . \n
*@par Outputs:
-*scale: optimal scale.
-*offset: optimal offset . \n
+*@li scale: optimal scale.
+*@li offset: optimal offset . \n
*@par Third-party framework compatibility
*Compatible with mindspore
@@ -758,16 +768,16 @@ REG_OP(IFMR)
  .OP_END_FACTORY_REG(IFMR)

/**
-*@brief weights adaptive range quantization. \n
+*@brief Weights Adaptive Range Quantization. \n
*@par Inputs:
-*@li w:A Tensor of weights. \n
-*@li w_min:A Tensor of weights reduce_min. \n
-*@li w_max:A Tensor of weights reduce_max. \n
+*@li w: A Tensor of weights. \n
+*@li w_min: A Tensor of weights reduce_min. \n
+*@li w_max: A Tensor of weights reduce_max. \n
*@par Attributes:
-*num_bits: the bits num used for quantize.
-*offset_flag: whether using offset. \n
+*@li num_bits: the number of bits used for quantization.
+*@li offset_flag: whether using offset. \n
*@par Outputs:
*y: fake quantized weights. \n
@@ -789,22 +799,22 @@ REG_OP(WtsARQ)
  .OP_END_FACTORY_REG(WtsARQ)

/**
-*@brief The acts_ulq. \n
+*@brief Activations Universal Linear Quantization. \n
*@par Inputs:
-*@li x:A Tensor of feature map
-*@li clamp _min:A Tensor of min clamp value of feature map.
-*@li clamp _max:A Tensor of max clamp value of feature map.
+*@li x: A Tensor of feature map.
+*@li clamp_min: A Tensor of min clamp value of feature map.
+*@li clamp_max: A Tensor of max clamp value of feature map.
*@par Attributes:
-*fixed_min: fix min to zero.
-*num_bits: quant bits. \n
+*@li fixed_min: fix min to zero.
+*@li num_bits: quant bits. \n
*@par Outputs:
-*y: output fake quant feature map.
-*clamp_min_mask: where x > clamp_min
-*clamp_min_mask: where x < clamp_max
-*x_clamped_loss: clamp loss. \n
+*@li y: output fake quant feature map.
+*@li clamp_min_mask: where x > clamp_min.
+*@li clamp_max_mask: where x < clamp_max.
+*@li x_clamped_loss: clamp loss. \n
*@par Third-party framework compatibility
*Compatible with mindspore
@@ -826,12 +836,12 @@ REG_OP(ActsULQ)
  .OP_END_FACTORY_REG(ActsULQ)

/**
-*@brief The acts_ulq_input_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization. \n
*@par Inputs:
-*@li y_grad: A Tensor of gradient
-*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed'
-*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed'
+*@li y_grad: A Tensor of gradient.
+*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
+*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@par Outputs:
*x_grapd: The gradient of inpust. \n
@@ -851,10 +861,10 @@ REG_OP(ActsULQInputGrad)
  .OP_END_FACTORY_REG(ActsULQInputGrad)

/**
-*@brief The act_ulq_clamp_max_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization clamp max. \n
*@par Inputs:
-*@li y_grad: A Tensor of gradient
+*@li y_grad: A Tensor of gradient.
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. \n
@@ -876,10 +886,10 @@ REG_OP(ActULQClampMaxGrad)
  .OP_END_FACTORY_REG(ActULQClampMaxGrad)

/**
-*@brief The act_ulq_clamp_min_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization clamp min.
\n
*@par Inputs:
-*@li y_grad: A Tensor of gradient
+*@li y_grad: A Tensor of gradient.
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. \n
@@ -904,7 +914,7 @@ REG_OP(ActULQClampMinGrad)
* @brief Computes Lp norm.
* @par Inputs:
-* @li x: An ND tensor of type float16, float32. \n
+* x: An ND tensor of type float16, float32. \n
*
* @par Attributes:
* @li p: Int, "inf" or "-inf", default value is 2.
* @li epsilon: Float, default is 1e-12. \n
* @par Outputs:
-* @li y: An ND tensor of type float16, float32. The shape of y is depending
+* y: An ND tensor of type float16, float32. The shape of y depends
* on axes and keepdim. \n
* @par Third-party framework compatibility
@@ -932,11 +942,13 @@ REG_OP(LpNorm)
* @brief get complex.
* @par Inputs:
-* @li real: An ND tensor of type float32. double
-* @li imag: An ND tensor of type float32. double \n
+* @li real: An ND tensor of type float32 or double, representing the real part of a complex number.
+* @li imag: An ND tensor of type float32 or double, representing the imaginary part of a complex number. \n
*
+* @par Attributes:
+* Tout: the type of the output.
* @par Outputs:
-* @li out: An ND tensor of type complex64, complex128 \n
+* out: An ND tensor of type complex64, complex128 \n
*/
REG_OP(Complex)
  .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE}))
@@ -949,10 +961,13 @@ REG_OP(Complex)
* @brief deal complex.
* @par Inputs:
-* @li input: An ND tensor of type complex64, complex128 \n
-*
+* input: An ND tensor of type complex64, complex128 \n
+
+* @par Attributes:
+* Tout: the type of the output.
+
* @par Outputs:
-* @li output: An ND tensor of type float32. double \n
+* output: An ND tensor of type float32 or double \n
*/
REG_OP(Imag)
  .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
@@ -988,7 +1003,7 @@ REG_OP(Angle)
* float16, float32. \n
*@par Attributes:
-* @li reduction: Specifies the reduction to apply to the output:
+* reduction: Specifies the reduction to apply to the output:
* 'none' | 'mean' | 'sum'. Default: 'mean'. \n
*@par Outputs:
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index b317be37..81c6a29e 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -61,21 +61,28 @@ REG_OP(MatMul)
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n
*@par Inputs:
-*Two inputs, including:
-* @li x1: A matrix Tensor. 2D. Must be one of the following types: float16,
-* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
-* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16,
-* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
-* @li bias: A 1D Tensor. Must be one of the following types: float16,
-* float32, int32. Has format [ND, NHWC] . \n
+*Four inputs, including:
+* @li x1: A matrix Tensor. 2D. Must be one of the following types: float32,
  float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ].
+* @li x2: A matrix Tensor. 2D. Must be one of the following types: float32,
  float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ].
+* @li bias: A 1D Tensor. Must be one of the following types: float32,
  float16, int32. Has format [ND, NHWC].
+* @li offset_w: An optional 1D Tensor for quantized inference. Type is int8.
  Reserved. \n
*@par Attributes:
-*@li transpose_x1: A bool.
If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to + [M, K]. +* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to +[K, N]. +* @li offset_x: An optional integer for quantized MatMulV2. +* The negative offset added to the input x1 for int8 type. Ensure offset_x + within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n +*y: The result matrix Tensor. 2D. Must be one of the following types: float32, + float16, int32. Has format [ND, NHWC, FRACTAL_NZ]. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -95,19 +102,27 @@ REG_OP(MatMulV2) *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n *@par Inputs: -*Two inputs, including: +*Five inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. * @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. * @li compress_index: A compress index matrix of type int8. -* @li bias: A 1D Tensor. Must be one of the following types: int32, float16. +* @li bias: An optional Tensor. 1D. Must be one of the following types: int32, + float16. +* @li offset_w: An optional matrix Tensor. 2D. Must be one of the following + types: int8. \n *@par Attributes: -*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to + [M, K]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to + [K, N]. +*@li offset_x: An optional integer for quantized MatMulV2Compress. +*The negative offset added to the input x1 for int8 type. Ensure offset_x + within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* int32. \n +*y: The result matrix Tensor. 2D. Must be one of the following types: int32, +* float16. \n */ REG_OP(MatMulV2Compress) @@ -488,13 +503,13 @@ REG_OP(ScatterElements) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor . \n +*@li var: An ND Tensor . *Must be one of the following types: float16, float32, int32, int8, uint8 *@li indices: An ND Tensor of type int32 or int64 -*@li updates: An Tensor. format:NCHW, NHWC . \n +*@li updates: An Tensor. format:NCHW, NHWC . *Must be one of the following types: float16, float32, int32, int8, uint8 @@ -517,6 +532,61 @@ REG_OP(ScatterAdd) .OP_END_FACTORY_REG(ScatterAdd) /** +*@brief Use a scalar to modify the tensor. \n + +*@par Inputs: +*inputs, including: +*@li index: An ND Tensor . \n + +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Attributes: +* dim : the axis along which to index . +* value : the source element(s) to scatter . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "index" . \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator ScatterScalar. 
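+*
+*@par Example:
+* An editorial sketch assuming PyTorch-style scatter semantics with a scalar
+* source (illustrative only, not part of the original header); for dim == 0 on
+* a 2-D tensor:
+*   // for (int64_t i = 0; i < index_rows; ++i)
+*   //   for (int64_t j = 0; j < index_cols; ++j)
+*   //     y[index[i][j]][j] = value;  // dim == 0 selects the row to write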
+*/
+REG_OP(ScatterScalar)
+    .INPUT(index, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .REQUIRED_ATTR(dim, Int)
+    .REQUIRED_ATTR(value, Float)
+    .OP_END_FACTORY_REG(ScatterScalar)
+
+/**
+*@brief Uses a source tensor to modify the input tensor. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li index: An ND Tensor. \n
+
+*Must be one of the following types: float16, float32, int32, int8, uint8
+
+*@li src: An ND Tensor. \n
+
+*Must be one of the following types: float16, float32, int32, int8, uint8
+
+*@par Attributes:
+* dim: The axis along which to index. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type and format as input "index". \n
+
+*@par Third-party framework compatibility
+* Compatible with the Pytorch operator ScatterTensor.
+*/
+REG_OP(ScatterTensor)
+    .INPUT(index, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .REQUIRED_ATTR(dim, Int)
+    .OP_END_FACTORY_REG(ScatterTensor)
+
+/**
*@brief Divides a variable reference by sparse updates . \n

*@par Inputs:
@@ -530,7 +600,7 @@ REG_OP(ScatterAdd)
*Must be one of the following types: float16, float, int32, int8, uint8

*@par Attributes:
-*@li use_locking: An optional bool. Defaults to "False". If "True",
+*use_locking: An optional bool. Defaults to "False". If "True",
* the operation will be protected by a lock . \n

*@par Outputs:
@@ -752,10 +822,12 @@ REG_OP(DiagPart)
*@par Attributes:
*@li num_output: Reserved.
-*@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false".
+*@li transpose: A bool, specifying whether to transpose the input "w", either "true" or "false". Defaults to "false".
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1.
* The product of the subsequent dimensions starting from the first or the second dimension is "K".
-*@li offset_x: Reserved . \n
+*@li offset_x: An optional integer for quantized FullyConnection.
+*The negative offset added to the input image for int8 type. Ensure offset_x is within the
+*effective range of int8 [-128, 127]. Defaults to "0". \n

*@par Outputs:
*y: The result tensor of type float16, int32, float32 . \n
@@ -779,27 +851,34 @@ REG_OP(FullyConnection)
    .OP_END_FACTORY_REG(FullyConnection)

/**
-*@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n
+*@brief Also known as a "fully-connected-compress" layer, computes an inner
+product with a set of learned weights, and (optionally) adds biases. \n

*@par Inputs:
-* Four inputs, including:
+* Five inputs, including:
*@li x: A Tensor of type uint8, int8.
-*@li w: A weight matrix of type int8, int8.
-*@li w: A compress index matrix of type int8, int8.
-*@li b: A Tensor of type float16, int32, int32.
-*@li offset_w: A Tensor of type int8.i
+*@li w: A weight matrix of type int8.
+*@li compress_index: A compress index matrix of type int8.
+*@li b: A Tensor of type int32.
+*@li offset_w: A Tensor of type int8.

*@par Attributes:
-*@li num_output: Reserved.
-*@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false".
-*@li axis: Reserved.
-*@li offset_x: Reserved . \n
+*@li num_output: An int, specifying the number of outputs.
+*@li transpose: A bool, specifying whether to transpose the input "w", either "true"
+ or "false". Defaults to "false".
+*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K"
+starts from. Defaults to "1".
+* The product of the subsequent dimensions starting from the first or the
+second dimension is "K".
+*@li offset_x: An optional integer for quantized FullyConnectionCompress.
+*The negative offset added to the input image for int8 type. Ensure offset_x is
+within the effective range of int8 [-128, 127]. Defaults to "0". \n

*@par Outputs:
-*y: The result tensor of type int32 . \n
+*y: The result tensor of type int32. \n

*@par Third-party framework compatibility
-* Compatible with the Caffe operator InnerProduct . \n
+* Compatible with the Caffe operator InnerProduct. \n

*@par Quantization supported or not
* Yes
@@ -925,13 +1004,13 @@ REG_OP(ScatterMin)

*@par Inputs:
* Three inputs, including:
-*@li var: An ND Tensor . \n
+*@li var: An ND Tensor.

*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An NCHW, NHWC, or ND Tensor . \n

*Must be one of the following types: int32 or int64
-*@li updates: An NCHW, NHWC, or ND Tensor . \n
+*@li updates: An NCHW, NHWC, or ND Tensor.

*Must be one of the following types: float16, float, int32, int8, uint8
@@ -958,13 +1037,13 @@ REG_OP(ScatterMax)

*@par Inputs:
* Three inputs, including:
-*@li var: An ND Tensor . \n
+*@li var: An ND Tensor.

*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor . \n

*Must be one of the following types: int32 or int64
-*@li updates: An ND Tensor . \n
+*@li updates: An ND Tensor.

*Must be one of the following types: float16, float, int32, int8, uint8
@@ -1113,14 +1192,46 @@ REG_OP(IndexAdd)
    .OP_END_FACTORY_REG(IndexAdd)

/**
+* @brief Replaces the values of "x1" at the positions specified by "indices"
+* with the values in "x2".
+
+* @par Inputs:
+* Three inputs, including:
+* @li x1: A Tensor. Must be one of the following types:
+* float16, float32, int32, int8, uint8.
+* @li x2: A Tensor of the same type as "x1".
+* @li indices: A Tensor of indices, of type int32.
+
+* @par Attributes:
+* accumulate: An optional int. If 1, the values are accumulated (added)
+* instead of overwritten. Defaults to 0.
+
+* @par Outputs:
+* y: A Tensor. Has the same type as "x1".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator index_put.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(IndexPut)
+    .INPUT(x1, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(x2, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(indices, TensorType({DT_INT64, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .ATTR(accumulate, Int, 0)
+    .OP_END_FACTORY_REG(IndexPut)
+
+/**
*@brief: Returns the upper triangular part of a matrix (2-D tensor) or a batch of matrices \n

*@par Inputs:
-* Two inputs, including:
-*@li x: A Tensor. Must be one of the following types:
-* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
-* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
-*@li diagonal:(int, optional) – the diagonal to consider。\n
+*x: A Tensor. Must be one of the following types:
+*float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+*qint8, quint8, qint32, uint16, complex128, uint32, uint64.
\n + +*@par Attributes: +*diagonal: An optional attribute indicates the diagonal to consider. \n *@par Outputs: *y: A Tensor. Has the same type as "x" . \n @@ -1138,11 +1249,12 @@ REG_OP(Triu) *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n *@par Inputs: -* Two inputs, including: -*@li x: A Tensor. Must be one of the following types: -* float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. -*@li diagonal:(int, optional) – the diagonal to consider。\n +*x: A Tensor. Must be one of the following types: +*float16, float32, double, int32, uint8, int16, int8, complex64, int64, +*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n + +*@par Attributes: +*diagonal: An optional attribute indicates the diagonal to consider. \n *@par Outputs: *y: A Tensor. Has the same type as "x" . \n @@ -1213,6 +1325,30 @@ REG_OP(Eye) .ATTR(dtype, Int, 0) .OP_END_FACTORY_REG(Eye) +/** +*@brief: Fill diagonal of at least 2 dimension tensors with value . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float32, int32, int64 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Attributes: +*fill_value:The value to fill in +*wrap: An optional bool. Defaults to "False". If "True", Use recursive fill. \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator FillDiagonal. +*/ +REG_OP(FillDiagonal) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT64})) + .REQUIRED_ATTR(fill_value, Float) + .ATTR(wrap, Bool, false) + .OP_END_FACTORY_REG(FillDiagonal) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 98473c65..a55cebe2 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -195,7 +195,7 @@ REG_OP(DepthwiseConv2DBackpropInput) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(filter, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) - .OUTPUT(input_grad, TensorType({DT_FLOAT16})) + .OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(strides, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .REQUIRED_ATTR(pads, ListInt) @@ -255,7 +255,7 @@ REG_OP(DepthwiseConv2DBackpropInput) REG_OP(DepthwiseConv2DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) - .OUTPUT(input_grad, TensorType({DT_FLOAT16})) + .OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -367,19 +367,19 @@ REG_OP(BiasAddGrad) * Gradients with respect to the output of the convolution. 
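+*
+* As a shape sanity check, input_size, out_backprop and the attributes must
+* satisfy the usual convolution relation (shown for H; W is analogous):
+*     H_out = floor((H_in + pad_top + pad_bottom - dilation_h * (filter_h - 1) - 1) / stride_h) + 1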
*\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | out_bckprop | filter | y - ------------|-------------|---------|-------- - | Data Type | float16 | float16 | float16 - | |-------------|---------|-------- - | | float32 | float32 | float32 - | |-------------|---------|-------- - | | float64 | float64 | float64 - ------------|-------------|---------|-------- - | Format | NCHW | NCHW | NCHW - | | NHWC | HWCN | NHWC -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | out_bckprop | filter | y\n + ------------|-------------|---------|--------\n + | Data Type | float16 | float16 | float16\n + | |-------------|---------|--------\n + | | float32 | float32 | float32\n + | |-------------|---------|--------\n + | | float64 | float64 | float64\n + ------------|-------------|---------|--------\n + | Format | NCHW | NCHW | NCHW\n + | | NHWC | HWCN | NHWC\n + *\n * For float32 and float64 type, the actual calculation on the chip is based on * float16. *\n @@ -398,36 +398,37 @@ REG_OP(BiasAddGrad) * "NHWC". Specify the data format of the input and output data. *\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | input_size | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | out_backprop | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | y(fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | input_size | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | out_backprop | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | y(fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + *\n -@endverbatim * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * @@ -496,7 +497,7 @@ REG_OP(Conv2DBackpropInput) REG_OP(Conv2DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -508,7 
+509,7 @@ REG_OP(Conv2DBackpropInputD) /** *@brief Computes the Deconvolution with respect to the input. *@par Inputs: - * Three inputs: + * Two required inputs: * @li x: A Tensor of type float16 or int8. 4D with shape * [batch, out_channels, out_height, out_width]. Gradients with respect * to the output of the convolution. @@ -520,16 +521,16 @@ REG_OP(Conv2DBackpropInputD) * Type is int8. Reserved.\n *\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | |---------|---------|---------|-------- - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | filter | bias | y\n + ------------|---------|---------|---------|--------\n + | Data Type | float16 | float16 | float16 | float16\n + | |---------|---------|---------|--------\n + | | int8 | int8 | int32 | int32\n + ------------|---------|---------|---------|--------\n + | Format | NCHW | NCHW | ND | NCHW\n + *\n * For int8, a dequant or requant operator must be followed. *\n * @@ -550,35 +551,35 @@ REG_OP(Conv2DBackpropInputD) * within the effective range of int8 [-128, 127]. Defaults to "0". *\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | x (out_backprop) | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | Filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | y (fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | x (out_backprop) | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | Filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | y (fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | Offset_x | | [-128, 127]\n + *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * @@ -628,19 +629,19 @@ REG_OP(Deconvolution) * convolution. 
*\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | out_backprop | y - ------------|---------|--------------|--------- - | Data Type | float16 | float16 | float16 - | |---------|--------------|--------- - | | float32 | float32 | float32 - | |---------|--------------|--------- - | | float64 | float64 | float64 - |-----------|---------|--------------|--------- - | Format | NCHW | NCHW | NCHW - | | NHWC | NHWC | HWCN -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | out_backprop | y\n + ------------|---------|--------------|---------\n + | Data Type | float16 | float16 | float16\n + | |---------|--------------|---------\n + | | float32 | float32 | float32\n + | |---------|--------------|---------\n + | | float64 | float64 | float64\n + |-----------|---------|--------------|---------\n + | Format | NCHW | NCHW | NCHW\n + | | NHWC | NHWC | HWCN\n + *\n * For float32 and float64 type of x and outbackprop, the actual calculation on the chip * is based on float16. *\n @@ -658,39 +659,34 @@ REG_OP(Deconvolution) * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. *\n -*\n -* The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | x(fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter Size | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | out_backprop | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | y | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -@endverbatim - * In Ascend910, out_backprop's H and W not support 1 when - * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 *\n - * + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | x(fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Filter Size | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | out_backprop | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | y | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + *\n *@par Outputs: * y: A Tensor. Has the same type as x, has the same format as filter_size. 
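+*
+* A minimal construction sketch (a hypothetical illustration; it assumes the
+* input/attribute setters that the GE IR factory generates from this
+* registration, not an officially documented example):
+*     ge::op::Conv2DBackpropFilter op("conv2d_bp_filter");
+*     op.set_input_x(x);                      // forward-pass feature map
+*     op.set_input_filter_size(filter_size);  // shape of the filter gradient
+*     op.set_input_out_backprop(dy);          // gradient w.r.t. the conv output
+*     op.set_attr_strides({1, 2, 2, 1});      // NHWC; N and C strides must be 1
+*     op.set_attr_pads({0, 0, 0, 0});         // top, bottom, left, right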
*\n @@ -780,16 +776,16 @@ REG_OP(Conv2DBackpropFilterD) *\n *\n * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | | float32 | float32 | float32 | float32 - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | | NHWC -@endverbatim +*\n +*\n +| Tensor | x | filter | bias | y |\n +| :-------: | :-----: | :-----: | :-----: | :-----: |\n +| Data Type | float16 | float16 | float16 | float16 |\n +| | float32 | float32 | float32 | float32 |\n +| | int8 | int8 | int32 | int32 |\n +| Format | NCHW | NCHW | ND | NCHW |\n +| | NHWC | HWCN | | NHWC |\n +*\n * For float32 type, the actual calculation on the chip is based on * float16. *\n @@ -813,35 +809,30 @@ REG_OP(Conv2DBackpropFilterD) *\n *\n * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | Input Image Size | H | [1, 100000] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter Size | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim +*\n +*\n +| Name | Field | Scope |\n +| :--------------: | :------: | :---------: |\n +| Input Image Size | H | [1, 100000] |\n +| | W | [1, 4096] |\n +| Filter Size | H | [1, 255] |\n +| | W | [1, 255] |\n +| Stride | H | [1, 63] |\n +| | W | [1, 63] |\n +| Padding | Top | [0, 255] |\n +| | Bottom | [0, 255] |\n +| | Left | [0, 255] |\n +| | Right | [0, 255] |\n +| Dilation | H | [1, 255] |\n +| | W | [1, 255] |\n +| Offset_x | - | [-128, 127] |\n +*\n * The W dimension of the input image supports cases exceeding 4096, but it may * cause compilation errors. *\n * *@par Outputs: -*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the +* y: A 4D Tensor of output feature map. Has the same type as "x". With the * format "NHWC", the data is stored in the order of: [batch, out_height, * out_width, out_channels]. *\n @@ -956,16 +947,15 @@ REG_OP(Conv2DCompress) *\n *\n * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | offsets | bias | y - ------------|---------|---------|---------|----------|-------- - | Data Type | float16 | float16 | float16 | float16 | float16 - | |---------|---------|---------|----------|-------- - | | float32 | float32 | float32 | float32 | float32 - ------------|---------|---------|---------|----------|-------- - | Format | NCHW | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | NHWC | | NHWC -@endverbatim +*\n +*\n +| Tensor | x | filter | offsets | bias | y |\n +| :-------: | :-----: | :-----: | :-----: | :-----: | :-----: |\n +| Data Type | float16 | float16 | float16 | float16 | float16 |\n +| | float32 | float32 | float32 | float32 | float32 |\n +| Format | NCHW | NCHW | NCHW | ND | NCHW |\n +| | NHWC | HWCN | NCHW | | NHWC |\n +*\n * For float32 type, the actual convolution calculation part on the chip is * based on float16. 
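+*
+* Conceptually, each output position p0 samples the input at offset-shifted
+* locations (the standard DCNv2 formulation; the offsets dp_k and modulation
+* scalars m_k are taken from the "offsets" input, p_k enumerates the kernel grid):
+*     y(p0) = sum_k w_k * x(p0 + p_k + dp_k) * m_k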
*\n @@ -992,19 +982,18 @@ REG_OP(Conv2DCompress) *\n *\n * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - --------------------|--------|---------------------------- - | Input Image Size | H | [1, 100000 / filter_height] - | | W | [1, 4096 / filter_width] - --------------------|--------|---------------------------- - | Filter Size | H | [1, 63] - | | W | [1, 63] -@endverbatim +*\n +*\n +| Name | Field | Scope |\n +| :--------------: | :------: | :-------------------------: |\n +| Input Image Size | H | [1, 100000 / filter_height] |\n +| | W | [1, 4096 / filter_width] |\n +| Filter Size | H | [1, 63] |\n +| | W | [1, 63] |\n *\n * *@par Outputs: -*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the +* y: A 4D Tensor of output feature map. Has the same type as "x". With the * format "NHWC", the data is stored in the order of: [batch, out_height, * out_width, out_channels]. *\n @@ -1042,41 +1031,38 @@ REG_OP(DeformableConv2D) /** *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. - *@par Inputs: + +*@par Inputs: * @li x: A 5D tensor. Must be one of the following types: float16, * (Currently does not support int8). The format of x is NCDHW or NDHWC. * @li filter: A 5D tensor of the same type as "x". * (Currently does not support int8). - * The format is NCDHW, NDHWC or DHWCN . \n - -*@par Optional input: - * @li bias: An optional 1D tensor of the same type as "x". - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n + * The format is NCDHW, NDHWC or DHWCN. + * @li bias: Optional. An 1D tensor of the same type as "x". + * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n -*@par Required Attributes: - * @li strides: A list of 5 integers. Specifies the stride of the sliding window +*@par Attributes: + * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window * for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A list of 6 integers. + * @li pads: Required. A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, - * tail, top, bottom, left and right . \n - -*@par Attributes: - * @li groups: Number of blocked connections from input channels to output + * tail, top, bottom, left and right. + * @li dilations: Optional. A list of 5 integers. Specifies the dilation factor for each + * dimension of "x". + * @li groups: Optional. Number of blocked connections from input channels to output * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li dilations: A list of 5 integers. Specifies the dilation factor for each - * dimension of "x". * The N, C and D dimensions must be 1. Has the same format as "x". - * @li offset_x: An optional int. Input offset, used for quantized inference. - * Defaults to 0. Reserved . \n + * @li offset_x: Optional. An int. Input offset, used for quantized inference. + * Defaults to 0. Reserved. \n *@par Outputs: - *y: A Tensor. Has the same type and data format as "x". \n + * y: A Tensor. Has the same type and data format as "x". \n *@attention Constraints: - *The image size after padding is greater than the filter size . \n + * The image size after padding is greater than the filter size. 
\n *@par Third-party framework compatibility * @li Compatible with the TensorFlow operator conv3d. @@ -1085,9 +1071,9 @@ REG_OP(DeformableConv2D) REG_OP(Conv3D) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(filter, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) @@ -1099,8 +1085,8 @@ REG_OP(Conv3D) /** *@brief Computes the gradients of convolution 3d with respect to the input. + *@par Inputs: - * Three inputs: * @li input_size: A Tensor of type int32, int64. An integer vector representing * the shape of input, where input is a 5-D tensor * [batch, depth, height, width, channels] or @@ -1110,28 +1096,25 @@ REG_OP(Conv3D) * @li out_backprop: A Tensor. Must have the same type as filter. * 5-D with shape [batch, depth, out_height, out_width, out_channels] * or [batch, out_channels, depth, out_height, out_width]. Gradients with - * respect to the output of the convolution . \n + * respect to the output of the convolution. \n -*@par Required Attributes: - * @li strides: A list of 5 integers. Specifies the stride of the sliding window +*@par Attributes: + * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window * for each dimension of "out_backprop". * The N and C dimensions must be 1. Has the same format as "out_backprop". - * @li pads: A list of 6 integers. + * @li pads: Required. A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, - * tail, top, bottom, left and right . \n - -*@par Attributes: - * Three attributes: - * @li groups: Number of blocked connections from input channels to output - * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". - * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * tail, top, bottom, left and right. + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of the input. * The N, C and D dimensions must be 1. Has the same format as "out_backprop". + * @li groups: Optional. Number of blocked connections from input channels to output + * channels. + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. \n *@par Outputs: - * y: A Tensor. Has the same type as filter,and has same format as "input_size" + * y: A Tensor. Has the same type as filter,and has same format as "input_size". \n *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_input @@ -1150,45 +1133,44 @@ REG_OP(Conv3DBackpropInput) /** *@brief Computes the gradients of convolution 3d with respect to the input. + *@par Inputs: - * Two inputs: * @li filter: A Tensor whose type is float16. The format of filter is NCDHW, * NDHWC or DHWCN. * @li out_backprop: A Tensor. Must have the same type as filter. The format is - * NDHWC or NCDHW. \n + * NDHWC or NCDHW. \n -*@par Required Attributes: - * @li strides: A list of 5 integers. Specifies the stride of the sliding window +*@par Attributes: + * @li input_size: Required. A tuple/list of type int32, int64. 
An integer vector + * representing the shape of input, where input is a 5-D tensor + * [batch, depth, height, width, channels] or + * [batch, channels, depth, height, width]. + * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window * for each dimension of "out_backprop". * The N and C dimensions must be 1. Has the same format as "out_backprop". - * @li pads: A list of 6 integers. Supports only padding along the D, H and W + * @li pads: Required. A list of 6 integers. Supports only padding along the D, H and W * dimensions in sequence of head, tail, top, bottom, left and right. - * @li input_size: A tuple/list of type int32, int64. An integer vector - * representing the shape of input, where input is a 5-D tensor - * [batch, depth, height, width, channels] or - * [batch, channels, depth, height, width] . \n - -*@par Attributes: - * Three attributes: - * @li groups: Number of blocked connections from input channels to output - * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". - * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "out_backprop". + * @li groups: Optional. Number of blocked connections from input channels to output + * channels. + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. \n + *@par Outputs: - * y: A Tensor. Has the same type and data format as "out_backprop". + * y: A Tensor. Has the same type and data format as "out_backprop". \n + *@par Third-party framework compatibility - * Compatible with Tensorflow's conv3d_backprop_input + * Compatible with Tensorflow's conv3d_backprop_input. \n *@par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead. + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead. */ REG_OP(Conv3DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -1242,8 +1224,8 @@ REG_OP(LSTM) /** *@brief Computes the gradients of convolution3D with respect to the filter + *@par Inputs: - * Three inputs: * @li x: A Tensor. Must be one of the following types: float16, float32. * Currently does not support double. * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] @@ -1258,26 +1240,23 @@ REG_OP(LSTM) * or [batch, out_channels, out_depth, out_height, out_width]. * Gradients with respect to the output of the convolution. \n -*@par Required Attributes: - * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding +*@par Attributes: + * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding * window for each dimension of "x". The N and C dimensions must be 1. * Has the same format as "x". - * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] - * pads on feature map . \n - -*@par Attributes: - * Three attributes: - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * @li pads: Required. 
A tuple/list of 6 integers, [front, back, top, bottom, left, right] + * pads on feature map. + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li groups: Number of blocked connections from input channels to output + * @li groups: Optional. Number of blocked connections from input channels to output * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". - * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. \n *@par Outputs: - * y: A Tensor that has the same type as "x" - * and the format is NDHWC, NCDHW or DHWCN. + * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW or DHWCN. \n + *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_filter */ @@ -1295,8 +1274,8 @@ REG_OP(Conv3DBackpropFilter) /** *@brief Computes the gradients of convolution with respect to the filter. + *@par Inputs: - * Two inputs: * @li x: A Tensor of type float16. * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] * or [batch, in_channels, in_depth, in_height, in_width]. @@ -1305,37 +1284,34 @@ REG_OP(Conv3DBackpropFilter) * or [batch, out_channels, out_depth, out_height, out_width]. * Gradients with respect to the output of the convolution. \n -*@par Required Attributes: - * @li filter_size: A tuple/list of type integers. An integer vector +*@par Attributes: + * @li filter_size: Required. A tuple/list of type integers. An integer vector * representing the tensor shape of filter, where filter is a 5-D tensor * [filter_depth, filter_height, filter_width, in_channels, out_channels], * [out_channels, filter_depth, filter_height, filter_width, in_channels] * or [out_channels, in_channels, filter_depth, filter_height, filter_width]. - * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding + * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding * window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] - * pads on feature map. \n - -*@par Attributes: - * Three attributes: - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right] + * pads on feature map. + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li groups: Number of blocked connections from input channels to output + * @li groups: Optional. Number of blocked connections from input channels to output * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". - * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li data_format: Optional. An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. \n *@par Outputs: - * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. + * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. 
\n + *@par Third-party framework compatibility - * Compatible with Tensorflow's conv3d_backprop_filter + * Compatible with Tensorflow's conv3d_backprop_filter. \n + *@par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead. + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead. */ - - REG_OP(Conv3DBackpropFilterD) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) @@ -1350,37 +1326,32 @@ REG_OP(Conv3DBackpropFilterD) /** *@brief Computes the transpose of convolution 3d with respect to the input. + *@par Inputs: - * Three inputs: * @li input_size: A Tensor of type int32. An integer vector representing the * shape of input. * @li x: A Tensor of type float16, currently does not support int8. The format * is NDHWC or NCDHW. * @li filter: A Tensor of type float16, currently does not support int8. * The format is NDHWC, NCDHW or DHWCN. + * @li bias: Optional. An optional 1D tensor of the same type as "x". Reserved. + * @li offset_w: Optional. An optional 1D tensor for quantized deconvolution. Reserved. \n -*@par Optional input: - * Two optional inputs - * @li bias: An optional 1D tensor of the same type as "x". Reserved. - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n - -*@par Required Attributes: - * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding +*@par Attributes: + * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding * window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A tuple/list of 6 integers - -*@par Attributes: - * Five attributes: - * @li groups: Number of blocked connections from input channels to output - * channels. - * @li dilations: A tuple/list of 5 integers, + * @li pads: Required. A tuple/list of 6 integers. + * @li dilations: Optional. A tuple/list of 5 integers, * The dilation factor for each dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li data_format: An optional string from: "NDHWC", "NCDHW". + * @li groups: Optional. Number of blocked connections from input channels to output + * channels. + * @li data_format: Optional. An string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li output_padding: The size will be added in the output shape. - * @li offset_x: Input offset_x value. Reserved. + * @li output_padding: Optional. The size will be added in the output shape. + * @li offset_x: Optional. Input offset_x value. Reserved. \n + *@par Outputs: * y: A Tensor. Has the same type and format as "x". */ @@ -1388,9 +1359,9 @@ REG_OP(Conv3DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(filter, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) @@ -1402,46 +1373,44 @@ REG_OP(Conv3DTranspose) /** *@brief Computes the transpose of convolution 3d with respect to the input. + *@par Inputs: * @li x: A Tensor of type float16, currently does not support int8. * The format is NDHWC or NCDHW. 
* @li filter: A Tensor of type float16, currently does not support int8. * The format is NDHWC, NCDHW or DHWCN. + * @li bias: Optional. An 1D tensor of the same type as "x". Reserved. + * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n -*@par Optional inputs: - * @li bias: An optional 1D tensor of the same type as "x". Reserved. - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n - -*@par Required Attributes: - * @li input_size: A tuple/list of type int32. - * An integer vector representing the shape of input - * @li strides: A tuple/list of 5 integers. +*@par Attributes: + * @li input_size: Required. A tuple/list of type int32. + * An integer vector representing the shape of input. + * @li strides: Required. A tuple/list of 5 integers. * Specifies the stride of the sliding window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A tuple/list of 6 integers . \n - -*@par Attributes: - * Five attributes: - * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * @li pads: Required. A tuple/list of 6 integers. + * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li groups: Number of blocked connections from input channels to output + * @li groups: Optional. Number of blocked connections from input channels to output * channels. - * @li data_format: An optional string from: "NDHWC", "NCDHW". + * @li data_format: Optional. An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. - * @li output_padding: The size will be added in the output shape. - * @li offset_x: Input offset_x value. Reserved. + * @li output_padding: Optional. The size will be added in the output shape. + * @li offset_x: Optional. Input offset_x value. Reserved. \n + *@par Outputs: - * y: A Tensor. Has the same type and format as "x". + * y: A Tensor. Has the same type and format as "x". \n + *@par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. */ REG_OP(Conv3DTransposeD) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(filter, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -1469,17 +1438,17 @@ REG_OP(Conv3DTransposeD) * @li offset_w: An optional 1D tensor for quantized inference. Reserved. 
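+*
+* For reference, the recovered output size follows the usual transposed
+* convolution relation (shown for H; W is analogous), and input_size must be
+* consistent with it:
+*     H_out = stride_h * (H_in - 1) + output_padding_h + dilation_h * (filter_h - 1) + 1 - pad_top - pad_bottom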
*\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | |---------|---------|---------|-------- - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | | NHWC -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | filter | bias | y\n + ------------|---------|---------|---------|--------\n + | Data Type | float16 | float16 | float16 | float16\n + | |---------|---------|---------|--------\n + | | int8 | int8 | int32 | int32\n + ------------|---------|---------|---------|--------\n + | Format | NCHW | NCHW | ND | NCHW\n + | | NHWC | HWCN | | NHWC\n + *\n * For int8, a dequant or requant operator must be followed. *\n * @@ -1504,38 +1473,38 @@ REG_OP(Conv3DTransposeD) * within the effective range of int8 [-128, 127]. Defaults to "0". *\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | input_size | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | x (out_backprop) | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | y (fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | input_size | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | x (out_backprop) | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | y (fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | Offset_x | | [-128, 127]\n + *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * @@ -1557,9 +1526,9 @@ REG_OP(Conv2DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) 
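+    // Per the dtype table above, the bias dtype pairs with the output path:
+    // float16 bias for the float16 path, int32 bias for the int8 path, and
+    // the float32 entries added in this change extend the float32 path.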
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -1604,9 +1573,9 @@ REG_OP(Conv2DTranspose) REG_OP(Conv2DTransposeD) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -1623,14 +1592,12 @@ REG_OP(Conv2DTransposeD) * Two inputs: * @li x: A Tensor of type float16,float32 * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. -*@par Required Attributes: +*@par Attributes: * @li strides: A tuple/list of 4 integers.The stride of the sliding window for * height and width for H/W dimension. * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension * of the input. * @li ksize: A tuple/list of 2 integers.kernel size. -*@par Attributes: - * Four attributes: * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension * of input. Defaults to [1, 1, 1, 1] * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. @@ -1659,22 +1626,20 @@ REG_OP(DeformableOffsets) * @li grad: A Tensor of type float16,float32. gradients with respect to DeformableOffsets output * @li x: A Tensor of type float16,float32. * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. -*@par Required Attributes: +*@par Attributes: * @li strides: A tuple/list of 4 integers.The stride of the sliding window for * height and width for H/W dimension. * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension * of the input. * @li ksize: A tuple/list of 2 integers.kernel size. -*@par Attributes: - * Three attributes: * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension * of input. Defaults to [1, 1, 1, 1] * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. * @li deformable_groups: Specify the c-axis grouping number of input x. * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1. *@par Outputs: - * grad_x: A Tensor of type float16, float32. Gradients with respect to input_x - * grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets + * @li grad_x: A Tensor of type float16, float32. Gradients with respect to input_x + * @li grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets */ REG_OP(DeformableOffsetsGrad) .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1695,11 +1660,9 @@ REG_OP(DeformableOffsetsGrad) *@brief Computes the deformed dilation output with the expected input *@par Inputs: * One inputs: - * @li x: A Tensor of type int8, float16, float32 -*@par Required Attributes: - * @li dilations: A tuple/list of integers. + * x: A Tensor of type int8, float16, float32 *@par Attributes: - * Two attributes: + * @li dilations: A tuple/list of integers. * @li padding_value: default value filling in blank * @li pads: A tuple/list of integers. 
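+*
+* For illustration (a hypothetical 2 x 2 input, dilation 2 on H and W,
+* padding_value 0; each dilated dimension becomes dilation * (d - 1) + 1):
+*     [[1, 2],        [[1, 0, 2],
+*      [3, 4]]   ->    [0, 0, 0],
+*                      [3, 0, 4]]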
*@par Outputs:
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 5fa40ad6..bd14df77 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -153,6 +153,42 @@ REG_OP(Iou)
    .OP_END_FACTORY_REG(Iou)

/**
+*@brief Computes GIoU: first the smallest enclosing area of the two boxes and
+* the IoU are calculated, then the proportion of the enclosing area covered by
+* neither box is subtracted from the IoU. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
+* shape (N, 4). "N" indicates the number of bounding boxes, and the value
+* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
+*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
+* with shape (M, 4). "M" indicates the number of ground truth boxes, and
+* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. \n
+
+*@par Attributes:
+*@li trans: An optional bool, true for 'xywh', false for 'xyxy'.
+*@li is_cross: An optional bool, controlling whether the output shape is [M, N] or [1, N].
+*@li mode: Computation mode, a character string with the value range of ['iou', 'iof']. \n
+
+*@par Outputs:
+* overlap: A 2D Tensor of type float16 or float32 with shape [M, N] or [1, N],
+* specifying the IoU or IoF ratio. \n
+
+*@attention Constraints:
+* Only computation of float16 data is supported. To avoid overflow, the input
+* length and width are scaled by 0.2 internally.
+*/
+REG_OP(GIoU)
+    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(trans, Bool, false)
+    .ATTR(is_cross, Bool, true)
+    .ATTR(mode, String, "iou")
+    .OP_END_FACTORY_REG(GIoU)
+
+/**
*@brief Performs the backpropagation of ROIAlign for training scenarios . \n

*@par Inputs:
@@ -417,7 +453,7 @@ REG_OP(PSROIPooling)
*@brief Returns detection result . \n

*@par Inputs:
-* Four inputs, including:
+* Five inputs, including:
*@li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
*@li bbox_delta: An NCHWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
*@li score: An NCHWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
@@ -459,7 +495,7 @@ REG_OP(FSRDetectionOutput)
*@brief Returns detection result . \n

*@par Inputs:
-* Four inputs, including:
+* Three inputs, including:
*@li bbox_delta: An ND tensor of type float16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
*@li score: An ND tensor of type float16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput.
*@li anchors: An ND tensor of type float16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
@@ -474,7 +510,6 @@ REG_OP(FSRDetectionOutput)
*@li code_type: An optional int32, specifying the code type. Defaults to 1 (only 2 is supported). The corner is 1, center_size is 2, corner_size is 3.
*@li keep_top_k: An optional int32, specifying the topk value after nms. Defaults to -1.
*@li confidence_threshold: An optional float32, specifying the topk filter threshold.
Only consider detections with confidence greater than the threshold -*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output". *@par Outputs: *@li out_boxnum: A tensor of type int32, specifying the number of output boxes. *@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box. @@ -989,26 +1024,26 @@ REG_OP(SPP) * feature map . \n *@attention Constraints: -*@li For the feature map input: -(1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50. -(2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60. -(3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70. -(4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70. -(5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80. -(6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80. -(7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80. -(8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70. -(9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70. -(10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70. -(11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70. -(12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70. -(13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70. -(14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70. -(15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70. -(16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50. -(17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40. -(18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40. -(19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40. +* For the feature map input: +*@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50. +*@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60. +*@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50. +*@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40. +*@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40. +*@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40. *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. 
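+*
+* For example, with pooled_h = pooled_w = 4, the feature map size is capped
+* at 70, while pushing pooled_h = pooled_w to 20 tightens the cap to 40.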
*/
@@ -1222,9 +1257,7 @@ REG_OP(RpnProposalsD)
* @li box_filter: bool, mark of box_filter. Defaults to "true"
* @li core_max_num: int, max number of core. Defaults to "8"
*@par Outputs:
-* @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
-* @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
-* @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
+*sorted_box: A Tensor. Must be float16. N-D with shape [N, 1].
*/
REG_OP(RpnProposalPostProcessing)
  .INPUT(sorted_proposal, TensorType({DT_FLOAT16}))
@@ -1382,7 +1415,7 @@ REG_OP(BatchMultiClassNonMaxSuppression)
* @li shape_hw: A 1D Tensor of type int32 . \n

* @par Attributes:
-* @li reversed_box: An optional bool, specifying the last two dims is "4,num" or
+* reversed_box: An optional bool, specifying whether the last two dims are "4,num" or
* "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false" . \n

* @par Outputs:
@@ -1429,9 +1462,9 @@ REG_OP(NormalizeBBox)
* @li anchors: A Tensor. Must be int32.
*
*@par Attributes:
-* @li scales: optional, listfloat, .
+* @li scales: optional, listfloat.
* @li decode_clip: optional, float, threshold of decode process.
-* @li reversed_boxes: optional, bool,.
+* @li reversed_boxes: optional, bool.
*
*@par Outputs:
* y: A Tensor. Must have the same type as box_predictions.
@@ -1446,16 +1479,16 @@ REG_OP(DecodeBboxV2)
  .OP_END_FACTORY_REG(DecodeBboxV2)

/**
-*@brief Computes sort function.
+*@brief Sorts the input tensor and returns the sorted values and their indices.
*
*@par Inputs:
*Inputs include:
-* x: A Tensor. Dtype support: flaot16, flaot, int16, int8,
+* x: A Tensor. Dtype support: float16, float, int16, int8,
                 uint8, int32, int64.
-*
+
*@par Attributes:
-* @li axis: optional, int.
-* @li descending: optional,bool.
+* @li axis: An optional attribute indicating the sorting axis.
+* @li descending: An optional attribute indicating whether to sort in descending order.
*
*@par Outputs:
* @li y1: A Tensor. Must have the same type as x.
@@ -1568,16 +1601,18 @@ deciding when to remove boxes based on score . \n
the last dim representing (batch_id,class_id,index_id) . \n

*@par Attributes:
-*center_point_box:Integer indicate the format of the box data.
+*@li center_point_box: Integer indicating the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
(i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models.
1 - the box data is supplied as [x_center, y_center, width, height].
Mostly used for Pytorch models. \n
+*@li max_boxes_size: An optional attribute integer representing the real maximum
+*number of boxes to be selected by non max suppression . \n

*@par Outputs:
-*@li selected_indices: A 2-D integer tensor of shape [M] representing the
+*selected_indices: A 2-D integer tensor of shape [M] representing the
selected indices from the boxes tensor, where M <= max_output_size. \n

*@attention Constraints:
@@ -1603,7 +1638,7 @@ REG_OP(NonMaxSuppressionV7)
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n

*@par Inputs:
-* Three inputs, including:
+* Two inputs, including:
*@li features: A 5HD Tensor list of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1".
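Note: a minimal usage sketch for the new GIoU prototype registered above, assuming the generated ge::op::GIoU class and the usual GE IR graph-build flow (ge::Graph, ge::op::Data); the header name "all_ops.h", shapes, and node names are illustrative, not part of this patch:

    #include <vector>
    #include "graph/graph.h"
    #include "all_ops.h"  // generated operator classes, assumed to include ge::op::GIoU

    ge::Graph BuildGIoUGraph() {
      // (N, 4) boxes and (M, 4) ground-truth boxes, per the GIoU doc above; N = M = 128 here.
      ge::TensorDesc box_desc(ge::Shape({128, 4}), ge::FORMAT_ND, ge::DT_FLOAT16);
      auto bboxes = ge::op::Data("bboxes_data").set_attr_index(0);
      bboxes.update_input_desc_x(box_desc);
      bboxes.update_output_desc_y(box_desc);
      auto gtboxes = ge::op::Data("gtboxes_data").set_attr_index(1);
      gtboxes.update_input_desc_x(box_desc);
      gtboxes.update_output_desc_y(box_desc);
      auto giou = ge::op::GIoU("giou")
                      .set_input_bboxes(bboxes)
                      .set_input_gtboxes(gtboxes)
                      .set_attr_trans(false)    // boxes given as [x1, y1, x2, y2] ('xyxy')
                      .set_attr_is_cross(true)  // overlap shape [M, N]
                      .set_attr_mode("iou");
      ge::Graph graph("giou_graph");
      graph.SetInputs(std::vector<ge::Operator>{bboxes, gtboxes})
           .SetOutputs(std::vector<ge::Operator>{giou});
      return graph;
    }

The same pattern applies to the other prototypes in this file; only the input/attr setter names change with the REG_OP signature.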
@@ -1760,7 +1795,7 @@ REG_OP(AnchorResponseFlags)
* "N" indicates the number of ROIs. \n

*@par Attributes:
-*@li performance_mode: select performance mode, "high_precision" or "high_performance".
+*performance_mode: select performance mode, "high_precision" or "high_performance".
* select "high_precision" when input type is float32, the output tensor precision
* will be smaller than 0.0001, select "high_performance" when input type is float32,
* the ops will be best performance, but precision will be only smaller than 0.005.
@@ -1795,12 +1830,12 @@ REG_OP(YoloBoxesEncode)
*@li num_gts: A Tensor. Support int32. real k. shape (1, )

*@par Attributes:
-*@li output_dim: float. IOU threshold for positive bboxes.
-*@li group_size: float. minimum iou for a bbox to be considered as a positive bbox
-*@li spatial_scale: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt.
+*@li pos_iou_thr: float. IoU threshold for positive bboxes.
+*@li min_pos_iou: float. minimum IoU for a bbox to be considered as a positive bbox.
+*@li gt_max_assign_all: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt.

*@par Outputs:
-*@li assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
+* assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
*/
REG_OP(GridAssignPositive)
  .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 }))
@@ -1816,6 +1851,40 @@ REG_OP(GridAssignPositive)
  .REQUIRED_ATTR(min_pos_iou, Float)
  .REQUIRED_ATTR(gt_max_assign_all, Bool)
  .OP_END_FACTORY_REG(GridAssignPositive)
+
+/**
+*@brief Computes the gradient of GIoU with respect to both sets of boxes . \n
+
+*@par Inputs:
+*@li dy: gradient increment data, a 1D Tensor of type float16 or float32 with
+* shape (N,).
+*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
+* shape (4, N). "N" indicates the number of bounding boxes, and the value
+* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
+*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
+* with shape (4, M). "M" indicates the number of ground truth boxes, and
+* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n
+
+*@par Attributes:
+*@li trans: An optional attr, true for 'xywh', false for 'xyxy', only support true now.
+*@li is_cross: An optional attr, if false M equals N, only support false now.
+*@li mode: An optional attr, a character string with the value range of ['iou', 'iof'],
+* only support 'iou' now. \n
+
+*@par Outputs:
+*@li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N].
+*@li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M].
+*/
+REG_OP(GIoUGrad)
+  .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .ATTR(trans, Bool, false)
+  .ATTR(is_cross, Bool, true)
+  .ATTR(mode, String, "iou")
+  .OP_END_FACTORY_REG(GIoUGrad)
} // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index b44c0780..9ce7abfd 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -54,15 +54,16 @@ REG_OP(LogSoftmaxGrad)
*@par Inputs:
*Two inputs, including:
* @li features: A Tensor. Must be one of the following types: half, float32, double.
-* A "batch_size * num_classes" matrix.
+*A "batch_size * num_classes" matrix. * @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'. -* batch_size vector with values in [0, num_classes). -* This is the label for the given minibatch entry. +*batch_size vector with values in [0, num_classes). +*This is the label for the given minibatch entry. \n *@par Outputs: -*loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". -*backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n +*@li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". +*@li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). +Has the same type as "features" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SparseSoftmaxCrossEntropyWithLogits. @@ -84,8 +85,8 @@ REG_OP(SparseSoftmaxCrossEntropyWithLogits) * @li labels: A Tensor of the same type as "features". A "batch_size * num_classes" matrix . \n *@par Outputs: -*loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". -*backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n +* @li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". +* @li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SoftmaxCrossEntropyWithLogits. @@ -127,12 +128,13 @@ REG_OP(SoftmaxGrad) *@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n *@par Inputs: -* Two inputs, including: +* Three inputs, including: *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . +*@li dout:A multi-dimensional Tensor of float16 or float32,specifying the gradient transferred from the upper layer. \n *@par Outputs: -*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n +*gradient: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n *@par Third-party framework compatibility * Compatible with the scenario where "reduction" is set to "none"of PyTorch operator SigmoidCrossEntropyWithLogitsGrad. @@ -148,13 +150,12 @@ REG_OP(SigmoidCrossEntropyWithLogitsGrad) *@brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . \n *@par Inputs: -* Three inputs, including: +* Two inputs, including: *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. -*@li dout: A multi-dimensional Tensor of float16 or float32, specifying the gradient transferred from the upper layer . \n +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. \n *@par Outputs: -*gradient: Return gradient. Has the same dimensions and type as "predict" . \n +*loss: Return loss. Has the same dimensions and type as "predict" . 
\n *@par Third-party framework compatibility * Compatible with the scenario where "reduction" is set to "none"of PyTorch operator SigmoidCrossEntropyWithLogits. @@ -572,7 +573,7 @@ REG_OP(LayerNorm) *@par Inputs: *One input, including: -* @li x: A Tensor. Must be one of the following types: float16, float32 . \n +* x: A Tensor. Must be one of the following types: float16, float32 . \n *@par Attributes: * @li p: Specify L_p norm, the type is float. @@ -581,7 +582,7 @@ REG_OP(LayerNorm) *@par Outputs: *One outputs, including: -* @li y: shape and dtype of output, should be same shape and type as input. +* y: shape and dtype of output, should be same shape and type as input. */ REG_OP(Renorm) .INPUT(x, TensorType::BasicType()) @@ -811,7 +812,7 @@ REG_OP(LayerNormBetaGammaBackpropV2) * shape of "keep_prob" should be (1,) or [1,]. * Has the same type as "x" . \n -*@par Output: +*@par Outputs: *y: A mutable Tensor. Has the same type as "x". */ REG_OP(DropOutDoMask) @@ -839,7 +840,7 @@ REG_OP(DropOutDoMask) * shape of "keep_prob" should be (1,) or [1,]. * Has the same type as "x" . \n -*@par Output: +*@par Outputs: *y: A mutable Tensor. Has the same type as "x". *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1010,7 +1011,7 @@ REG_OP(LRNGrad) *@li grads: A Tensor. Has the same type as acts. *@par Attributes: - *@li blank_label: An optional attribute. Defaults to 0. + *blank_label: An optional attribute. Defaults to 0. *@par Third-party framework compatibility * Compatible with TensorFlow RNNTLoss operator. @@ -1198,13 +1199,11 @@ REG_OP(INInferV2D) * @li epsilon: An attribute of type Float. \n * @par Outputs: -*Three outputs, including: +* Three outputs, including: * @li y: A Tensor. Has the same type as "x". \n * @li mean: A Tensor. Has the same type as "x". \n * @li variance: A Tensor. Has the same type as "x". \n -* @par Third-party framework compatibility -* Can be used by onnx InstanceNormalization */ REG_OP(InstanceNorm) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1218,24 +1217,22 @@ REG_OP(InstanceNorm) .OP_END_FACTORY_REG(InstanceNorm) /** -*@brief InstanceNormGrad operator interface implementation. +* @brief InstanceNormGrad operator interface implementation. -*@par Inputs: -*Five inputs, including: +* @par Inputs: +* Five inputs, including: * @li dy: A Tensor. Must be one of the following types: float16, float32. * @li x: A Tensor. Must be one of the following types: float16, float32. * @li variance: A Tensor. Must be one of the following types: float16, float32. * @li mean: A Tensor. Must be one of the following types: float16, float32. * @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n -*@par Outputs: -*Three outputs, including: +* @par Outputs: +* Three outputs, including: * @li pd_x: A Tensor. Must be one of the following types: float16, float32. * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(InstanceNormGrad) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -1249,58 +1246,6 @@ REG_OP(InstanceNormGrad) .OP_END_FACTORY_REG(InstanceNormGrad) /** -*@brief InstanceNormXBackprop operator interface implementation. - -*@par Inputs: -*Five inputs, including: -* @li dy: A Tensor. Must be one of the following types: float16, float32. -* @li x: A Tensor. Must be one of the following types: float16, float32. 
-* @li variance: A Tensor. Must be one of the following types: float16, float32. -* @li mean: A Tensor. Must be one of the following types: float16, float32. -* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n - -*@par Outputs: -*Two outputs, including: -* @li pd_x: A Tensor. Must be one of the following types: float16, float32. -* @li res_for_gamma: A Tensor. Must be one of the following types: float32. - -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. -*/ -REG_OP(InstanceNormXBackprop) - .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) - .OP_END_FACTORY_REG(InstanceNormXBackprop) - -/** -*@brief InstanceNormBetaGammaBackprop operator interface implementation. - -*@par Inputs: -*Two inputs, including: -* @li dy: A Tensor. Must be one of the following types: float16, float32. -* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n - -*@par Outputs: -*Two outputs, including: -* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. -* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. - -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. -*/ -REG_OP(InstanceNormBetaGammaBackprop) - .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(res_for_gamma, TensorType({DT_FLOAT})) - .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) - .OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop) - -/** * @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n * @par Inputs: @@ -1340,10 +1285,10 @@ REG_OP(KlDivLossGrad) * @li label: A Tensor. Has the same type as "grads". Required. \n * @par Attributes: -* @li reduction: An optional attribute of type String. Defaults to "mean". \n +* reduction: An optional attribute of type String. Defaults to "mean". \n * @par Outputs: -* @li y: A Tensor. Has the same type as "x". \n +* y: A Tensor. Has the same type as "x". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator L1LossGrad. @@ -1368,7 +1313,7 @@ REG_OP(L1LossGrad) * @li reduction: An optional string.Defaults to "mean". \n * @par Outputs: -* @li y: An ND tensor tensor with the same shape and type as "predict". \n +* y: An ND tensor tensor with the same shape and type as "predict". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator LpLoss. @@ -1390,10 +1335,10 @@ REG_OP(LpLoss) * @li dout: An ND tensor of type float16, float32. \n * @par Attributes: -* @li reduction: An optional string.Defaults to "mean". \n +* reduction: An optional string.Defaults to "mean". \n * @par Outputs: -* @li y: An ND tensor tensor with the same shape and type as "predict". \n +* y: An ND tensor tensor with the same shape and type as "predict". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator MseLossGrad. 
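Note: the reduction semantics documented above can be read as follows. This is a host-side reference sketch of what MseLossGrad computes for reduction == "mean" (not the device kernel, and the helper name is illustrative); for "sum" the division by N is dropped, and for "none" the upstream gradient is elementwise:

    #include <cstddef>
    #include <vector>

    // d(loss)/d(predict) for MSE with mean reduction:
    //   grad[i] = 2 * (predict[i] - label[i]) / N * dout
    std::vector<float> MseLossGradRef(const std::vector<float> &predict,
                                      const std::vector<float> &label,
                                      float dout) {  // upstream gradient (a scalar for "mean")
      const float n = static_cast<float>(predict.size());
      std::vector<float> grad(predict.size());
      for (std::size_t i = 0; i < predict.size(); ++i) {
        grad[i] = 2.0f * (predict[i] - label[i]) / n * dout;
      }
      return grad;
    }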
@@ -1414,10 +1359,10 @@ REG_OP(MseLossGrad)
* @li label: An ND Tensor of dtype float16 or float32.\n
*
* @par Attributes:
-* @li reduction:An optional str from sum, none, mean, Defaults to "mean".\n
+* reduction: An optional str from sum, none, mean. Defaults to "mean".\n
*
* @par Outputs:
-* @li y: when reduction=sum/mean, y is scale. when reduction=none, y has
+* y: when reduction=sum/mean, y is a scalar. when reduction=none, y has
* same type and shape as "predict".\n
*/
REG_OP(MseLoss)
@@ -1445,7 +1390,7 @@ REG_OP(MseLoss)
* Must be one of the following: "none", "mean", "sum". \n

* @par Outputs:
-* @li gradient: A Tensor. Has the same type as "predict". \n
+* gradient: A Tensor. Has the same type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SmoothL1LossBackward.
@@ -1480,7 +1425,7 @@ REG_OP(SmoothL1LossGradV2)
* the output,'sum': the output will be summed. Default: 'mean'. \n

* @par Outputs:
-* @li loss: Indicates the loss between the predictive value and target value.
+* loss: Indicates the loss between the predictive value and target value.
* Has the same dimensions as "predict". \n

* @par Third-party framework compatibility
@@ -1498,12 +1443,12 @@ REG_OP(SmoothL1LossV2)
* @brief Computes Centralization. result = x - mean(x, axes)

* @par Inputs:
-* @li x: An ND tensor of type float16, float32.
+* x: An ND tensor of type float16, float32.
* @par Attributes:
-* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
+* axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
* Must be in the range [-rank(x), rank(x)).
* @par Outputs:
-* @li y: A Tensor. Has the same type as "x". \n
+* y: A Tensor. Has the same type as "x". \n

* @par Third-party framework compatibility
* custom operator \n
@@ -1521,7 +1466,7 @@ REG_OP(Centralization)

*@par Inputs:
*One inputs, including:
-* @li x: A tensor . Must be one of the following types:
+* x: A tensor. Must be one of the following types:
* float16, float32, int32, uint32, int8, uint8. \n

*@par Attributes:
@@ -1546,14 +1491,14 @@ REG_OP(Roll)
logistic loss between input_x and input_y (containing 1 or -1). \n

*@par Inputs:
- *One inputs, including:
+ *Two inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_y: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
- *@li lambd: An optional string.Defaults to "mean". \n
+ *reduction: An optional string. Defaults to "mean". \n

*@par Outputs:
*output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n
@@ -1580,10 +1525,10 @@ REG_OP(SoftMarginLoss)
* @li pos_weight: An optional ND tensor of type float16, float32. \n

* @par Attributes:
-* @li reduction: An optional string.Defaults to "mean". \n
+* reduction: An optional string. Defaults to "mean". \n

* @par Outputs:
-* @li gradient: An ND tensor tensor with the same shape and type as "predict". \n
+* gradient: An ND tensor with the same shape and type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad.
@@ -1603,24 +1548,14 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2)

* @par Inputs:
* Two inputs, including:
- * @li input_x: A tensor. Must be one of the following types:
- * float16, float32. \n
- *
- * @par Inputs:
- * @li target: A tensor. Must be one of the following types:
- * float16, float32. \n
+ * @li input_x: A tensor. Must be one of the following types: float16, float32.
+ * @li target: A tensor. Must be one of the following types: float16, float32. \n

* @par Attributes:
* four Attributes, including:
- * @li log_input: An optional bool. Defaults to "True" \n
- *
- * @par Attributes:
- * @li full: An optional bool. Defaults to "False" \n
- *
- * @par Attributes:
- * @li eps: An optional float. Defaults to "1e-8" \n
- *
- * @par Attributes:
+ * @li log_input: An optional bool. Defaults to "True"
+ * @li full: An optional bool. Defaults to "False"
+ * @li eps: An optional float. Defaults to "1e-8"
* @li reduction: An optional string. Defaults to "mean" \n

* @par Outputs:
@@ -1641,14 +1576,14 @@ REG_OP(PoissonNllLoss)
/**
*@brief rnn_gen_mask
* @par Inputs:
- * @li seq_length: A ND Tensor of type int32. Recoed the current length of each batch.\n
+ * seq_length: A ND Tensor of type int32. Records the current length of each batch.\n
*
* @par Attributes:
* @li num_step: A required int.\n
* @li hidden_size: A required int. \n
*
*
- * @par Output:
+ * @par Outputs:
* y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
*
*/
@@ -1666,18 +1601,16 @@ REG_OP(RnnGenMask)
* @par Inputs:
* Two inputs, including:
* @li x: A tensor. Must be one of the following types:
-* float16, float32. \n
-*
-* @par Inputs:
+* float16, float32.
* @li target: A tensor. Must be the following types:
* int32. \n

* @par Attributes:
-* @li reduction: An optional string. Defaults to "mean" \n
+* reduction: An optional string. Defaults to "mean" \n

* @par Outputs:
-* y: A Tensor has same element type as input x. \n
-* is_target: A Tensor has same element type as input target. \n
+* @li y: A Tensor has same element type as input x. \n
+* @li is_target: A Tensor has same element type as input target. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator MultiLabelMarginLoss. \n
diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h
index 49fd02fa..5b1a4dd0 100644
--- a/third_party/fwkacllib/inc/ops/nn_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_ops.h
@@ -106,16 +106,16 @@ REG_OP(FusedBatchNormV2)
  .OP_END_FACTORY_REG(FusedBatchNormV2)

/**
- * @brief: Large amount of data sort.First operator of TopK.
+ * @brief Large amount of data sort. First operator of TopK.
* @par Inputs:
* two input, including:
* @li input_data: A Tensor. Data to be sorted. Support float16
* @li input_index: A Tensor. Range(0, 2048). Datatype and format is same as input_data.
* @par Attributes:
- * @li k_num: Int.Number to be sorted.
+ * k_num: Int. Number to be sorted.
* @par Outputs:
- * 1 output, including:
- * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+ * One output, including:
+ * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
*/
REG_OP(SegmentSort)
  .INPUT(input_data, TensorType({DT_FLOAT16}))
@@ -127,13 +127,13 @@ REG_OP(SegmentSort)
/**
* @brief: Large amount of data sort.Second operator of TopK.
* @par Inputs:
- * two input, including:
- * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+ * One input, including:
+ * input_proposal: A Tensor. Proposal sorted for each channel. Support float16
* @par Attributes:
- * @li k_num: Int.Number to be sorted.
+ * k_num: Int. Number to be sorted.
* @par Outputs:
- * 1 output, including:
- * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+ * One output, including:
+ * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
*/
REG_OP(MultiMerge)
  .INPUT(input_proposal, TensorType({DT_FLOAT16}))
@@ -142,14 +142,14 @@ REG_OP(MultiMerge)
  .OP_END_FACTORY_REG(MultiMerge)

/**
- * @brief: Large amount of data sort.Third operator of TopK.
+ * @brief Large amount of data sort. Third operator of TopK.
* @par Inputs:
- * two input, including:
- * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+ * One input, including:
+ * input_proposal: A Tensor. Proposal sorted for each channel. Support float16
* @par Attributes:
- * @li k_num: Int.Number to be sorted.
+ * k_num: Int. Number to be sorted.
* @par Outputs:
- * 2 output, including:
+ * Two outputs, including:
* @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted.
* @li output_index: A Tensor. int32. Data index.
*/
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index 80a21333..72363d18 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -29,7 +29,7 @@ namespace ge {
/**
*@brief Performs pooling on the input.
*@par Inputs:
-*@li x: An NCHW tensor of type float16, float32, int8.
+* x: An NCHW tensor of type float16, float32, int8.
*@par Attributes:
*@li mode: An optional int32, specifying the pooling algorithm, either "0" (max pooling) or "1" (avg pooling). Defaults to "0".
*@li global_pooling: An optional bool. Defaults to "false".
@@ -50,6 +50,7 @@ namespace ge {
*dilation[2]: An optional int32, specifying the left dilation. Defaults to "1".
*dilation[3]: An optional int32, specifying the right dilation. Defaults to "1".
*@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0".
+*@li data_format: An optional string, specifying the data format of the input and output data. Defaults to "NCHW".
*@par Outputs:
*y: An NCHW tensor of type float16, float32, int32.
*@attention Constraints:
@@ -204,7 +205,7 @@ REG_OP(AvgPool3D)
*y: The average pooled output tensor . \n

*@attention Constraints:
-*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+*"ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3D.
@@ -281,10 +282,10 @@ REG_OP(AvgPool3DGrad)
* @li data_format: A string, format of input data . \n

* @par Outputs:
-* @output: The average pooled output tensor . \n
+* output: The average pooled output tensor . \n

* @attention Constraints:
-* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+* "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3DGradD.
@@ -431,6 +432,47 @@ REG_OP(MaxPool3D)
  .OP_END_FACTORY_REG(MaxPool3D)

/**
+* @brief Performs max pooling3d and outputs both the max values and their indices.
+*
+* @par Inputs:
+* One input:
+* x: A 6D tensor. Supported type: float16. Format as NDC1HWC0.
+* @par Attributes:
+* @li ksize: A required list of int32 values,
+* specifying the size of the window for each dimension of the input tensor.
+* No default value.
+* @li strides: A required list of int32 values,
+* specifying the stride of the sliding window for each dimension of
+* the input tensor. No default value.
+* @li pads: A required 3*2-dimension-list of int32 values,
+* specifying the padding of the three spatial dimensions of the input; padded positions are filled with 0.
+* @li dilation: dilation of the kernel. Default value is {1, 1, 1, 1, 1}.
+* @li ceil_mode: Default value is false.
+* @li data_format: the format of the (Torch-style) input. Default value is "NCDHW".
+* @li argmax_type: determines the type of the output argmax. "bitmask" is the
+* default value, in which case the argmax is returned as an img2col bitmask.
+* "index_int32" and "index_int64" represent the Torch-style output indices.
+* @par Outputs:
+* y: A 6D tensor. The maxpool3d output (max values), format as NDoC1HoWoC0.
+* @par Outputs:
+* argmax: A 5D uint16 tensor. The indices output,
+* format as NC1HWC0; it actually represents N, Do, C1*ksize, Ho*Wo//16, 16.
+*/
+REG_OP(MaxPool3DWithArgmax)
+  .INPUT(x, TensorType::RealNumberType())
+  .OUTPUT(y, TensorType::RealNumberType())
+  .OUTPUT(argmax, TensorType::IndexNumberType())
+  .REQUIRED_ATTR(ksize, ListInt)
+  .REQUIRED_ATTR(strides, ListInt)
+  .REQUIRED_ATTR(pads, ListInt)
+  .ATTR(dilation, ListInt, {1, 1, 1, 1, 1})
+  .ATTR(ceil_mode, Bool, false)
+  .ATTR(data_format, String, "NCDHW")
+  .ATTR(argmax_type, String, "bitmask")
+  .OP_END_FACTORY_REG(MaxPool3DWithArgmax)
+
+/**
*@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n
* The output is of size H x W, for any input size.
@@ -522,8 +564,7 @@ REG_OP(MaxPool3DGradGrad)
* y: A mutable tensor. Has the same shape and type as "x1" . \n

* @attention Constraints:
-* @li Computing gradients of global pooling is not supported, which means
-* "ksize < x1".
+* @li ksize is limited by buffer with full tiling.
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

* @par Third-party framework compatibility
@@ -568,7 +609,7 @@ REG_OP(MaxPoolGrad)
* @li Other dimensions of ksize and strides is 1 . \n

* @par Outputs:
-* @li y: Has the same type and format as input "x1" . \n
+* y: Has the same type and format as input "x1" . \n

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator MaxPoolGradGrad.
@@ -588,7 +629,7 @@ REG_OP(MaxPoolGradGrad)
*@brief Performs max_pool_ext2 on the input . \n

*@par Inputs:
-* Two inputs:
+* Three inputs:
*@li x: An NC1HWC0 Tensor of type float16.
*@li strides: A required type of int32 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li ksize: A required type of int32 values, specifying the size of the window for each dimension of the input tensor. No default value.
@@ -635,7 +676,8 @@ REG_OP(MaxPoolV2)
*@li strides: A required list of int8, int16, int32, or int64 values,
* specifying the stride of the sliding window for each dimension of
* the input tensor. No default value.
-*@li padding: A required string. No default value . \n
+*@li padding: A required string. No default value .
+*@li Targmax: An optional int with default value 7 . \n

*@par Outputs:
*@li y: A Tensor. Has the same type and format as input "x".
@@ -645,7 +687,7 @@ REG_OP(MaxPoolV2)
* ksize[1] * ksize[2] <= 255.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1,
* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
-*@li "padding" is either "SAME" or "VALID" . \n
+*@li "padding" is either "SAME" or "VALID" .

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolWithArgmax.
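Note: continuing the graph-construction pattern shown earlier, a sketch of how the new MaxPool3DWithArgmax prototype above might be instantiated, assuming the generated ge::op::MaxPool3DWithArgmax class and an existing input node x (e.g. a ge::op::Data); the window, stride, and pad values are illustrative only:

    // 2x2x2 window, stride 2, zero padding; argmax returned as the default bitmask.
    auto pool = ge::op::MaxPool3DWithArgmax("max_pool3d_with_argmax")
                    .set_input_x(x)                      // 6D NDC1HWC0 float16 tensor
                    .set_attr_ksize({1, 1, 2, 2, 2})     // required
                    .set_attr_strides({1, 1, 2, 2, 2})   // required
                    .set_attr_pads({0, 0, 0, 0, 0, 0})   // required 3*2 padding list
                    .set_attr_ceil_mode(false)
                    .set_attr_data_format("NCDHW")
                    .set_attr_argmax_type("bitmask");    // or "index_int32" / "index_int64"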
@@ -710,14 +752,15 @@ REG_OP(MaxPoolGradWithArgmax) *@brief Performs transform mask to argmax . \n *@par Inputs: -* Two input: -*x: An NC1HWC0 Tensor of type float16. -*mask: An NC1HWC0 Tensor of type uint16 . \n +* Two inputs: +*@li x: An NC1HWC0 Tensor of type float16. +*@li mask: An NC1HWC0 Tensor of type uint16 . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. *@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li padding: A required string. No default value . \n +*@li padding: A required string. No default value . +*@li originshape:A required list of int8, int16, int32, or int64 values, No default value. \n *@par Outputs: *argmax: An NC1HWC0 Tensor of type int32 . \n @@ -754,7 +797,7 @@ REG_OP(Mask2Argmax) * @li strides: A required list, specifying the stride of the sliding window. * @li padding: A required string, window sliding mode. Either SAME or VALID. * @par Outputs: -* @li y:Result tensor. Supported type: float, double, int32, +* y:Result tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64 * @attention Constraints: @@ -767,7 +810,7 @@ REG_OP(Mask2Argmax) * (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16), else failed . \n * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax. +* Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax. */ REG_OP(MaxPoolGradGradWithArgmax) .INPUT(x, TensorType::RealNumberType()) @@ -931,11 +974,11 @@ REG_OP(AvgPoolV2GradD) .OP_END_FACTORY_REG(AvgPoolV2GradD) /** -*@brief :upsample the layer +*@brief upsample the layer, similar to the nearest-neighbor difference scaling algorithm. *@par Inputs: * one input, including: -*@li x: A tensor of type float16 or float32. +* x: A tensor of type float16 or float32. *@par Attributes: *@li scale: A optional float32, scale factor of x. Defaults to "1.0". *@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2". @@ -1419,7 +1462,7 @@ REG_OP(MaxPoolV3) * the floor function will be used. Default False \n * @par Outputs: -* y: A mutable tensor. Has the same shape and type as "x1" . \n +* out_grad: A mutable tensor. Has the same shape and type as "x1" . \n * @attention Constraints: * @li Computing gradients of global pooling is not supported, which means @@ -1447,8 +1490,8 @@ REG_OP(MaxPoolV3Grad) *@brief Performs Dilation2D on the input . \n *@par Inputs: -*x: A tensor of shape is 4d, format is support NHWC. -*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n +*@li x: A tensor of shape is 4d, format is support NHWC. +*@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n *@par Attributes: *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. @@ -1480,9 +1523,9 @@ REG_OP(Dilation2D) *@brief Performs Dilation2DBackpropFilter on the input. \n *@par Inputs: -*x: A tensor of shape is 4d, format is support NHWC. -*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. -*out_backprop: Has the same type and format as input x and the c dimension is same with x. 
\n +*@li x: A tensor of shape is 4d, format is support NHWC. +*@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +*@li out_backprop: Has the same type and format as input x and the c dimension is same with x. \n *@par Attributes *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. @@ -1519,9 +1562,9 @@ REG_OP(Dilation2DBackpropFilter) *@brief Performs Dilation2DBackpropInput on the input. \n *@par Inputs: -*x: A tensor of shape is 4d, format is support NHWC. -*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. -*out_backprop: Has the same type and format as input x and the c dimension is same with x. \n +*@li x: A tensor of shape is 4d, format is support NHWC. +*@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +*@li out_backprop: Has the same type and format as input x and the c dimension is same with x. \n *@par Attributes *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 75e91aee..9dd502cd 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -289,7 +289,8 @@ REG_OP(SparseApplyAdagradV2D) * Should be from a Variable(). *@li lr: A scalar. Has the same type as "var". *@li grad: A tensor for the gradient. Has the same type as "var". -* +*@li momentum: Momentum. Must be a scalar. + *@par Attributes: *@li use_nesterov: An optional bool. Defaults to "False". * If "True", the tensor passed to compute grad will be @@ -701,7 +702,7 @@ REG_OP(ApplyPowerSignD) /** *@brief Updates "var" as FOBOS algorithm with fixed learning rate. * prox_v = var - alpha * delta -* var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +* var = sign(prox_v)/(1+alpha * l2) * max{|prox_v|-alpha * l1,0} * *@attention Constraints: * the input tensors must have the same shape. @@ -2128,10 +2129,12 @@ REG_OP(FusedMulApplyMomentumExtern) * otherwise the behavior is undefined, but may exhibit less contention. * *@par Outputs: -* var: A mutable tensor. Has the same type as input "var". +* @li var: A mutable tensor. Has the same type as input "var". +* @li accum: A mutable tensor. Has the same type as input "accum". * *@attention Constraints: -* The input tensors must have the same shape. +* @li var: A mutable tensor. Has the same type as input "var". +* @li accum: A mutable tensor. Has the same type as input "accum". * *@par Third-party framework compatibility * Compatible with the TensorFlow operator ResourceApplyKerasMomentum. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index ca1c24eb..01ff77cb 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -28,8 +28,8 @@ namespace ge { *@brief Computes the for the gelu of "x" . \n *@par Inputs: -*Two inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32 +*One input, including: +*x: A Tensor. Must be one of the following types: float16, float32 *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -66,8 +66,8 @@ REG_OP(GeluGrad) *@brief Computes the for the fast_gelu of "x" . 
\n

*@par Inputs:
-*Two inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32
+*One input, including:
+*x: A Tensor. Must be one of the following types: float16, float32

*@par Outputs:
*y: A Tensor. Has the same type as "x".
@@ -83,7 +83,7 @@ REG_OP(FastGelu)
*@brief Computes the gradient for the fast_gelu of "x" . \n

*@par Inputs:
-*Three inputs, including:
+*Two inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32
* @li x: A Tensor of the same type as "dy" . \n
@@ -169,7 +169,7 @@ REG_OP(Relu)
* x: A Tensor of type RealNumberType . \n

* @par Outputs:
-* y: A Tensor of type RealNumberType . \n
+* y: A Tensor with the same type as x . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Relu6.
@@ -209,8 +209,12 @@ REG_OP(Relu6D)
* backprops = gradients * (features > 0) * (features < 6) . \n

* @par Inputs:
-* @li features: A Tensor of type RealNumberType.
-* @li gradients: A Tensor of type RealNumberType . \n
+* @li gradients: A Tensor of type RealNumberType. The backpropagated
+  gradients to the corresponding Relu6 operation.
+* @li features: A Tensor with the same type as gradients. The features passed
+  as input to the corresponding Relu6 operation, or its output;
+  using either one produces the same result. \n
+
* @par Outputs:
* backprops: A Tensor of type RealNumberType . \n
@@ -228,7 +232,7 @@ REG_OP(Relu6Grad)
*Applies the element-wise function:
* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha .
*@par Inputs:
-*One inputs, including:
+*Two inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32.
* @li activations: A tensor. Must be one of the following types:
@@ -238,7 +242,7 @@ REG_OP(Relu6Grad)
*y: A Tensor with the same type and shape of grads's.
*
*@par Attributes:
-*@li alpha: scalar parameter, default value = 1.0
+*alpha: scalar parameter, default value = 1.0
*/
REG_OP(EluGradV2)
  .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -539,13 +543,9 @@ REG_OP(Elu)
*x: A float16, float32, for the input data type . \n

*@par Attributes:
-*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
-
-*@par Attributes:
-*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
-
-*@par Attributes:
-*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
+*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
+*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
+*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n

*@par Outputs:
*y: A float16, float32, for the normalized result . \n
@@ -706,8 +706,8 @@ REG_OP(Mish)
* @li x: A Tensor. Must be one of the following types: float16, float32
* @li tanhx: A Tensor. shape, datatype and format is same as x
* @par Outputs:
- * 1 output, including:
- * @li x_grad: A Tensor. shape, datatype and format is same as x
+ * One output, including:
+ * x_grad: A Tensor. shape, datatype and format is same as x
*/

REG_OP(MishGrad)
@@ -721,20 +721,20 @@ REG_OP(MishGrad)
* @brief pytorch hardtanh_backward operator.
*
* @par Inputs:
- * 2 inputs, including:
+ * Two inputs, including:
* @li result, minimum tensor of the linear region range,
* datatype: float16/float32, format:ND/5HD.
* @li grad, maximum tensor of the linear region range, * datatype:float16/float32, format:ND/5HD. \n * @par Attributes: - * 2 attributes, including: + * Two attributes, including: * @li min_val, minimum value of the linear region range, datatype:float. * @li max_val, maximum value of the linear region range, datatype:float. \n * @par Outputs: - * 1 output, including: - * @li y, hardtanh_backward output tensor, datatype and format is same as + * One output, including: + * y, hardtanh_backward output tensor, datatype and format is same as * input result. \n * @attention Constraints: @@ -756,7 +756,7 @@ REG_OP(HardtanhGrad) * @par Inputs: * One inputs, including: -* @li x: A mutable Tensor. Must be one of the following types: +* x: A mutable Tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: @@ -765,7 +765,7 @@ REG_OP(HardtanhGrad) * @li threshold: An optional float. Defaults to "20.0" \n * @par Outputs: -* @li y: A mutable Tensor. Has the same type as "x" \n +* y: A mutable Tensor. Has the same type as "x" \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Softplus. @@ -792,7 +792,7 @@ REG_OP(SoftplusV2) * @li threshold: An optional float. Defaults to "20.0" \n * @par Outputs: -* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n +* output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n * @par Third-party framework compatibility * Compatible with the Pytorch operator SoftplusGrad. @@ -809,13 +809,16 @@ REG_OP(SoftplusV2Grad) * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. * - * @par inputs + * @par Inputs: * one input including: - * @li x: input A Tensor. Must be one of the following types: float32, float16 + * x: input A Tensor. Must be one of the following types: float32, float16 * - * @par output + * @par Attributes: + * alpha: An optional float. Defaults to 1.0. \n + + * @par Outputs: * one output including: - * @li y:A Tensor of the same type as x + * y:A Tensor of the same type as x * */ REG_OP(ThresholdedRelu) @@ -829,14 +832,14 @@ REG_OP(ThresholdedRelu) * @par Inputs: * One inputs, including: -* @li input_x: A tensor. Must be one of the following types: +* input_x: A tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: -* @li lambd: An optional float. Defaults to 0.5. \n +* lambd: An optional float. Defaults to 0.5. \n * @par Outputs: -* y: A Tensor with the same dtype and shape of input_x's. \n +* output_y: A Tensor with the same dtype and shape of input_x's. \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Hardshrink. \n @@ -863,7 +866,7 @@ REG_OP(HardShrink) *backprops: A Tensor with the same type and shape of features's. \n * *@par Attributes: -*@li lambd: An optional float.Defaults to 0.5. \n +*lambd: An optional float.Defaults to 0.5. \n * *@par Third-party framework compatibility *Compatible with the Pytorch operator Hardshrink_backward. \n @@ -880,7 +883,7 @@ REG_OP(HardShrink) * @par Inputs: * One inputs, including: -* @li input_x: A tensor. Must be one of the following types: +* input_x: A tensor. Must be one of the following types: * float16, float32, int32. \n * @par Attributes: @@ -905,11 +908,11 @@ REG_OP(HardSigmoid) * @par Inputs: * One inputs, including: -* @li input_x: A tensor. 
Must be one of the following types: +* input_x: A tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: -* @li lambd: An optional float. Defaults to 0.5. \n +* lambd: An optional float. Defaults to 0.5. \n * @par Outputs: * y: A Tensor with the same dtype and shape of input_x's. \n @@ -933,7 +936,7 @@ REG_OP(SoftShrink) * @li input_x: A tensor of the same dtype as "input_grad". \n * @par Attributes: -* @li lambd: An optional float. Defaults to 0.5. \n +* lambd: An optional float. Defaults to 0.5. \n * @par Outputs: * y: A Tensor of the same dtype and shape as "input_graxd". \n @@ -976,12 +979,12 @@ REG_OP(LogSigmoidGrad) *@par Inputs: *One inputs, including: -* @li x: A tensor. Must be one of the following types: +* x: A tensor. Must be one of the following types: * float16, float32. \n *@par Outputs: *One outputs, including: -* @li y: A tensor with the same type and shape of x's. \n +* y: A tensor with the same type and shape of x's. \n *@par Third-party framework compatibility *Compatible with the Pytorch operator LogSigmoid. \n @@ -1003,7 +1006,7 @@ REG_OP(LogSigmoid) *@par Outputs: *One outputs, including: -* @li y: A tensor with the same type and shape of x's. \n +* y: A tensor with the same type and shape of x's. \n * @par Attributes: * @li alpha: An optional float. Defaults to 0.16666666. \n diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 6854c866..9d0e7a62 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -33,8 +33,8 @@ namespace ge { *@li value: A 0D scalar. Specifies the value to fill the returned tensor. * Must be one of the following types: -* float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, bool, +* qint8, quint8, qint32, qint16, quint16, uint16, complex128, uint32, uint64, . * *@par Outputs: * y: A tensor. Has the same type as "value". @@ -46,8 +46,14 @@ namespace ge { */ REG_OP(Fill) .INPUT(dims, TensorType::IndexNumberType()) - .INPUT(value, TensorType::BasicType()) - .OUTPUT(y, TensorType::BasicType()) + .INPUT(value, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, + DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, + DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16, + DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, + DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, + DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16, + DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64})) .OP_END_FACTORY_REG(Fill) /** @@ -213,11 +219,11 @@ REG_OP(PadV2) *@brief Pads a tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n -*constant_values: A Tensor. Must have the same type as input. +*@li x: A Tensor. Must be one of the following types: float16, float32, int32 . \n +*@li constant_values: A Tensor. Must have the same type as input. *@par Attributes: -*paddings: An optional "vector>". Defaults to "{}". +*paddings: A required Attribute. * For each dimension D of input, paddings[D, 0] indicates how many * values to add before the contents of tensor in that dimension, * and paddings[D, 1] indicates how many values to add after the @@ -461,7 +467,7 @@ REG_OP(FillV2) * @li dims: An required listInt to specify the shape that the value to fill. 
* @par Outputs: -* @li y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. +* y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. * @par Third-party framework compatibility * Compatible with the ONNX operator ConstantOfShape. diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index b625180a..e578997c 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -54,27 +54,26 @@ REG_OP(StringToNumber) /** *@brief Convert serialized tensorflow.TensorProto prototype to Tensor. *@brief Parse an Example prototype. -*@par Input: -*serialized: A Tensor of type string. -*dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n +*@par Inputs: +*@li serialized: A Tensor of type string. +*@li dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n *@par Attributes: -*num_sparse: type int num of inputs sparse_indices , sparse_values, sparse_shapes -*out_type: output type -*sparse_keys: ListString -*sparse_types: types of sparse_values -*dense_keys: ListString -*dense_shapes: output of dense_defaults shape -*dense_types: output of dense_defaults type \n +*@li num_sparse: type int num of inputs sparse_indices , sparse_values, sparse_shapes +*@li sparse_keys: ListString +*@li sparse_types: types of sparse_values +*@li dense_keys: ListString +*@li Tdense: output of dense_defaults type +*@li dense_shapes: output of dense_defaults shape \n *@par Outputs: -*sparse_indices: A Tensor of type string. -*sparse_values: Has the same type as sparse_types. -*sparse_shapes: A Tensor of type int64 -*dense_values: Has the same type as dense_defaults. +*@li sparse_indices: A Tensor of type string. +*@li sparse_values: Has the same type as sparse_types. +*@li sparse_shapes: A Tensor of type int64 +*@li dense_values: Has the same type as dense_defaults. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. -**/ +*/ REG_OP(ParseSingleExample) .INPUT(serialized, TensorType({DT_STRING})) .DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) @@ -92,16 +91,16 @@ REG_OP(ParseSingleExample) /** *@brief Decodes raw file into tensor . \n -*@par Input: +*@par Inputs: *bytes: A Tensor of type string. *@par Attributes: -*little_endian: bool ture -*out_type: output type +*@li little_endian: bool ture +*@li out_type: output type *@par Outputs: *Output: A Tensor -**/ +*/ REG_OP(DecodeRaw) .INPUT(bytes, TensorType({DT_STRING})) .OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT, @@ -147,18 +146,20 @@ REG_OP(ParseTensor) *@par Inputs: *Inputs include: -*records: Each string is a record/row in the csv and all records should have the +*@li records: Each string is a record/row in the csv and all records should have the *same format. \n -*record_defaults: One tensor per column of the input record, with either a +*@li record_defaults: One tensor per column of the input record, with either a *scalar default value for that column or an empty vector if the column is *required. \n *@par Attributes: -*OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n -*field_delim: char delimiter to separate fields in a record. \n -*use_quote_delim: If false, treats double quotation marks as regular characters +*@li OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n +*@li field_delim: char delimiter to separate fields in a record. 
\n +*@li use_quote_delim: If false, treats double quotation marks as regular characters *inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n -*na_value: Additional string to recognize as NA/NaN. \n +*@li na_value: Additional string to recognize as NA/NaN. \n +*@li select_cols: Optional sorted list of column indices to select. If specified, +only this subset of columns will be parsed and returned. *@par Outputs: *output: A Tensor. Has the same type as x . \n @@ -186,25 +187,25 @@ REG_OP(DecodeCSV) /** *@brief Convert serialized tensorflow.TensorProto prototype to Tensor. *@brief Parse an Example prototype. -*@par Input: -*serialized: A Tensor of type string. \n -*name:A Tensor of type string. \n -*sparse_keys: Dynamic input tensor of string. \n -*dense_keys: Dynamic input tensor of string \n -*dense_defaults: Dynamic input tensor type as string, float, int64. \n +*@par Inputs: +*@li serialized: A Tensor of type string. \n +*@li name:A Tensor of type string. \n +*@li sparse_keys: Dynamic input tensor of string. \n +*@li dense_keys: Dynamic input tensor of string \n +*@li dense_defaults: Dynamic input tensor type as string, float, int64. \n *@par Attributes: -*Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n -*Ndense: Number of dense_keys \n -*sparse_types: types of sparse_values \n -*Tdense: Type of dense_defaults dense_defaults and dense_values \n -*dense_shapes: output of dense_defaults shape \n +*@li Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n +*@li Ndense: Number of dense_keys \n +*@li sparse_types: types of sparse_values \n +*@li Tdense: Type of dense_defaults dense_defaults and dense_values \n +*@li dense_shapes: output of dense_defaults shape \n *@par Outputs: -*sparse_indices: A Tensor of type string. \n -*sparse_values: Has the same type as sparse_types. \n -*sparse_shapes: A Tensor of type int64 \n -*dense_values: Has the same type as dense_defaults. \n +*@li sparse_indices: A Tensor of type string. \n +*@li sparse_values: Has the same type as sparse_types. \n +*@li sparse_shapes: A Tensor of type int64 \n +*@li dense_values: Has the same type as dense_defaults. \n *@par Third-party framework compatibility \n *@li compatible with tensorflow StringToNumber operator. \n */ @@ -228,37 +229,37 @@ REG_OP(ParseExample) /** *@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed *tensors. -*@par Input: -*serialized: A Tensor of type string. \n -*feature_list_dense_missing_assumed_empty:A Tensor of type string. \n -*context_sparse_keys: Dynamic input tensor of string. \n -*context_dense_keys: Dynamic input tensor of string \n -*feature_list_sparse_keys: Dynamic input tensor of string \n -*feature_list_dense_keys: Dynamic input tensor of string \n -*context_dense_defaults: Dynamic input tensor of string, float, int64 \n -*debug_name: A Tensor of type string. \n +*@par Inputs: +*@li serialized: A Tensor of type string. \n +*@li feature_list_dense_missing_assumed_empty:A Tensor of type string. \n +*@li context_sparse_keys: Dynamic input tensor of string. \n +*@li context_dense_keys: Dynamic input tensor of string \n +*@li feature_list_sparse_keys: Dynamic input tensor of string \n +*@li feature_list_dense_keys: Dynamic input tensor of string \n +*@li context_dense_defaults: Dynamic input tensor of string, float, int64 \n +*@li debug_name: A Tensor of type string. 
\n *@par Attributes: -*Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n -*Ncontext_dense: Number of context_dense_keys \n -*Nfeature_list_sparse: Number of feature_list_sparse_keys \n -*Nfeature_list_dense: Number of feature_list_dense_keys \n -*context_sparse_types: Types of context_sparse_values \n -*Tcontext_dense: Number of dense_keys \n -*feature_list_dense_types: Types of feature_list_dense_values \n -*context_dense_shapes: Shape of context_dense \n -*feature_list_sparse_types: Type of feature_list_sparse_values \n -*feature_list_dense_shapes: Shape of feature_list_dense \n +*@li Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n +*@li Ncontext_dense: Number of context_dense_keys \n +*@li Nfeature_list_sparse: Number of feature_list_sparse_keys \n +*@li Nfeature_list_dense: Number of feature_list_dense_keys \n +*@li context_sparse_types: Types of context_sparse_values \n +*@li Tcontext_dense: Number of dense_keys \n +*@li feature_list_dense_types: Types of feature_list_dense_values \n +*@li context_dense_shapes: Shape of context_dense \n +*@li feature_list_sparse_types: Type of feature_list_sparse_values \n +*@li feature_list_dense_shapes: Shape of feature_list_dense \n *@par Outputs: -*context_sparse_indices: Dynamic output tensor of type int64. \n -*context_sparse_values: Dynamic output tensor of type string, float, int64. \n -*context_sparse_shapes: Dynamic output tensor of type int64 \n -*context_dense_values: Dynamic output tensor of type string, float, int64. \n -*feature_list_sparse_indices: Dynamic output tensor of type int64. \n -*feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n -*feature_list_sparse_shapes: Dynamic output tensor of type int64 \n -*feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n +*@li context_sparse_indices: Dynamic output tensor of type int64. \n +*@li context_sparse_values: Dynamic output tensor of type string, float, int64. \n +*@li context_sparse_shapes: Dynamic output tensor of type int64 \n +*@li context_dense_values: Dynamic output tensor of type string, float, int64. \n +*@li feature_list_sparse_indices: Dynamic output tensor of type int64. \n +*@li feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n +*@li feature_list_sparse_shapes: Dynamic output tensor of type int64 \n +*@li feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n *@par Third-party framework compatibility \n *@li compatible with tensorflow StringToNumber operator. \n */ diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 69d5e67e..0636833c 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -63,10 +63,11 @@ REG_OP(Dequantize) /** *@brief Quantizes the input . \n *@par Inputs: -*x: shape and dtype of input_x. \n -*scales: shape and dtype of input_scales. \n -*zero_points: shape and dtype of input_zero_points \n +*@li x: shape and dtype of input_x. \n +*@li scales: shape and dtype of input_scales. \n +*@li zero_points: shape and dtype of input_zero_points \n *@par Attributes: +*@li dtype: required, type. *@li axis: the processed dim. \n *@par Outputs: *y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n @@ -91,7 +92,8 @@ REG_OP(Quantize) *@li offset: A required float16, specifying the offset. 
*@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". *@li round_mode: An optional string, specifying the float16 to int8 cast type. -* The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round" . \n +* The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . +*@li dst_type: An optional int32, specifying the output data type. Defaults to "DT_INT8" . \n *@par Outputs: *y: The quantized output tensor of type int8 and with format NC1HWC0 . \n diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 20484623..5af2dd74 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -37,13 +37,18 @@ namespace ge { *deprecated name. *@li indices: Indices in the outermost dimension of `params` of the values that should be *gathered. + +*@par Attributes: +*@li PARAMS_RAGGED_RANK: The ragged rank of the params_nested_splits. +*@li Tsplits: The type of output_nested_splits. *@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain *this number of `row_splits` tensors. This value should equal *`indices.shape.ndims + params.ragged_rank - 1` . \n *@par Outputs: -*y:A Returns The `nested_row_splits` tensors that define the row-partitioning for the -*returned RaggedTensor.The `flat_values` for the returned RaggedTensor . \n +*@li output_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the +*returned RaggedTensor. +*@li output_dense_values: The `flat_values` for the returned RaggedTensor. \n *@par Third-party framework compatibility * Compatible with tensorflow RaggedGather operator. diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index 020e3da4..ceaa64e4 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -61,7 +61,6 @@ REG_OP(RaggedTensorToSparse) *@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n *@par Inputs: -*Six inputs, including: *@li shape:A `Tensor`. Must be one of the following types: `int64`, `int32`. *@li values:A 1D tensor representing the values of the ragged tensor. *@li default_value:A `Tensor`. Must have the same type as `values`. @@ -78,7 +77,7 @@ The types of the row partition tensors. At present, these can be: is preceded by "FIRST_DIM_SIZE" . \n *@par Outputs: -*@li result: A `Tensor`. Has the same type as `values`. +*result: A `Tensor`. Has the same type as `values`. */ REG_OP(RaggedTensorToTensor) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index 258b0ca1..4376437f 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -35,7 +35,11 @@ namespace ge { *@li deltas: The deltas of each range . \n *@par Outputs: -*y:A Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor` . \n +*@li rt_dense_values: The `flat_values` for the returned `RaggedTensor`. +*@li rt_nested_splits: The `row_splits` for the returned `RaggedTensor`. \n + +*@par Attributes: +*Tsplits: The type of rt_nested_splits.
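Editor's aside: a standalone sketch of the row-splits layout that the RaggedRange outputs above describe, assuming vector inputs and positive deltas (illustration only, not the operator's implementation):

#include <cstddef>
#include <cstdint>
#include <vector>

// Builds rt_nested_splits/rt_dense_values for the ranges [starts[i], limits[i]).
void RaggedRangeSketch(const std::vector<int> &starts, const std::vector<int> &limits,
                       const std::vector<int> &deltas,
                       std::vector<int64_t> &rt_nested_splits,
                       std::vector<int> &rt_dense_values) {
  rt_nested_splits.assign(1, 0);
  rt_dense_values.clear();
  for (std::size_t i = 0; i < starts.size(); ++i) {
    for (int v = starts[i]; v < limits[i]; v += deltas[i]) rt_dense_values.push_back(v);
    rt_nested_splits.push_back(static_cast<int64_t>(rt_dense_values.size()));
  }
  // starts={0,5}, limits={3,7}, deltas={1,1} yields splits {0,3,5}, values {0,1,2,5,6}.
}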
*@attention Constraints: *The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index b65a68f1..66f9b65f 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -148,6 +148,32 @@ REG_OP(RandomGamma) .OP_END_FACTORY_REG(RandomGamma) /** +*@brief Returns a random permutation of integers from 0 to n-1. \n + +*@par Attributes: +*@li n: A required int. +*@li dtype: An optional type. Defaults to int64. +*@li layout: An optional int. Defaults to 0 . \n + +*@par Outputs: +*out: A required Tensor. Must be one of the following types: + float16, float32, int8, uint8, int16, int32, int64. \n + +*@attention Constraints: +*The implementation for Randperm on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li Compatible with Pytorch Randperm operator. +*/ +REG_OP(Randperm) .OUTPUT(out, TensorType({DT_INT64, DT_INT32, DT_INT16, DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) .REQUIRED_ATTR(n, Int) .ATTR(layout, Int, 0) .ATTR(dtype, Type, DT_INT64) .OP_END_FACTORY_REG(Randperm) + /** *@brief Outputs random values from the Poisson distribution(s) described by rate . \n *@par Inputs: @@ -157,11 +183,12 @@ REG_OP(RandomGamma) *@par Attributes: *@li dtype: An optional type from: half, float32, float64, int32, int64. Defaults to int64. -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0. A second seed to avoid seed collision. \n *@par Outputs: -*y: A Tensor of type dtype . \n +*y: A Tensor of the specified dtype: float16, float, double, int32 or int64. \n *@attention Constraints: *The implementation for RandomPoisson on Ascend uses AICPU, with bad performance. @@ -188,11 +215,13 @@ REG_OP(RandomPoisson) *x: A Tensor. The tensor to be shuffled . \n *@par Attributes: -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0. A second seed to avoid seed collision. \n *@par Outputs: -*y: A Tensor. Has the same type as x . \n +*y: A Tensor. Has the same type as x. Must be one of the following types: float16, float, +*double, int16, int32, int64, uint16, int8, uint8. \n *@attention Constraints: *The implementation for RandomShuffle on Ascend uses AICPU, with bad performance. @@ -220,11 +249,12 @@ REG_OP(RandomShuffle) *@par Attributes: *@li dtype: A type from: half, float16, float32, float64. The type of the output. -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 is set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0. A second seed to avoid seed collision. \n *@par Outputs: -*y: A Tensor of type dtype . \n +*y: A Tensor of type float32, float16 or double.
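Editor's aside: the seed/seed2 wording repeated in these hunks follows the usual TensorFlow convention; a standard-library sketch of that rule (the mixing of the two seeds is illustrative only, the actual kernel behavior is not specified by this patch):

#include <cstdint>
#include <random>

std::mt19937_64 MakeGenerator(uint64_t seed, uint64_t seed2) {
  if (seed != 0 || seed2 != 0) {
    return std::mt19937_64(seed ^ (seed2 << 1));  // deterministic: seeded by the given seeds
  }
  return std::mt19937_64(std::random_device{}());  // otherwise: seeded by a random seed
}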
\n *@attention Constraints: *The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance. @@ -241,6 +271,28 @@ REG_OP(RandomStandardNormal) .OP_END_FACTORY_REG(RandomStandardNormal) /** +*@brief Output random value from separate normal distribution. \n + +*@par Inputs: +*Inputs include: +*mean: The mean is a tensor with the mean of each output element’s normal distribution . +*std: The std is a tensor with the standard deviation of each output element’s normal distribution. \n +*@par Outputs: +*y: A Tensor of type dtype . \n + +*@attention Constraints: +*The implementation for Normal on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with Pytorch Normal operator. +*/ +REG_OP(Normal) + .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(std, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(Normal) + +/** *@brief Outputs random integers from a uniform distribution . \n *@par Inputs: @@ -250,8 +302,9 @@ REG_OP(RandomStandardNormal) * @li max: A Tensor. Must have the same type as minval. 0-D . \n *@par Attributes: -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n *@par Outputs: *y: A Tensor. Has the same type as min . \n @@ -280,8 +333,9 @@ REG_OP(RandomUniformInt) *@par Attributes: *@li dtype: A type from: half, float16, float32, float64. The type of the output. -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, +the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n *@par Outputs: *y: A Tensor of type dtype . \n @@ -308,11 +362,14 @@ REG_OP(RandomUniform) *shape: A Tensor. Must be one of the following types: int32, int64 . \n *@par Attributes: -*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0 . \n +*@li seed: An optional int. Defaults to 0.If either `seed` or `seed2` +are set to be non-zero, the random number generator is seeded by the given +seed. Otherwise, it is seeded by a random seed. +*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n *@par Outputs: -*size: A Tensor of types: float16, float32, double . \n +*y: A Tensor of types: float16, float32, double . A tensor of the specified shape +filled with random truncated normal values. \n *@attention Constraints: *The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance. @@ -505,15 +562,15 @@ REG_OP(RandomChoiceWithMask) *@par Inputs: *Inputs including: -* @li x: A required Tensor. Must be one of the following types: - float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n +* x: A required Tensor. Must be one of the following types: + float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n *@par Attributes: -*@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . 
\n +* group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n *@par Outputs: -*y: A required Tensor. Has same type and shape as "x". Must be one of the following types: - float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n +* y: A required Tensor. Has the same type and shape as "x". Must be one of the following types: + float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n *@attention Constraints: *@li "group" must be greater than 0 and must evenly divide the channel dimension size. @@ -584,6 +641,50 @@ REG_OP(DropoutV2) .OUTPUT(seed, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(p, Float) .OP_END_FACTORY_REG(DropoutV2) + +/** +* @brief Draws random samples from a Bernoulli distribution parameterized by "p". \n + +* @par Inputs: +* @li x: An ND Tensor. Must be one of the following data types: + int8, uint8, int16, int32, int64, bool, float32, float64 . +* @li p: An ND Tensor. The probability of an element to be zeroed. + Must be one of the following data types: float32, float64. \n + +* @par Attributes: +* seed: An integer, the seed of the random generator. The default value -1 + uses the current timestamp; otherwise it should be a positive integer. + +* @par Outputs: +* y: A tensor with the same shape and type as "x". +*/ + +REG_OP(Bernoulli) .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) .INPUT(p, TensorType({ DT_FLOAT, DT_DOUBLE })) .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) .ATTR(seed, Int, -1) .OP_END_FACTORY_REG(Bernoulli) + +/** + * @brief Fills the input tensor with values drawn from the uniform distribution U(from, to). \n + + * @par Inputs: + * x: A Tensor. Must be one of the following types: float16, float, double. \n + + * @par Attributes: + * @li from: The lower bound of the uniform distribution. Defaults to 0.0. + * @li to: The upper bound of the uniform distribution. Defaults to 1.0. \n + + * @par Outputs: + * y: A Tensor with the same type as x. \n + */ REG_OP(Uniform) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .ATTR(from, Float, 0.0) .ATTR(to, Float, 1.0) .OP_END_FACTORY_REG(Uniform) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 97c7b8e1..1578ba59 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -576,7 +576,7 @@ REG_OP(ReduceAll) *@li axis: A mutable Tensor. The dimensions to reduce . \n *@par Attributes: -*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n +*keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n *@par Outputs: *y: A Tensor. Has the same type and format as input "x" . \n @@ -967,9 +967,9 @@ REG_OP(EuclideanNormD) Defaults to "0.00001" . \n *@par Outputs: -*y: A Tensor of type float16 or float32 for the normalized "x". -*batch_mean: A Tensor of type float32 for the result mean. -*batch_ variance: A Tensor of type float32 for the result variance . \n +*@li y: A Tensor of type float16 or float32 for the normalized "x". +*@li batch_mean: A Tensor of type float32 for the result mean. +*@li batch_variance: A Tensor of type float32 for the result variance .
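Editor's aside: the y/batch_mean/batch_variance outputs above are the usual instance-normalization quantities; in scalar form (illustration only, with epsilon as documented, not the operator's implementation):

#include <cmath>

// y = gamma * (x - mean) / sqrt(variance + epsilon) + beta, applied per instance.
float InstanceNormOne(float x, float mean, float variance, float gamma, float beta,
                      float epsilon) {
  return gamma * (x - mean) / std::sqrt(variance + epsilon) + beta;
}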
\n *@attention Constraints: *For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. @@ -987,7 +987,7 @@ REG_OP(INInferV2) .OP_END_FACTORY_REG(INInferV2) /** -*@brief Performs reduced instance normalization . \n +*@brief Performs reduce instance normalization. \n *@par Inputs: *x: A Tensor of type float16 or float32. \n @@ -1008,32 +1008,31 @@ REG_OP(INTrainingReduceV2) /** -*@brief Performs update instance normalization . \n +*@brief Performs update instance normalization. \n *@par Inputs: -* Seven inputs, including: (NC1HWC0supported) +* Seven inputs, including: *@li x: A Tensor of type float16 or float32. *@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. *@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. *@li gamma: A Tensor of type float32, for the scaling gamma. *@li beta: A Tensor of type float32, for the scaling beta. *@li mean: A Tensor of type float32, for the updated mean. -*@li variance: A Tensor of type float32, for the updated variance . \n +*@li variance: A Tensor of type float32, for the updated variance. \n *@par Attributes: *@li momentum: A required float32, specifying the momentum to update mean and var. -*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n +*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. \n *@par Outputs: * Three outputs *@li y: A Tensor of type float16 or float32, for normalized "x". *@li batch_mean: A Tensor of type float32, for the updated mean. -*@li batch_variance: A Tensor of type float32, for the updated variance . \n +*@li batch_variance: A Tensor of type float32, for the updated variance. \n *@attention Constraints: -*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. +* This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with INTrainingReduceV2. -*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ REG_OP(INTrainingUpdateV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -1052,6 +1051,80 @@ REG_OP(INTrainingUpdateV2) /** +*@brief Performs the backpropagation of InstanceNorm. \n + +*@par Inputs: +* Seven inputs, including: +*@li dy: A Tensor of type float16 or float32. +*@li x: A Tensor of type float16 or float32. +*@li variance: A Tensor of type float32, for the variance of "x". +*@li mean: A Tensor of type float32, for the mean of "x". +*@li res_gamma: A Tensor of type float32. +*@li res_beta: A Tensor of type float32. +*@li gamma: A Tensor of type float32. \n + +*@par Outputs: +*pd_x: A Tensor of type float16 or float32, for the offset of "x". \n + +*@attention Constraints: +* The preceding layer of this operator must be INTrainingUpdateGrad. \n +*/ +REG_OP(INTrainingReduceGrad) + .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT})) + .INPUT(res_gamma, TensorType({DT_FLOAT})) + .INPUT(res_beta, TensorType({DT_FLOAT})) + .INPUT(gamma, TensorType({DT_FLOAT})) + .OUTPUT(pd_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(INTrainingReduceGrad) + +/** +*@brief Performs the backpropagation of InstanceNorm. \n + +*@par Inputs: +* Four inputs, including: +*@li dy: A Tensor of type float16 or float32, for the gradient. 
+*@li x: A Tensor of type float16 or float32. +*@li variance: A Tensor of type float32, for the variance of "x". +*@li mean: A Tensor of type float32, for the mean of "x". \n + +*@par Outputs: +*@li res_gamma: A Tensor of type float32. +*@li res_beta: A Tensor of type float32. \n + +*/ +REG_OP(INTrainingUpdateGrad) .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(variance, TensorType({DT_FLOAT})) .INPUT(mean, TensorType({DT_FLOAT})) .OUTPUT(res_gamma, TensorType({DT_FLOAT})) .OUTPUT(res_beta, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(INTrainingUpdateGrad) + +/** +*@brief Performs the backpropagation of InstanceNorm. \n + +*@par Inputs: +* Two inputs, including: +*@li res_gamma: A Tensor of type float32. +*@li res_beta: A Tensor of type float32. \n + +*@par Outputs: +*@li pd_gamma: A Tensor of type float32. +*@li pd_beta: A Tensor of type float32. \n + +*/ +REG_OP(INTrainingUpdateGradGammaBeta) .INPUT(res_gamma, TensorType({DT_FLOAT})) .INPUT(res_beta, TensorType({DT_FLOAT})) .OUTPUT(pd_gamma, TensorType({DT_FLOAT})) .OUTPUT(pd_beta, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(INTrainingUpdateGradGammaBeta) + +/** *@brief Performs reduced group normalization . \n *@par Inputs: *@par Attributes: -*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingUpdate . \n +*num_groups: Int, specifying the number of groups. Required, same as GNTrainingUpdate . \n *@attention Constraints: * This operator is a GroupNorm fusion operator for updating the moving averages for training. @@ -1081,7 +1154,7 @@ REG_OP(GNTrainingReduce) *@brief Performs update group normalization . \n *@par Inputs: -* Eight inputs, including: (NCHW NHWC supported) +* Seven inputs, including: (NCHW NHWC supported) *@li x: A Tensor of type float16 or float32. *@li sum: A 5D Tensor of type float32, shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC @@ -1145,8 +1218,8 @@ include: *@li keep_dims: An optional bool. Defaults to False. If True, retains reduced dimensions with length 1. *@li separator: A string, the separator to use when joining. -*@par output: -*@li output::A Tensor of type string.. +*@par Outputs: +*output: A Tensor of type string. */ REG_OP(ReduceJoin) .INPUT(input, TensorType({DT_STRING})) @@ -1160,7 +1233,7 @@ REG_OP(ReduceJoin) * @brief Calculates the standard deviation and average value of Tensors. * @par Inputs: -* @li x: A Tensor. Must be one of the following types: +* x: A Tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index 74ac83f8..156f2f34 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -33,10 +33,12 @@ namespace ge { *y: A Tensor of type resource. \n *@par Attributes: -* @li container: optional, string. -* @li shared_name: optional, string. -* @li dtype: required, type. -* @li shape: optional, ListInt. \n +* @li container: optional, string. The container this +variable is placed in. +* @li shared_name: optional, string. The name by which + this variable is referred to. +* @li dtype: required, type. The data type of the output. +* @li shape: optional, ListInt. The shape of the output. \n *@see VarHandleOp. */ @@ -53,11 +55,11 @@ REG_OP(VarHandleOp) *@brief Assigns a new value to a variable.
\n *@par Inputs: -*resource:Handle to the resource in which to store the variable. -*value:The value to set the new tensor to use. \n +*@li resource: Handle to the resource in which to store the variable. +*@li value: The value to set the new tensor to use. \n *@par Attributes: -* @li dtype: required, type. \n +* dtype: required, type. \n *@see AssignVariableOp. */ @@ -73,11 +75,11 @@ REG_OP(AssignVariableOp) *@brief Adds a value to the current value of a variable. \n *@par Inputs: -*resource:Handle to the resource in which to store the variable. -*value:The value by which the variable will be incremented. \n +*@li resource: Handle to the resource in which to store the variable. +*@li value: The value by which the variable will be incremented. \n *@par Attributes: -* @li dtype: required, type. \n +* dtype: required, type. \n *@see AssignAddVariableOp. */ @@ -93,11 +95,11 @@ REG_OP(AssignAddVariableOp) *@brief Subtracts a value from the current value of a variable. \n *@par Inputs: -*resource:Handle to the resource in which to store the variable. -*value:The value by which the variable will be incremented. \n +*@li resource: Handle to the resource in which to store the variable. +*@li value: The value by which the variable will be decremented. \n *@par Attributes: -* @li dtype: required, type. \n +* dtype: required, type. \n *@see AssignSubVariableOp. */ diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 80546860..20828a89 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -127,9 +127,7 @@ REG_OP(DynamicLSTM) *@li cell_clip:A float identifying the cell clip in the op. Default to -1. *@li num_proj:An integer identifying the num projection in the op. Default to 0. *@li time_major:A bool identifying the time major in the op. Default to false. -*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. *@li forget_bias:A float identifying the forget bias in the op. Default to 0. -*@li is_training:An bool identifying is training in the op. Default to true. *@par Outputs: *eight outputs: \n @@ -491,7 +489,6 @@ REG_OP(DynamicLSTMV2) *ten inputs: \n *@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -504,10 +501,11 @@ REG_OP(DynamicLSTMV2) *@par Outputs: -*eight outputs: \n +*four outputs: \n *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*/ REG_OP(LSTMInputGrad) .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -571,13 +569,13 @@ REG_OP(DynamicLSTMGradCell) .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(t_state, TensorType({DT_INT32, DT_INT32})) + .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(forget_bias, Float, 1) - .ATTR(activation, String, "") - .ATTR(direction, String, "Forward") + .ATTR(forget_bias, Float, 1.0) + .ATTR(activation, String, "tanh") + .ATTR(direction, String, "UNIDIRECTIONAL") .ATTR(gate_order, String, "ijfo") .OP_END_FACTORY_REG(DynamicLSTMGradCell) @@ -1070,7 +1068,7 @@ REG_OP(GRUV2HiddenGradCell) * If "False", "grad_weight" will not be scaled by word_frequency. \n * @par Outputs: -* @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n +* y: A mutable output Tensor of the new word grad. Has the same type as "grads". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator EmbeddingDenseGrad. @@ -1222,7 +1220,7 @@ REG_OP(CommonGRU) * is equivalent to the size of indices. This matches the CSR format. \n * @par Outputs: -* @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n +* y: A mutable output Tensor of the new word grad. Has the same type as "grads". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator EmbeddingBag. diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index 089af326..850b3e5a 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -28,12 +28,12 @@ namespace ge { * iou_threshold with higher scoring box according to their * intersection-over-union (IoU) . \n -*@par Input: -* @li box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and +* @par Inputs: +* box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and * corresponding confidence scores . \n * @par Attributes: -* @li iou_threshold: An optional float. The threshold for deciding whether boxes +* iou_threshold: An optional float. The threshold for deciding whether boxes * overlap too much with respect to IOU . \n * @par Outputs: diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index 34c6a268..601b360b 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -45,7 +45,13 @@ namespace ge { *corresponding weights in sparse_weights. This field may be omitted for the dense approach. It's a dynamic input. *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. *@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group. It's a dynamic input. -*@li example_state_data: a list of vectors containing the example state data. +*@li example_state_data: a list of vectors containing the example state data. \n + +*@par Attributes: +*@li adaptive: An optional bool. Defaults to false. +*@li num_sparse_features: The number of sparse feature groups. +*@li num_sparse_features_with_values: The number of sparse feature groups with values. +*@li num_dense_features: The number of dense feature groups. *@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses.
*@li l1: Symmetric l1 regularization strength. *@li l2: Symmetric l2 regularization strength. @@ -53,10 +59,10 @@ *@li num_inner_iterations: Number of iterations per mini-batch . \n *@par Outputs: -*y: A Returns a list of vectors containing the updated example state +*@li out_example_state_data: A list of vectors containing the updated example state *data. -*weights associated with a sparse feature group.a list of vectors where the values are the delta -*weights associated with a dense feature group . \n +*@li out_delta_sparse_weights: A list of vectors where each value is the delta +*weights associated with a sparse feature group. +*@li out_delta_dense_weights: A list of vectors where the values are the delta +*weights associated with a dense feature group . \n *@par Third-party framework compatibility * Compatible with tensorflow SdcaOptimizerV2 operator. diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 1c26e033..43f72ef3 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -258,7 +258,7 @@ REG_OP(GatherV2D) REG_OP(GatherElements) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) - .INPUT(index, TensorType({DT_INT64})) + .INPUT(index, TensorType({DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .ATTR(dim, Int, 0) .OP_END_FACTORY_REG(GatherElements) @@ -508,7 +508,7 @@ REG_OP(UnsortedSegmentSum) *@par Inputs: *One input, including: -* @li assist: A tensor. Must be one of the following types: +* assist: A tensor. Must be one of the following types: * float16, float32. \n * @par Attributes: @@ -970,10 +970,11 @@ REG_OP(TopKV2) * for matrices) . \n * @par Attributes: -* @li sorted: An optional bool. Defaults to true. +* @li sorted: Defaults to true. * If true, the resulting "k" elements will be sorted by the values in descending * order. -* @li T: Indicator of indices type . \n +* @li largest: If true, the resulting `k` elements will be sorted by the values in descending order. +* @li dim: 0-D. The dimension to sort along (the last dimension, i.e. each row, for matrices). \n * @par Outputs: * @li values: A Tensor, specifying the sorted data. Has the same type as * @see TopK() * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator TopKV2. +* Compatible with the TensorFlow operator TopKV2. */ REG_OP(TopK) .INPUT(x, TensorType::RealNumberType()) @@ -1085,7 +1086,6 @@ REG_OP(InTopKD) * @brief Says whether the targets are in the top "k" predictions . \n * @par Inputs: -* Two inputs, including: * @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. * @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. * @li k: A 1D Tensor of the same type as "x2". @@ -1618,12 +1618,12 @@ REG_OP(UnsortedSegmentMinD) * y: A Tensor of type RealNumberType . \n * @attention Constraints: -* @li segment_ids must be non-negative tensor. +* segment_ids must be a non-negative tensor. * @see UnsortedSegmentSum(), UnsortedSegmentProd(), * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator UnsortedSegmentMax. +* Compatible with the TensorFlow operator UnsortedSegmentMax. */ REG_OP(UnsortedSegmentMax) .INPUT(x, TensorType::RealNumberType()) @@ -1875,15 +1875,15 @@ REG_OP(Crop) *@par Inputs: *One input, including: -* @li x: A tensor .
Must be one of the following types: +* x: A tensor. Must be one of the following types: * float16, float32, int32, uint32, int8, uint8. \n *@par Attributes: -* @li axis: Axis along which to cummin. \n +* axis: Axis along which to cummin. \n *@par Outputs: -* y: A Tensor with the same type and shape of x's. \n -* indices: A Tensor with the int32 type and the same shape of x's. \n +* @li y: A Tensor with the same type and shape as x's. +* @li indices: A Tensor with the int32 type and the same shape as x's. \n *@par Third-party framework compatibility *Compatible with the Pytorch operator Cummin. \n @@ -1968,17 +1968,14 @@ REG_OP(WriteSelect) .OP_END_FACTORY_REG(WriteSelect) /** -*@brief Read data by stride . \n +*@brief Read data by stride. *@par Inputs: -*One input: -*x: A Tensor. Must be one of the following types: float16, int8 . \n +*x: A Tensor. Must be one of the following types: float16, int8. \n *@par Attributes: -*@li axis: A required int32, specifying the index of axis to read by stride . \n - -*@par Attributes: -*@li stride: A required int32, specifying the value of reading stride . \n +*@li axis: A required int32, specifying the index of axis to read by stride. \n +*@li stride: A required int32, specifying the value of reading stride. \n *@par Outputs: *y: A Tensor of the same type as "x". @@ -1991,16 +1988,14 @@ REG_OP(StridedRead) .OP_END_FACTORY_REG(StridedRead) /** -*@brief: Write data by stride . \n +*@brief Write data by stride. *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, int8 . \n - -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to write by stride . \n +*x: A Tensor. Must be one of the following types: float16, int8. \n *@par Attributes: -*@li stride: A required int32, specifying the value of writing stride . \n +*@li axis: A required int32, specifying the index of axis to write by stride. \n +*@li stride: A required int32, specifying the value of writing stride. \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -2076,10 +2071,10 @@ REG_OP(CumulativeLogsumexpD) * @li updates: A Tensor of the same type as "var". \n * @par Attributes: -* @li axis: An required int to specify the axis to perform indices add. \n +* axis: A required int specifying the axis along which to perform the index add. \n * @par Outputs: -* @li var: A Tensor. Same as input "var". +* var: A Tensor. Same as input "var". * @par Third-party framework compatibility * Compatible with the Pytorch operator index_add_. @@ -2104,7 +2099,7 @@ REG_OP(InplaceIndexAdd) * @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8. * @par Outputs: -* @li y: A tensor. Must be one of the following dtypes: +* y: A tensor. Must be one of the following dtypes: * float16, float32, int64, int32, int8. */ REG_OP(MaskedFill) @@ -2123,7 +2118,7 @@ * @li mask: A Tensor of dtype bool. \n * @par Outputs: -* @li y: A tensor with the same type as x. \n +* y: A tensor with the same type as x. \n * @par Third-party framework compatibility * Compatible with the Numpy operator select. @@ -2134,13 +2129,50 @@ REG_OP(MaskedSelectV2) .INPUT(mask, TensorType({DT_BOOL})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(MaskedSelectV2) + +/** +* @brief Selects the elements of "x" for which "mask" is true. + +* @par Inputs: +* Two inputs, including: +* @li x: A Tensor of dtype float16, float32, float64, int64, int32, int16, int8 or uint8. +* @li mask: A Tensor of dtype bool.
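Editor's aside: a standalone sketch of the boolean-mask gather that MaskedSelect documents (flattened one-dimensional form; illustration only, not the operator's implementation):

#include <cstddef>
#include <vector>

// Keeps x[i] wherever mask[i] is true; the output length equals the number of
// true entries, which is why the result is a new, shorter tensor.
std::vector<float> MaskedSelectSketch(const std::vector<float> &x,
                                      const std::vector<bool> &mask) {
  std::vector<float> y;
  for (std::size_t i = 0; i < x.size() && i < mask.size(); ++i) {
    if (mask[i]) y.push_back(x[i]);
  }
  return y;
}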
\n + +* @par Outputs: +* y: A tensor with the same type as x. \n + +*/ +REG_OP(MaskedSelect) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .INPUT(mask, TensorType({DT_BOOL})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .OP_END_FACTORY_REG(MaskedSelect) + +/** +* @brief Updates the values of "x" with "updates" at the positions where "mask" is true. + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor of dtype float16, float32, float64, int64, int32, int16, int8 or uint8. +* @li mask: A Tensor of dtype bool. +* @li updates: A tensor with the same type as x. \n + +* @par Outputs: +* y: A tensor with the same type as x. \n +*/ +REG_OP(MaskedScatter) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .INPUT(mask, TensorType({DT_BOOL})) .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .OP_END_FACTORY_REG(MaskedScatter) /** * @brief Slice a tensor at its last dim, e.g. a[..., begin:end:stride]. \n * @par Inputs: * One input, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int16, int32. +* x: A Tensor. Must be one of the following types: float16, float32, int16, int32. * @par Attributes: * @li start: An attribute of type Int, start index of last dim. \n * @li end: An attribute of type Int, end index of last dim. \n * @li stride: An attribute of type Int, stride of slice. \n * @par Outputs: -* @li y: A Tensor. Has the same type as "x". \n +* y: A Tensor. Has the same type as "x". \n * @par Third-party framework compatibility * No compatibility @@ -2162,39 +2194,36 @@ REG_OP(SliceLastDim) .OP_END_FACTORY_REG(SliceLastDim) /** -* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n -* extracts a slice of size (end-begin)/stride from the given input tensor. \n -* Starting at the location specified by begin the slice continues by \n +* @brief Extracts a strided slice of a tensor. Roughly speaking, this op +* extracts a slice of size (end-begin)/stride from the given input tensor. +* Starting at the location specified by begin the slice continues by * adding stride to the index until all dimensions are not less than end. \n * * @par Inputs: -* Four inputs, including: -* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n -* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n -* complex128, float16, uint32, uint64, complex64, complex128. \n +* Five inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. * @li begin: A Tensor of type int32 or int64, for the index of the first value to select. -* * @li end: A Tensor of type int32 or int64, for the index of the last value to select. -* * @li axes: A Tensor of type int32 or int64, indicating the axes to be selected. -* -* @li strides: A Tensor of type int32 or int64, for the increment. +* @li strides: A Tensor of type int32 or int64, for the increment. \n * * @par Attributes: -* @li begin_mask: A Tensor of type int32.
\n -* A bitmask where a bit "i" being "1" means to ignore the begin \n +* @li begin_mask: A Tensor of type int32. +* A bitmask where a bit "i" being "1" means to ignore the begin * value and instead use the largest interval possible. -* @li end_mask: A Tensor of type int32. \n +* @li end_mask: A Tensor of type int32. * Analogous to "begin_mask". -* @li ellipsis_mask: A Tensor of type int32. \n -* A bitmask where bit "i" being "1" means the "i"th position \n +* @li ellipsis_mask: A Tensor of type int32. +* A bitmask where bit "i" being "1" means the "i"th position * is actually an ellipsis. -* @li new_axis_mask: A Tensor of type int32. \n -* A bitmask where bit "i" being "1" means the "i"th \n +* @li new_axis_mask: A Tensor of type int32. +* A bitmask where bit "i" being "1" means the "i"th * specification creates a new shape 1 dimension. -* @li shrink_axis_mask: A Tensor of type int32. \n -* A bitmask where bit "i" implies that the "i"th \n -* specification should shrink the dimensionality. +* @li shrink_axis_mask: A Tensor of type int32. +* A bitmask where bit "i" implies that the "i"th +* specification should shrink the dimensionality. \n * * @par Outputs: * y: A Tensor. Has the same type as "x". @@ -2231,7 +2260,7 @@ REG_OP(StridedSliceV2) * float16, float32, int32. \n * @par Attributes: -* @li dim: A required int. Used to select the dimension of this tensor. \n +* dim: A required int. Used to select the dimension of this tensor. \n *@par Outputs: *y: A Tensor with the same type and shape of input_x's. \n @@ -2307,6 +2336,34 @@ REG_OP(MaskedFillRange) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) .REQUIRED_ATTR(axis, Int) .OP_END_FACTORY_REG(MaskedFillRange) + +/** +* @brief After a set of sorted data and a new set of data are re-sorted, get the first k data. \n +* +* @par Inputs: +* Six inputs, including: +* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation. Must be one of the following types: float32, float16. +* @li topk_pq_index: A Tensor of type int32, index corresponding to topk_pq_distance. +* @li topk_pq_ivf: A Tensor of type int32 , the bucket number corresponding to topk_pq_distance. +* @li pq_distance: A Tensor of type float32 or float16, the new data set will be reordered with topk_pq_distance and updated to topk_pq_distance. +* @li pq_index: A Tensor of type int32, index corresponding to pq_distance. +* @li pq_ivf: A scalar of type int32 , the bucket number corresponding to pq_distance. \n +* +* @par Attributes: +* @li order: A string, indicates the sorting method of topk_pq_distance. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(InplaceTopKDistance) + .INPUT(topk_pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(topk_pq_index, TensorType({DT_INT32})) + .INPUT(topk_pq_ivf, TensorType({DT_INT32})) + .INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(pq_index, TensorType({DT_INT32})) + .INPUT(pq_ivf, TensorType({DT_INT32})) + .ATTR(order, String, "asc") + .OP_END_FACTORY_REG(InplaceTopKDistance) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index a1fc9ee6..8eb7b521 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -281,9 +281,9 @@ REG_OP(SparseSliceGrad) * @li size: A 1D Tensor of type int64. The size of the slice . \n *@par Outputs: -*y_indices: A Tensor of type int64. 
-*y_values: A Tensor. Has the same type as "values". -*y_values: A Tensor of type int64 . \n +*@li y_indices: A Tensor of type int64. +*@li y_values: A Tensor. Has the same type as "values". +*@li y_shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseSlice. @@ -313,8 +313,8 @@ REG_OP(SparseSlice) * @li sum_indices: A 2D Tensor of type int64. The indices of the sum SparseTensor, with size [nnz(sum), ndims] . \n *@par Outputs: -*x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". -*x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n +*@li x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". +*@li x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseAddGrad. @@ -363,7 +363,7 @@ REG_OP(SparseFillEmptyRowsGrad) *@par Inputs: * @li x1_indices: A 2D Tensor of type int32 or int64. -* @li The indices of the matrix "SparseTensor", with size [nnz, 2]. +*The indices of the matrix "SparseTensor", with size [nnz, 2]. * @li x1_values: A 1D Tensor. The values of the SparseTensor, with size [nnz]. * @li x1_shape: A 1D Tensor of type int64. The shape of the SparseTensor, with size [2]. * @li x2: A dense matrix Tensor of the same type as "x1_values". 2D . \n @@ -373,9 +373,9 @@ REG_OP(SparseFillEmptyRowsGrad) *@par Attributes: *@li adjoint_a: An optional bool. Defaults to "False".Use the adjoint of A in the matrix multiply. -*@li If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). +*If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). *@li adjoint_b: An optional bool. Defaults to "False".Use the adjoint of B in the matrix multiply. -*@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n +*If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseTensorDenseMatMul. @@ -400,9 +400,13 @@ REG_OP(SparseTensorDenseMatMul) * @li indices: A 0D, 1D, or 2D Tensor of type int32 or int64. * @li output_shape: A 1D Tensor of the same type as "sparse_indices". The shape of the dense output tensor. * @li values: A 1D Tensor. Values corresponding to each row of "sparse_indices", -* @li or a scalar value to be used for all sparse indices. +or a scalar value to be used for all sparse indices. * @li default_value: A Tensor of the same type as "sparse_values" . \n +*@par Attributes: +*validate_indices: If true, indices are checked to make sure they are sorted in +lexicographic order and that there are no repeats. \n + *@par Outputs: *y: A Tensor. Has the same type as "values" . \n @@ -427,7 +431,6 @@ REG_OP(SparseToDense) *Concatenation is with respect to the dense versions of these sparse tensors . \n *@par Inputs: -*3 or 5 inputs,contains: * @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D. *Indices of each input `SparseTensor`.It's a dynamic input. * @li values:A list with the same length as `indices` of `Tensor` objects with the same type. @@ -700,7 +703,6 @@ REG_OP(SparseReduceMaxSparse) *@brief Computes the sum of elements across dimensions of a SparseTensor . \n *@par Inputs: -*4 or 5 inputs, including: * @li x_indices: A 2D Tensor of type int64. *"N x R" matrix with the indices of non-empty values in a *SparseTensor, possibly not in canonical ordering. 
@@ -711,13 +713,11 @@ REG_OP(SparseReduceMaxSparse) *A length-"K" vector containing the reduction axes . \n *@par Attributes: -* keep_dims: An optional bool. Defaults to "False". +*keep_dims: An optional bool. Defaults to "False". *If true, retains reduced dimensions with length 1 . \n *@par Outputs: -* @li y_indices: A Tensor of type int64. -* @li y_values: A Tensor. Has the same type as "input_values". -* @li y_shape: A Tensor of type int64 . \n +*y: A Tensor. Has the same type as "x_values". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseReduceSum. @@ -818,7 +818,6 @@ REG_OP(SparseSplit) *@brief Generates sparse cross from a list of sparse and dense tensors . \n *@par Inputs: -*8 or 10 inputs, including: * @li indices: A list of 2D Tensor objects of type int64. * Indices of each input SparseTensor.It's a dynamic input. * @li values: A list of 1D Tensor objects of type int64 or string. @@ -899,9 +898,8 @@ REG_OP(AddManySparseToTensorsMap) *@brief Reads SparseTensors from a "SparseTensorsMap" and concatenate them . \n *@par Inputs: -*2 or 4 inputs, including: * handles: A 1D Tensor of type int64. -* The "N" serialized SparseTensor objects . \n +*The "N" serialized SparseTensor objects . \n *@par Attributes: * @li dtype: A tf.DType. The "dtype" of the SparseTensor objects stored in the "SparseTensorsMap". @@ -911,9 +909,9 @@ REG_OP(AddManySparseToTensorsMap) *The shared name for the "SparseTensorsMap" read by this op . \n *@par Outputs: -* @li indices: A Tensor of type int64. -* @li values: A Tensor of type "dtype". -* @li shape: A Tensor of type int64 . \n +* @li indices: A Tensor of type int64.2-D. The `indices` of the minibatch `SparseTensor`. +* @li values: A Tensor of type "dtype". 1-D. The `values` of the minibatch `SparseTensor`. +* @li shape: A Tensor of type int64 . 1-D. The `shape` of the minibatch `SparseTensor`. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TakeManySparseFromTensorsMap. @@ -989,8 +987,7 @@ REG_OP(SerializeManySparse) *@brief Deserializes SparseTensor objects . \n *@par Inputs: -*Two inputs, including: -* serialized_sparse: A Tensor. The serialized SparseTensor objects. +*serialized_sparse: A Tensor. The serialized SparseTensor objects. *The last dimension must have 3 columns . \n *@par Attributes: diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index 34ccb398..ab9e1dec 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -31,10 +31,10 @@ namespace ge { inner-most dimension of `x`. \n *@par Inputs: -*@li x: A Tensor. Must be the following types: complex64, complex128. \n +*x: A Tensor. Must be the following types: complex64, complex128. \n *@par Outputs: -*@li y: A complex tensor of the same rank as `x`. \n +*y: A complex tensor of the same rank as `x`. \n *@par Third-party framework compatibility * Compatible with TensorFlow IFFT operator. @@ -52,7 +52,7 @@ REG_OP(IFFT) *@li fft_length: An int32 tensor of shape [1]. The FFT length . \n *@par Outputs: -*@li y: A complex64 tensor of the same rank as `input`. The inner-most +*y: A complex64 tensor of the same rank as `input`. The inner-most dimension of `input` is replaced with the `fft_length / 2 + 1` unique frequency components of its 1D Fourier transform . \n @@ -73,7 +73,7 @@ REG_OP(RFFT) *@li fft_length: An int32 tensor of shape [1]. The FFT length. 
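Editor's aside: the fft_length bookkeeping shared by the RFFT and IRFFT hunks above: a real signal of length fft_length keeps fft_length / 2 + 1 unique complex bins, so for example fft_length = 8 stores 5 bins and IRFFT reconstructs 8 real samples from them. As compile-time arithmetic (illustration only):

#include <cstdint>

constexpr int64_t UniqueRfftBins(int64_t fft_length) { return fft_length / 2 + 1; }
static_assert(UniqueRfftBins(8) == 5, "an 8-point RFFT keeps 5 unique bins");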
\n *@par Outputs: -*@li y: A float32 tensor of the same rank as `input`. The inner-most +* y: A float32 tensor of the same rank as `input`. The inner-most dimension of `input` is replaced with the `fft_length` samples of its inverse 1D Fourier transform. \n @@ -91,10 +91,10 @@ REG_OP(IRFFT) *@brief 2D fast Fourier transform. \n *@par Inputs: -*@li x: A complex64 tensor. +*x: A complex64 tensor. *@par Outputs: -*@li y: A complex64 tensor of the same shape as `input`. The inner-most 2 +*y: A complex64 tensor of the same shape as `input`. The inner-most 2 dimensions of `input` are replaced with their 2D Fourier transform. \n *@par Third-party framework compatibility @@ -110,10 +110,10 @@ REG_OP(FFT2D) innermost dimension of the input. \n *@par Inputs: -*@li x: A Tensor. Must be the following types: complex64, complex128. \n +*x: A Tensor. Must be the following types: complex64, complex128. \n *@par Outputs: -*@li y: A complex tensor with the same shape as input. The innermost dimension +*y: A complex tensor with the same shape as input. The innermost dimension of the input is replaced by its 1-dimensional Fourier transform. \n *@par Third-party framework compatibility @@ -129,10 +129,10 @@ REG_OP(FFT) innermost dimension of the input. \n *@par Inputs: -*@li x: A Tensor. Must be the following types: complex64, complex128. \n +*x: A Tensor. Must be the following types: complex64, complex128. \n *@par Outputs: -*@li y: A complex tensor with the same shape as input. The innermost dimension +*y: A complex tensor with the same shape as input. The innermost dimension of the input is replaced by its inverse two-dimensional Fourier transform. \n *@par Third-party framework compatibility diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index fe25a46f..98d4d111 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -161,14 +161,11 @@ REG_OP(SplitVD) /** *@brief Concatenates a list of N tensors along the first dimension. *@par Inputs: -* Two inputs, including: -* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, +* One input, including: +* values: A list of Tensors. Must be one of the following types: int8, int16, int32, * int64, uint8, uint16, uint32, uint64, float16, float32. * Tensors to be concatenated. All must have size 1 in the first dimension and same shape. -* It's a dynamic input. -* @li shape: A Tensor of the same type as "x". -* The final shape of the result. Should be equal to the shapes of any input -* but with the number of input values in the first dimension . \n +* It's a dynamic input. \n *@par Attributes: * @li shape: A required list of ints. diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index 3c8e32b6..d1ec00b5 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -104,7 +104,7 @@ REG_OP(DestroyTemporaryVariable) *@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n *@par Inputs: -*x: A tensor . \n +*x: A Tensor of type float16, float32, double, bool, int8, uint8, uint16, int16, int32, uint32, uint64, int64. *@par Outputs: *y: A tensor, indicating whether "x" has been initialized . 
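Editor's aside on the ParallelConcat hunk above: every input must have first dimension 1 and identical shape, and the "shape" attribute gives the result shape, with the number of inputs in the first dimension. A fixed-size C++ sketch of that shape rule (illustration only):

#include <array>
#include <cstddef>

// N slices of shape [1, D] concatenated along dim 0 into shape [N, D].
template <std::size_t N, std::size_t D>
std::array<float, N * D> ParallelConcatSketch(
    const std::array<std::array<float, D>, N> &slices) {
  std::array<float, N * D> out{};
  for (std::size_t i = 0; i < N; ++i) {
    for (std::size_t j = 0; j < D; ++j) out[i * D + j] = slices[i][j];
  }
  return out;
}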
\n diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index c2f65c6a..f4eb763c 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -32,7 +32,10 @@ namespace ge { *@par Inputs: *This op may use some OS-provided source of non-determinism (e.g. an RNG), *so each execution will give different results. Inputs include: -*@li shape: The shape of the output tensor . \n +*shape: The shape of the output tensor . \n + +*@par Attributes: +*dtype: required, type. \n *@par Outputs: *y: Non-deterministic integer values with the specified shape . \n @@ -54,13 +57,10 @@ REG_OP(NonDeterministicInts) *counter is an unspecified implementation detail . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li delta: The amount of advancement . \n -*@par Outputs: -*y:A Returns the created operation . \n - *@par Third-party framework compatibility * Compatible with tensorflow RngSkip operator. @@ -81,11 +81,16 @@ power of two. The bias is small for values of `maxval - minval` significantly smaller than the range of the output (either `2^32` or `2^64`) . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor. -*@li minval: Minimum value (inclusive, scalar). -*@li maxval: Maximum value (exclusive, scalar) . \n +*@li counts: A 0/1-D Tensor or Python value. The counts of the binomial +distribution. Must be broadcastable with the leftmost dimension defined by `shape`. +*@li probs: A 0/1-D Tensor or Python value. The probability of success for the +binomial distribution. Must be broadcastable with the leftmost dimension defined by `shape`. \n + +*@par Attributes: +*dtype: required, type. \n *@par Outputs: *y: Random values with the specified shape . \n @@ -109,7 +114,7 @@ REG_OP(StatefulRandomBinomial) *The generated values will have mean 0 and standard deviation 1 . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -134,7 +139,7 @@ REG_OP(StatefulStandardNormalV2) *deviations from the mean are dropped and re-picked . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -158,7 +163,7 @@ The generated values follow a uniform distribution in the range `[0, 1)`. The lower bound 0 is included in the range, while the upper bound 1 is excluded. *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -181,7 +186,7 @@ REG_OP(StatefulUniform) The generated values are uniform integers covering the whole range of `dtype` .
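Editor's aside on the "slightly biased unless maxval - minval is an exact power of two" remark that recurs in these hunks: reducing a full-range draw modulo the span over-represents the smaller residues. A sketch of the biased reduction (illustration only; real kernels may use rejection sampling instead):

#include <cstdint>

uint64_t BiasedBoundedDraw(uint64_t raw, uint64_t minval, uint64_t maxval) {
  const uint64_t span = maxval - minval;  // must be non-zero
  // When 2^64 % span != 0, the leftover draws map onto the low residues, so those
  // outputs are slightly more likely; the bias vanishes for power-of-two spans.
  return minval + raw % span;
}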
\n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor . \n @@ -209,7 +214,7 @@ power of two. The bias is small for values of `maxval - minval` significantly smaller than the range of the output (either `2^32` or `2^64`) . \n *@par Inputs: -*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li x: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor. *@li minval: Minimum value (inclusive, scalar). diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index f9cc2549..a78d63a1 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -295,7 +295,7 @@ REG_OP(StringSplit) *@par Inputs: include: -*@li input:A Tensor of type string. The text to be processed. \n +*input: A Tensor of type string. The text to be processed. \n *@par Attributes: *@li pattern: A string. The regular expression to match the input. *@li rewrite: A string. The rewrite to be applied to the matched expression. *@li replace_global: An optional bool. Defaults to True. If True, the replacement is global, otherwise the replacement is done only on the first match. -*@par output: -*@li output::A Tensor of type string. +*@par Outputs: +*output: A Tensor of type string. */ REG_OP(StaticRegexReplace) .INPUT(input, TensorType({DT_STRING})) @@ -322,13 +322,13 @@ REG_OP(StaticRegexReplace) *@par Inputs: include: -*@li input:A Tensor of type string. The text to be processed. \n +*input: A Tensor of type string. The text to be processed. \n *@par Attributes: -*@li pattern:A string. The regular expression to match the input. +*pattern: A string. The regular expression to match the input. -*@par output: -*@li output::A bool tensor with the same shape as `input`. +*@par Outputs: +*output: A bool tensor with the same shape as `input`. */ REG_OP(StaticRegexFullMatch) .INPUT(input, TensorType({DT_STRING})) @@ -347,10 +347,10 @@ include: *@li num_segments: A Tensor. Must be one of the following types: int32, int64. A scalar. *@par Attributes: -*@li separator:An optional string. Defaults to "". The separator to use when joining. +*separator: An optional string. Defaults to "". The separator to use when joining. -*@par output: -*@li output::A Tensor of type string.. +*@par Outputs: +*output: A Tensor of type string. */ REG_OP(UnsortedSegmentJoin) .INPUT(input, TensorType({DT_STRING})) .INPUT(segment_ids, TensorType({DT_INT32, DT_INT64})) @@ -366,13 +366,13 @@ REG_OP(UnsortedSegmentJoin) *@par Inputs: include: -*@li input:A Tensor of type string. The text to be processed. +*input: A Tensor of type string. The text to be processed. *@par Attributes: -*@li encoding:An optional string. Defaults to "". +*encoding: An optional string. Defaults to "". -*@par output: -*@li output::A Tensor of type string.. +*@par Outputs: +*output: A Tensor of type string. */ REG_OP(StringLower) .INPUT(input, TensorType({DT_STRING})) .OUTPUT(output, TensorType({DT_STRING})) .ATTR(encoding, String, "") .OP_END_FACTORY_REG(StringLower) @@ -386,13 +386,13 @@ REG_OP(StringLower) *@par Inputs: include: -*@li input:A Tensor of type string. The text to be processed. +*input: A Tensor of type string. The text to be processed. *@par Attributes: -*@li encoding:An optional string. Defaults to "". +*encoding: An optional string. Defaults to "". -*@par output: -*@li output::A Tensor of type string.. +*@par Outputs: +*output: A Tensor of type string.
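Editor's aside: a byte-wise sketch of what StringUpper documents for the default empty "encoding" attribute (ASCII illustration only, not the operator's implementation):

#include <algorithm>
#include <cctype>
#include <string>

std::string StringUpperAscii(std::string s) {
  std::transform(s.begin(), s.end(), s.begin(),
                 [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
  return s;  // e.g. "abc1" -> "ABC1"
}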
*/ REG_OP(StringUpper) .INPUT(input, TensorType({DT_STRING})) @@ -901,10 +901,10 @@ REG_OP(DecodeBase64) *@brief StringNormalization performs string operations for basic cleaning . \n *@par Inputs: -*@li input: only accepts [C] or [1, C] UTF-8 strings tensor . \n +*input: only accepts [C] or [1, C] UTF-8 strings tensor . \n *@par Outputs: -*@li output: UTF-8 strings tensor after cleaning . \n +*output: UTF-8 strings tensor after cleaning . \n *@par Attributes: *@li stopwords : list of strings (default is empty). @@ -919,13 +919,13 @@ case-sensitive. Default is false. *string enum that cases output to be lowercased/uppercases/unchanged. Valid values are "LOWER", "UPPER", "NONE". Default is "NONE". -*@li local : string (default is "en_US"). +*@li locale : string (default is "C"). *Environment dependent string that denotes the locale according to which output -strings needs to be upper/lowercased.Default en_US or platform specific equivalent -as decided by the implementation . \n +strings need to be upper/lowercased. Default is C or a platform-specific equivalent +as decided by the implementation. \n *@attention Constraints: -*@li input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C]. +*input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C]. */ REG_OP(StringNormalizer) .INPUT(input, TensorType({DT_STRING})) .OUTPUT(output, TensorType({DT_STRING})) .ATTR(stopwords, ListString, {}) .ATTR(is_case_sensitive, Bool, false) .ATTR(case_change_action, String, "NONE") - .ATTR(local, String, "en_US") + .ATTR(locale, String, "C") .OP_END_FACTORY_REG(StringNormalizer) } // namespace ge diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 4a46e35f..f403fe12 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -29,15 +29,15 @@ namespace ge { *@par Inputs: *The input handle must have the resource type. Inputs include: -*@li x:A list of Tensor objects. One or more tensors from which the enqueued tensors should be taken . \n +*x:A list of Tensor objects. One or more tensors from which the enqueued tensors should be taken . \n *@par Outputs: -*@li y:A list of Tensor objects. One or more tensors from which the enqueued tensors should be taken . \n +*y:A list of Tensor objects. One or more tensors from which the enqueued tensors should be taken . \n *@par Attributes: -*@li type: An optional ge::DataType. It refers to the target data type of outputs . \n +*type: An optional ge::DataType. It refers to the target data type of outputs . \n *@par Third-party framework compatibility *Compatible with tensorflow QueueIsClosed operator. @@ -723,11 +723,12 @@ REG_OP(CompressFcOp) *@brief Performs Col2im for each batch entry. \n *@par Inputs: -*@li input_x: The Col Tensor. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`. -where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 \n +*@li x: The Col Tensor. 4-D, shape: `(n, c, kernel_h*kernel_w, ho*wo)`. +where ho/wo = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 for d in {h, w}. +*@li output_size: The img shape Tensor. 1-D, shape:`(2)`, value: (output_h, output_w). \n *@par Outputs: -*@li output_y: The img Tensor. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n +*y: The img Tensor. 4-D, shape: `(n, c, output_h, output_w)`. \n *@par Attributes: *@li kernel_shape: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution. @@ -909,7 +910,7 @@ output shape would be [max(ngram_indexes) + 1].
If input shape is [N, C], this o *@li either pool_strings or pool_int64s attributes must be present but not both. */ -REG_OP(TfidVectorizer) +REG_OP(TfIdfVectorizer) .INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING})) .OUTPUT(output, TensorType({DT_FLOAT})) .REQUIRED_ATTR(max_gram_length, Int) @@ -921,7 +922,7 @@ REG_OP(TfidVectorizer) .ATTR(pool_int64s, ListInt, {}) .ATTR(pool_strings, ListString, {}) .ATTR(weights, ListFloat, {}) - .OP_END_FACTORY_REG(TfidVectorizer) + .OP_END_FACTORY_REG(TfIdfVectorizer) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 7fc1cdea..70e42dc9 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -20,7 +20,7 @@ #include #include "toolchain/prof_callback.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -357,7 +357,7 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_ */ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index a244c793..76836e7b 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -43,6 +43,7 @@ typedef enum tagRtChipType { CHIP_LHISI, CHIP_DC, CHIP_CLOUD_V2, + CHIP_NO_DEVICE, CHIP_END, } rtChipType_t; @@ -53,11 +54,11 @@ typedef enum tagRtAicpuScheType { } rtAicpuScheType; typedef enum tagRtDeviceCapabilityType { - RT_SCHEDULE_SOFTWARE = 0, // SoftWare Schedule - RT_SCHEDULE_SOFTWARE_OPT, - RT_SCHEDULE_HARDWARE, // HWTS Schedule - RT_AICPU_BLOCKING_OP_NOT_SUPPORT, - RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation + RT_SCHEDULE_SOFTWARE = 0, // Software Schedule + RT_SCHEDULE_SOFTWARE_OPT, + RT_SCHEDULE_HARDWARE, // HWTS Schedule + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, + RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation } rtDeviceCapabilityType; typedef enum tagRtVersion { @@ -235,7 +236,7 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); */ RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index e95d4c89..c597a657 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -157,7 +157,7 @@ RTS_API rtError_t rtGetGroupCount(uint32_t *count); */ RTS_API rtError_t rtSetCtxINFMode(bool mode); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 18d837eb..4a9a5817 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if 
defined(__cplusplus) extern "C" { #endif @@ -80,15 +80,15 @@ typedef enum tagMemoryInfo { } rtMemoryInfo_t; typedef enum tagRtDeviceModuleType { - RT_MODULE_TYPE_SYSTEM = 0, - RT_MODULE_TYPE_AICPU, - RT_MODULE_TYPE_CCPU, - RT_MODULE_TYPE_DCPU, - RT_MODULE_TYPE_AICORE, - RT_MODULE_TYPE_TSCPU, - RT_MODULE_TYPE_PCIE, - RT_MODULE_TYPE_VECTOR_CORE -} tagRtDeviceModuleType_t; + RT_MODULE_TYPE_SYSTEM = 0, /**< system info*/ + RT_MODULE_TYPE_AICPU, /**< aicpu info*/ + RT_MODULE_TYPE_CCPU, /**< ccpu info*/ + RT_MODULE_TYPE_DCPU, /**< dcpu info*/ + RT_MODULE_TYPE_AICORE, /**< AI CORE info*/ + RT_MODULE_TYPE_TSCPU, /**< tscpu info*/ + RT_MODULE_TYPE_PCIE, /**< PCIE info*/ + RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/ +} rtDeviceModuleType_t; /** * @ingroup dvrt_dev @@ -380,7 +380,7 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); */ RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 6e451695..33e2f4c1 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile(); */ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 1cd1a198..81b635c3 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -41,16 +41,6 @@ typedef enum rtEventWaitStatus { #define RT_EVENT_DDSYNC_NS 0x01U #define RT_EVENT_STREAM_MARK 0x02U #define RT_EVENT_DDSYNC 0x04U #define RT_EVENT_TIME_LINE 0x08U -#define RT_EVENT_DDSYNC_NS 0x01U -#define RT_EVENT_STREAM_MARK 0x02U -#define RT_EVENT_DDSYNC 0x04U -#define RT_EVENT_TIME_LINE 0x08U - -#define RT_EVENT_DDSYNC_NS 0x01U -#define RT_EVENT_STREAM_MARK 0x02U -#define RT_EVENT_DDSYNC 0x04U -#define RT_EVENT_TIME_LINE 0x08U - /** * @ingroup dvrt_event * @brief create event instance @@ -282,7 +272,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs */ RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 9b0221c7..c1b9bd6d 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -20,7 +20,7 @@ #include "base.h" #include "stream.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -647,7 +647,7 @@ RTS_API rtError_t rtStartMDCProfiler(void **addr, uint32_t length); */ RTS_API rtError_t rtStopMDCProfiler(void *addr); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index bace4bc6..b049e762 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ 
-24,7 +24,7 @@ #include "config.h" #include "stream.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -547,7 +547,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); */ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h old mode 100755 new mode 100644 index 720da7cd..f2809218 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Huawei Technologies Co. , Ltd. 2021. All rights reserved. + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * Description: ffts interface */ @@ -8,7 +8,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -19,8 +19,8 @@ extern "C" { #define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U typedef enum tagFftsType { - RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define - RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define + RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define + RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define } rtFftsType_t; typedef enum tagFftsSubTaskType { @@ -37,7 +37,7 @@ typedef enum tagFftsSubTaskType { } rtFftsSubTaskType_t; typedef struct tagManualThreadDmuInfo { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint16_t numOuter; uint16_t numInner; uint32_t strideOuter; @@ -50,44 +50,43 @@ typedef struct tagManualThreadDependency { } rtManualThreadDependency_t; typedef struct tagManualThreadAicAivInfo { - uint64_t taskParamAddr; // device mem + uint64_t taskParamAddr; // device mem uint16_t taskParamOffset; // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 - // when satMode=0 and FP16 computation with none INF inputs overflows/underflows - // results will be saturated to +/- MAX of FP16 + // when satMode=0 and FP16 computation with non-INF inputs overflows/underflows, + // results will be saturated to +/-MAX of FP16 uint8_t satMode; - uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved - uint8_t iCachePrefetchCnt; // units is 2K - uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 - uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 - uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts - // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index - uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3:reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 + uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 + uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts + // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index + uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; - rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length 
is the last threadPrefetchDmuIdx[threadDim - 1] + rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1] rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; } rtManualThreadAicAivInfo_t; typedef struct tagAutoThreadPrefetch { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint32_t dataAddrOffset; uint32_t nonTailDataLen; uint32_t tailDataLen; } rtAutoThreadPrefetch_t; typedef struct tagAutoThreadAicAivInfo { - uint64_t taskParamAddr; // device mem + uint64_t taskParamAddr; // device mem uint16_t taskParamOffset; // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 - // when satMode=0 and FP16 computation with none INF inputs overflows/underflows - // results will be saturated to +/- MAX of FP16 + // when satMode=0 and FP16 computation with non-INF inputs overflows/underflows, results will be saturated to +/-MAX of FP16 uint8_t satMode; - uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved - uint8_t iCachePrefetchCnt; // units is 2K - uint8_t prefetchEnableBitmap; // 8 bit bitmap - uint8_t prefetchOnceBitmap; // 8 bit bitmap + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3:reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap + uint8_t prefetchOnceBitmap; // 8 bit bitmap uint16_t tailBlkDim; uint16_t nonTailBlkDim; @@ -95,13 +94,13 @@ typedef struct tagAutoThreadAicAivInfo { const char *nonTailTaskFuncStub; const char *tailTaskFuncStub; - // for prefetch, valid num is prefetchEnableBitmap bit count - // if prefetchEnableBitmap = '00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid + // for prefetch, the valid entry count is the number of bits set in prefetchEnableBitmap. 
+ // if prefetchEnableBitmap='00010011', three prefetches are needed and only srcPrefetch[0], [1], [2] are valid rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; } rtAutoThreadAicAivInfo_t; typedef struct tagAutoThreadCacheInfo { - uint64_t dataAddr; // device mem + uint64_t dataAddr; // device mem uint32_t dataAddrOffset; uint32_t nonTailDataLen; uint32_t tailDataLen; @@ -109,7 +108,7 @@ } rtAutoThreadCacheInfo_t; typedef struct tagManualThreadCacheInfo { - rtManualThreadDmuInfo_t *dmuList; // 0-64k + rtManualThreadDmuInfo_t *dmuList; // 0-64k uint16_t dmuNum; uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM]; @@ -152,11 +151,11 @@ typedef struct tagFftsSubTaskInfo { } rtFftsSubTaskInfo_t; typedef struct tagFftsDescInfo { - uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder - uint8_t di; // discard invalidate - uint8_t dw; // discard write back - uint8_t df; // discard flush - uint8_t dataSplitUnit; // split source or ticket cache by 2~dataSplitUnit MB + uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder + uint8_t di; // discard invalidate + uint8_t dw; // discard write back + uint8_t df; // discard flush + uint8_t dataSplitUnit; // split source or ticket cache by 2^dataSplitUnit MB uint8_t prefetchOstNum; uint8_t cacheMaintainOstNum; uint8_t aicPrefetchUpper; @@ -166,20 +165,20 @@ } rtFftsDescInfo_t; typedef struct tagFftsTaskInfo { - rtFftsType_t fftsType; + rtFftsType_t fftsType; uint16_t subTaskNum; uint16_t tickCacheNum; rtFftsDescInfo_t fftsDesc; // sub task desc, real num is subTaskNum rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM]; - // ticket cache, real number is ticketCacheNum + // ticket cache, real number is tickCacheNum. rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; } rtFftsTaskInfo_t; RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif -#endif //__CCE_RUNTIME_FFTS_H +#endif // __CCE_RUNTIME_FFTS_H diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index a7618b45..d4af72c5 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -19,7 +19,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -490,7 +490,7 @@ RTS_API rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *ad */ RTS_API rtError_t rtDebugUnRegister(rtModel_t model); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h index 188656b1..016c352a 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -8,7 +8,7 @@ #include "base.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -23,6 +23,7 @@ extern "C" { */ RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream); + /** * @ingroup rt_stars * @brief create cdq instance. 
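For context on how the FFTS structures above compose, a minimal hypothetical launch sequence; the field values are illustrative only, the include paths are assumed, and rtStreamCreate is assumed from stream.h with its usual (stream, priority) signature:

#include "runtime/rt_ffts.h"
#include "runtime/stream.h"

void LaunchAutoThreadTask() {
    rtFftsTaskInfo_t taskInfo = {};
    taskInfo.fftsType = RT_FFTS_TYPE_AUTO_THREAD;  // ffts auto thread mode
    taskInfo.subTaskNum = 0;                       // real number of valid entries in subTask[]
    taskInfo.tickCacheNum = 0;                     // real number of valid entries in ticketCache[]
    taskInfo.fftsDesc.tm = 0;                      // 0: ordered subtask kickstart
    taskInfo.fftsDesc.dataSplitUnit = 1;           // split by 2^1 MB, per the corrected comment

    rtStream_t stream = nullptr;
    if (rtStreamCreate(&stream, 0) != RT_ERROR_NONE) {
        return;
    }
    // subTask[] would be populated with rtFftsSubTaskInfo_t entries before a real launch.
    (void)rtFftsTaskLaunch(&taskInfo, stream);
}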
@@ -76,10 +77,11 @@ RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *da * @param [in] stream launch task on the stream * @return RT_ERROR_NONE for ok, others failed */ -RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *prtAddr, +RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *ptrAddr, rtStream_t stream); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) + } #endif #endif // __CCE_RUNTIME_STARS_H diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index f9981514..3a078e99 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -20,7 +20,7 @@ #include "base.h" #include "event.h" -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) extern "C" { #endif @@ -211,7 +211,7 @@ RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, con */ RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); -#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +#if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index 07b32149..9350f9d4 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -84,6 +84,7 @@ #endif #include +#include namespace Msprofiler { namespace Api { @@ -105,6 +106,37 @@ extern "C" { MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); +typedef int Status; +typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1; +/// +/// @ingroup AscendCL +/// @brief subscribe profiling data of graph +/// @param [in] graphId: the graph id subscribed +/// @param [in] profSubscribeConfig: pointer to config of model subscribe +/// @return Status result of function +/// +Status aclgrphProfGraphSubscribe(const uint32_t graphId, + const aclprofSubscribeConfig1 *profSubscribeConfig); + +/// +/// @ingroup AscendCL +/// @brief unsubscribe profiling data of graph +/// @param [in] graphId: the graph id subscribed +/// @return Status result of function +/// +Status aclgrphProfGraphUnSubscribe(const uint32_t graphId); + +/** + * @ingroup AscendCL + * @brief get graph id from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * + * @retval graph id of subscription data + * @retval 0 for failed + */ +size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index); #ifdef __cplusplus } #endif diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h index 5073cfb1..36b55216 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_callback.h +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -55,6 +55,17 @@ struct ReporterData { }; /** + * @name HashData + * @brief struct of data to hash + */ +struct HashData { + int deviceId; // the index of device + size_t dataLen; // the length of data + unsigned char *data; // the data content + uint64_t hashId; // the id of hashed data +}; + +/** * @name MsprofReporterModuleId * @brief module id of data to report */ @@ -75,6 +86,7 @@ enum MsprofReporterCallbackType { MSPROF_REPORTER_INIT, // init reporter MSPROF_REPORTER_UNINIT, // uninit reporter MSPROF_REPORTER_DATA_MAX_LEN, // data max length for 
calling report callback + MSPROF_REPORTER_HASH // hash data to id }; /**
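A hypothetical sketch of using the new MSPROF_REPORTER_HASH type together with struct HashData; `reporterCallback` stands in for the profiling callback registered elsewhere, and its (moduleId, type, data, len) shape is an assumption inferred from MsprofReporterCallbackType, not part of this patch:

#include <cstring>
#include "toolchain/prof_callback.h"

// Assumed shape of the registered reporter callback.
extern int reporterCallback(uint32_t moduleId, uint32_t type, void *data, uint32_t len);

void HashKernelName(const char *kernelName, uint32_t moduleId) {
    struct HashData hd = {};
    hd.deviceId = 0;                        // the index of device
    hd.dataLen = strlen(kernelName);        // the length of data
    hd.data = (unsigned char *)kernelName;  // the data content
    hd.hashId = 0;                          // filled in by the callback on success
    (void)reporterCallback(moduleId, MSPROF_REPORTER_HASH, &hd, sizeof(hd));
    // hd.hashId can now stand in for the raw string in subsequent ReporterData reports.
}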