!2069 manually revert headers

Merge pull request !2069 from upgrade_ascend
Author: lujiale (3 years ago)
Parent commit: 3a527af706

64 changed files with 1354 additions and 2387 deletions
  1. inc/external/acl/acl.h (+2, -2)
  2. inc/external/acl/acl_base.h (+0, -2)
  3. inc/external/acl/acl_mdl.h (+2, -14)
  4. inc/external/acl/acl_op.h (+0, -28)
  5. inc/external/acl/acl_prof.h (+0, -37)
  6. inc/external/acl/acl_rt.h (+0, -18)
  7. inc/external/acl/ops/acl_dvpp.h (+0, -97)
  8. inc/external/ge/ge_ir_build.h (+14, -14)
  9. inc/external/hccl/hccl.h (+0, -27)
 10. inc/framework/executor/ge_executor.h (+62, -76)
 11. inc/framework/ge_runtime/task_info.h (+6, -1)
 12. third_party/fwkacllib/inc/ops/array_ops.h (+18, -50)
 13. third_party/fwkacllib/inc/ops/control_flow_ops.h (+1, -1)
 14. third_party/fwkacllib/inc/ops/ctc_ops.h (+3, -5)
 15. third_party/fwkacllib/inc/ops/data_flow_ops.h (+2, -39)
 16. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+74, -92)
 17. third_party/fwkacllib/inc/ops/functional_ops.h (+3, -0)
 18. third_party/fwkacllib/inc/ops/image_ops.h (+31, -280)
 19. third_party/fwkacllib/inc/ops/linalg_ops.h (+2, -15)
 20. third_party/fwkacllib/inc/ops/list_ops.h (+32, -32)
 21. third_party/fwkacllib/inc/ops/lookup_ops.h (+2, -2)
 22. third_party/fwkacllib/inc/ops/math_ops.h (+60, -75)
 23. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+48, -184)
 24. third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+368, -331)
 25. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+42, -111)
 26. third_party/fwkacllib/inc/ops/nn_norm_ops.h (+121, -54)
 27. third_party/fwkacllib/inc/ops/nn_ops.h (+14, -14)
 28. third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+27, -70)
 29. third_party/fwkacllib/inc/ops/nn_training_ops.h (+4, -7)
 30. third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h (+41, -44)
 31. third_party/fwkacllib/inc/ops/pad_ops.h (+8, -14)
 32. third_party/fwkacllib/inc/ops/parsing_ops.h (+67, -68)
 33. third_party/fwkacllib/inc/ops/quantize_ops.h (+4, -6)
 34. third_party/fwkacllib/inc/ops/ragged_array_ops.h (+2, -7)
 35. third_party/fwkacllib/inc/ops/ragged_conversion_ops.h (+2, -1)
 36. third_party/fwkacllib/inc/ops/ragged_math_ops.h (+1, -5)
 37. third_party/fwkacllib/inc/ops/random_ops.h (+21, -122)
 38. third_party/fwkacllib/inc/ops/reduce_ops.h (+17, -90)
 39. third_party/fwkacllib/inc/ops/resource_variable_ops.h (+13, -15)
 40. third_party/fwkacllib/inc/ops/rnn.h (+10, -8)
 41. third_party/fwkacllib/inc/ops/rpn_ops.h (+3, -3)
 42. third_party/fwkacllib/inc/ops/sdca_ops.h (+4, -10)
 43. third_party/fwkacllib/inc/ops/selection_ops.h (+53, -110)
 44. third_party/fwkacllib/inc/ops/sparse_ops.h (+23, -20)
 45. third_party/fwkacllib/inc/ops/spectral_ops.h (+10, -10)
 46. third_party/fwkacllib/inc/ops/split_combination_ops.h (+6, -3)
 47. third_party/fwkacllib/inc/ops/state_ops.h (+1, -1)
 48. third_party/fwkacllib/inc/ops/stateful_random_ops.h (+13, -18)
 49. third_party/fwkacllib/inc/ops/string_ops.h (+25, -25)
 50. third_party/fwkacllib/inc/ops/transformation_ops.h (+8, -9)
 51. third_party/fwkacllib/inc/runtime/base.h (+2, -2)
 52. third_party/fwkacllib/inc/runtime/config.h (+7, -8)
 53. third_party/fwkacllib/inc/runtime/context.h (+2, -2)
 54. third_party/fwkacllib/inc/runtime/dev.h (+11, -11)
 55. third_party/fwkacllib/inc/runtime/dvfsprofile.h (+2, -2)
 56. third_party/fwkacllib/inc/runtime/event.h (+12, -2)
 57. third_party/fwkacllib/inc/runtime/kernel.h (+2, -2)
 58. third_party/fwkacllib/inc/runtime/mem.h (+2, -2)
 59. third_party/fwkacllib/inc/runtime/rt_ffts.h (+37, -36)
 60. third_party/fwkacllib/inc/runtime/rt_model.h (+2, -2)
 61. third_party/fwkacllib/inc/runtime/rt_stars.h (+3, -5)
 62. third_party/fwkacllib/inc/runtime/stream.h (+2, -2)
 63. third_party/fwkacllib/inc/toolchain/prof_acl_api.h (+0, -32)
 64. third_party/fwkacllib/inc/toolchain/prof_callback.h (+0, -12)

inc/external/acl/acl.h (+2, -2)

@@ -25,9 +25,9 @@
 extern "C" {
 #endif
 
-// Current version is 1.1.0
+// Current version is 1.0.0
 #define ACL_MAJOR_VERSION 1
-#define ACL_MINOR_VERSION 1
+#define ACL_MINOR_VERSION 0
 #define ACL_PATCH_VERSION 0
 
 /**
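
Since the revert takes the advertised version back to 1.0.0, downstream code can only rely on the preprocessor macros above to stay portable across both header generations. A minimal compile-time gate, as a sketch (the HAS_ACL_1_1_APIS name is only an example, not part of the headers):

    #include "acl/acl.h"

    // Gate 1.1.x-only usage on the version macros defined above. After this
    // revert ACL_MINOR_VERSION is 0 again, so the #else branch is taken.
    #if ACL_MAJOR_VERSION > 1 || (ACL_MAJOR_VERSION == 1 && ACL_MINOR_VERSION >= 1)
    #define HAS_ACL_1_1_APIS 1  // e.g. aclrtQueryEventWaitStatus, removed below
    #else
    #define HAS_ACL_1_1_APIS 0  // 1.0.0: use only the APIs kept by this revert
    #endif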


inc/external/acl/acl_base.h (+0, -2)

@@ -150,8 +150,6 @@ typedef enum {
 ACL_DOUBLE = 11,
 ACL_BOOL = 12,
 ACL_STRING = 13,
-ACL_COMPLEX64 = 16,
-ACL_COMPLEX128 = 17
 } aclDataType;
 
 typedef enum {
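
Any switch over aclDataType has to drop the complex cases once this lands. A hedged sketch of an element-size helper; only the three members shown in the hunk are taken from this diff, and the rest of the enum is assumed unchanged:

    #include "acl/acl_base.h"
    #include <cstddef>

    // Byte width per element for a few aclDataType values. Returns 0 for
    // variable-length or unhandled types. ACL_COMPLEX64/ACL_COMPLEX128 cases
    // no longer compile after this revert and must be deleted.
    inline size_t ElementSize(aclDataType type) {
      switch (type) {
        case ACL_DOUBLE: return 8;
        case ACL_BOOL:   return 1;
        case ACL_STRING: return 0;  // variable length
        default:         return 0;  // remaining members omitted in this sketch
      }
    }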


inc/external/acl/acl_mdl.h (+2, -14)

@@ -297,21 +297,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset,
 
 /**
  * @ingroup AscendCL
- * @brief Get aclTensorDesc from aclmdlDataset
- *
- * @param dataset [IN] aclmdlDataset pointer;
- * @param index [IN] index of tensorDesc
- *
- * @retval Get address of aclTensorDesc when executed successfully.
- * @retval Failure return NULL
- */
-ACL_FUNC_VISIBILITY aclTensorDesc *aclmdlGetDatasetTensorDesc(const aclmdlDataset *dataset, size_t index);
-
-/**
- * @ingroup AscendCL
  * @brief Get the number of aclDataBuffer in aclmdlDataset
  *
- * @param dataset [IN] aclmdlDataset pointer
+ * @param dataset [IN] aclmdlDataset poiter
  *
  * @retval the number of aclDataBuffer
  */

@@ -321,7 +309,7 @@ ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *datas
  * @ingroup AscendCL
  * @brief Get the aclDataBuffer in aclmdlDataset by index
  *
- * @param dataset [IN] aclmdlDataset pointer
+ * @param dataset [IN] aclmdlDataset poiter
  * @param index [IN] the index of aclDataBuffer
  *
  * @retval Get successfully, return the address of aclDataBuffer
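
The surviving pair aclmdlGetDatasetNumBuffers / aclmdlGetDatasetBuffer is still enough to walk a dataset; only the tensor-desc accessor goes away. A minimal sketch (aclGetDataBufferAddr is assumed from acl_base.h, which this hunk does not touch):

    #include "acl/acl_base.h"
    #include "acl/acl_mdl.h"

    // Visit every data buffer in a dataset using the accessors kept by the
    // revert; aclmdlGetDatasetBuffer returns NULL on failure.
    void VisitBuffers(const aclmdlDataset *dataset) {
      const size_t num = aclmdlGetDatasetNumBuffers(dataset);
      for (size_t i = 0; i < num; ++i) {
        aclDataBuffer *buffer = aclmdlGetDatasetBuffer(dataset, i);
        if (buffer != nullptr) {
          void *addr = aclGetDataBufferAddr(buffer);  // assumed unchanged API
          (void)addr;  // hand off to user code
        }
      }
    }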


inc/external/acl/acl_op.h (+0, -28)

@@ -137,34 +137,6 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att
 
 /**
  * @ingroup AscendCL
- * @brief set an attribute. the type of the attribute is aclDataType
- *
- * @param attr [OUT] pointer to the instance of aclopAttr
- * @param attrName [IN] attribute name
- * @param attrValue [IN] attribute value
- *
- * @retval ACL_SUCCESS The function is successfully executed.
- * @retval OtherValues Failure
- */
-ACL_FUNC_VISIBILITY aclError aclopSetAttrDataType(aclopAttr *attr, const char *attrName, aclDataType attrValue);
-
-/**
- * @ingroup AscendCL
- * @brief set an attribute. the type of the attribute is list of aclDataType
- *
- * @param attr [OUT] pointer to the instance of aclopAttr
- * @param attrName [IN] attribute name
- * @param numValues [IN] number of values. false if attrValue is 0, true otherwise.
- * @param values [IN] pointer to values
- *
- * @retval ACL_SUCCESS The function is successfully executed.
- * @retval OtherValues Failure
- */
-ACL_FUNC_VISIBILITY aclError aclopSetAttrListDataType(aclopAttr *attr, const char *attrName, int numValues,
-                                                      const aclDataType values[]);
-
-/**
- * @ingroup AscendCL
  * @brief set an attribute. the type of the attribute is list of bools
  *
  * @param attr [OUT] pointer to the instance of aclopAttr
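
With the aclDataType setters gone, attributes of that kind can no longer be set directly; the remaining setters (string, bool lists, and so on) are the only channel. A hedged one-liner against aclopSetAttrString, whose full signature is assumed to be (attr, attrName, attrValue) based on the truncated hunk context above:

    #include "acl/acl_op.h"

    // Sketch: set a plain string attribute. The attribute name and value
    // here are placeholders, not an API contract.
    aclError SetExampleAttr(aclopAttr *attr) {
      return aclopSetAttrString(attr, "data_format", "NCHW");
    }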


inc/external/acl/acl_prof.h (+0, -37)

@@ -40,20 +40,13 @@ typedef enum {
 ACL_AICORE_MEMORY_BANDWIDTH = 2,
 ACL_AICORE_L0B_AND_WIDTH = 3,
 ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
-ACL_AICORE_MEMORY_UB = 5,
 ACL_AICORE_NONE = 0xFF
 } aclprofAicoreMetrics;
 
-typedef enum {
-  ACL_STEP_START = 0,  // step start
-  ACL_STEP_END = 1     // step end
-} aclprofStepTag;
-
 typedef struct aclprofConfig aclprofConfig;
 typedef struct aclprofStopConfig aclprofStopConfig;
 typedef struct aclprofAicoreEvents aclprofAicoreEvents;
 typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;
-typedef struct aclprofStepInfo aclprofStepInfo;
 
 /**
  * @ingroup AscendCL

@@ -329,36 +322,6 @@ ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opI
  */
 ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);
 
-/**
- * @ingroup AscendCL
- * @brief
- *
- * @param stepInfo [IN] pointer to stepInfo data
- * @param aclprofstepTag [IN] start or end flag
- * @param stream [IN] steam info
- *
- * @retval 0 for failed
- */
-ACL_FUNC_VISIBILITY aclError aclprofGetStepTimestamp(aclprofStepInfo *stepInfo, aclprofStepTag tag, aclrtStream stream);
-
-/**
- * @ingroup AscendCL
- * @brief create pointer to aclprofStepInfo data
- *
- *
- * @retval aclprofStepInfo pointer
- */
-ACL_FUNC_VISIBILITY aclprofStepInfo *aclprofCreateStepInfo();
-
-/**
- * @ingroup AscendCL
- * @brief destroy aclprofStepInfo pointer
- *
- *
- * @retval void
- */
-ACL_FUNC_VISIBILITY void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo);
-
 #ifdef __cplusplus
 }
 #endif
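
After the revert, ACL_AICORE_MEMORY_UB is no longer a valid metrics choice and the step-info trio is gone entirely. A sketch of building a profiling config from a surviving enum member; aclprofCreateConfig and its parameter order are assumptions from the public profiling API, not shown in this hunk:

    #include "acl/acl_prof.h"

    // Sketch: request AI Core memory-bandwidth metrics for one device.
    aclprofConfig *MakeConfig(uint32_t device_id) {
      uint32_t devices[1] = {device_id};
      return aclprofCreateConfig(devices, 1U, ACL_AICORE_MEMORY_BANDWIDTH,
                                 nullptr /* aicore events */,
                                 0ULL /* data type config */);
    }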


inc/external/acl/acl_rt.h (+0, -18)

@@ -44,12 +44,6 @@ typedef enum aclrtEventStatus {
 ACL_EVENT_STATUS_RESERVED = 2,
 } aclrtEventStatus;
 
-typedef enum aclrtEventWaitStatus {
-  ACL_EVENT_WAIT_STATUS_COMPLETE = 0,
-  ACL_EVENT_WAIT_STATUS_NOT_READY = 1,
-  ACL_EVENT_WAIT_STATUS_RESERVED = 0xffff,
-} aclrtEventWaitStatus;
-
 typedef enum aclrtCallbackBlockType {
 ACL_CALLBACK_NO_BLOCK,
 ACL_CALLBACK_BLOCK,

@@ -507,18 +501,6 @@ ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus
 
 /**
  * @ingroup AscendCL
- * @brief Queries an event's wait-status
- *
- * @param event [IN] event to query
- * @param status [OUT] event wait-status
- *
- * @retval ACL_SUCCESS The function is successfully executed.
- * @retval OtherValues Failure
- */
-ACL_FUNC_VISIBILITY aclError aclrtQueryEventWaitStatus(aclrtEvent event, aclrtEventWaitStatus *status);
-
-/**
- * @ingroup AscendCL
  * @brief Block Host Running, wait event to be complete
  *
  * @param event [IN] event to wait
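
Without aclrtQueryEventWaitStatus, callers fall back to the completion query that survives above, or to the blocking wait documented in the following hunk context. A minimal polling sketch; ACL_EVENT_STATUS_COMPLETE and ACL_EVENT_STATUS_NOT_READY are assumed from the unchanged head of the aclrtEventStatus enum:

    #include "acl/acl_rt.h"

    // Spin until the event completes, using the surviving aclrtQueryEvent
    // instead of the removed wait-status query.
    aclError WaitByPolling(aclrtEvent event) {
      aclrtEventStatus status = ACL_EVENT_STATUS_NOT_READY;
      do {
        const aclError ret = aclrtQueryEvent(event, &status);
        if (ret != ACL_SUCCESS) {
          return ret;  // the query itself failed
        }
      } while (status != ACL_EVENT_STATUS_COMPLETE);
      return ACL_SUCCESS;
    }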


inc/external/acl/ops/acl_dvpp.h (+0, -97)

@@ -158,20 +158,6 @@ enum acldvppJpegFormat {
 ACL_JPEG_CSS_UNKNOWN = 1000
 };
 
-enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0 };
-
-enum aclvdecChannelDescParamType { ACL_VDEC_CSC_MATRIX_UINT32 = 0 };
-
-// Csc Matrix can be used both for acldvppChannelDescParamType and aclvdecChannelDescParamType
-enum acldvppCscMatrix {
-  ACL_DVPP_CSC_MATRIX_BT601_WIDE = 0,
-  ACL_DVPP_CSC_MATRIX_BT601_NARROW,
-  ACL_DVPP_CSC_MATRIX_BT709_WIDE,
-  ACL_DVPP_CSC_MATRIX_BT709_NARROW,
-  ACL_DVPP_CSC_MATRIX_BT2020_WIDE,
-  ACL_DVPP_CSC_MATRIX_BT2020_NARROW
-};
-
 /**
  * @ingroup AscendCL
  * @brief alloc device memory for dvpp.

@@ -2574,90 +2560,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
     acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
     acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
     acldvppResizeConfig *resizeConfig, aclrtStream stream);
-/**
- * @ingroup AscendCL
- * @brief set param for dvpp channel desc
- *
- * @par Function
- * set attribution in dvpp channelDesc for specified type
- *
- * @param channelDesc [OUT] the channel destruction
- * @param paramType [IN] specified param type
- * @param length [IN] mem length of param
- * @param param [IN] pointer to param
- *
- * @retval ACL_SUCCESS The function is successfully executed.
- * @retval OtherValues Failure
- *
- * @see acldvppGetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
- */
-ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescParam(acldvppChannelDesc *channelDesc,
-                                                        acldvppChannelDescParamType paramType, size_t length,
-                                                        const void *param);
-
-/**
- * @ingroup AscendCL
- * @brief get param of dvpp channel desc
- *
- * @par Function
- * get attribution value in dvpp channelDesc for specified type
- *
- * @param channelDesc [IN] the channel destruction
- * @param paramType [IN] specified param type
- * @param length [IN] mem length allocated for output param
- * @param paramRetSize [OUT] mem length of output param
- * @param param [OUT] pointer to output param
- *
- * @retval ACL_SUCCESS The function is successfully executed.
- * @retval OtherValues Failure
- *
- * @see acldvppSetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
- */
-ACL_FUNC_VISIBILITY aclError acldvppGetChannelDescParam(const acldvppChannelDesc *channelDesc,
-                                                        acldvppChannelDescParamType paramType, size_t length,
-                                                        size_t *paramRetSize, void *param);
-/**
- * @ingroup AscendCL
- * @brief set param for vdec channel desc
- *
- * @par Function
- * set attribution in channelDesc for specified type
- *
- * @param channelDesc [OUT] the vdec channel destruction
- * @param paramType [IN] specified param type
- * @param length [IN] mem length of param
- * @param param [IN] pointer to param
- *
- * @retval ACL_SUCCESS The function is successfully executed.
- * @retval OtherValues Failure
- *
- * @see aclvdecGetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
- */
-ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescParam(aclvdecChannelDesc *channelDesc,
-                                                        aclvdecChannelDescParamType paramType, size_t length,
-                                                        const void *param);
-
-/**
- * @ingroup AscendCL
- * @brief get param of vdec channel desc
- *
- * @par Function
- * get attribution value in channelDesc for specified type
- *
- * @param channelDesc [IN] the vdec channel destruction
- * @param paramType [IN] specified param type
- * @param length [IN] mem length allocated for output param
- * @param paramRetSize [OUT] mem length of output param
- * @param param [OUT] pointer to output param
- *
- * @retval ACL_SUCCESS The function is successfully executed.
- * @retval OtherValues Failure
- *
- * @see aclvdecSetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
- */
-ACL_FUNC_VISIBILITY aclError aclvdecGetChannelDescParam(const aclvdecChannelDesc *channelDesc,
-                                                        aclvdecChannelDescParamType paramType, size_t length,
-                                                        size_t *paramRetSize, void *param);
 #ifdef __cplusplus
 }
 #endif


inc/external/ge/ge_ir_build.h (+14, -14)

@@ -1,18 +1,18 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef INC_EXTERNAL_GE_IR_BUILD_H_
 #define INC_EXTERNAL_GE_IR_BUILD_H_


inc/external/hccl/hccl.h (+0, -27)

@@ -145,33 +145,6 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
 extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
 
 /**
- * @brief AllGather operator.
- *
- * @param sendBuff A pointer identifying the input data address of the operator.
- * @param count An integer(u64) identifying the number of the send data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param destRank An integer identifying the destination rank.
- * @param comm A pointer identifying the communication resource based on.
- * @param stream A pointer identifying the stream information.
- * @return HcclResult
- */
-extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, uint32_t destRank, HcclComm comm,
-                           aclrtStream stream);
-/**
- * @brief AllGather operator.
- *
- * @param recvBuff A pointer identifying the output data address of the operator.
- * @param count An integer(u64) identifying the number of the receive data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param srcRank An integer identifying the source rank.
- * @param comm A pointer identifying the communication resource based on.
- * @param stream A pointer identifying the stream information.
- * @return HcclResult
- */
-extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, uint32_t srcRank, HcclComm comm,
-                           aclrtStream stream);
-
 /**
  * @brief Destroy HCCL comm
  *
  * @param comm A pointer identifying the communication resource targetting


inc/framework/executor/ge_executor.h (+62, -76)

@@ -50,30 +50,14 @@ class GE_FUNC_VISIBILITY GeExecutor {
  public:
   GeExecutor();
   ~GeExecutor() = default;
-  Status Initialize();
-  Status Finalize();
-
-  ///
-  /// @ingroup ge
-  /// @brief Initialize global execute environment.
-  /// @param [in] options: environment variables.
-  /// @return init result
-  ///
-  static Status Initialize(const std::map<std::string, std::string> &options);
-
-  ///
-  /// @ingroup ge
-  /// @brief Finalize global execute environment.
-  /// @return execute result
-  ///
-  static Status FinalizeEx();
-
-  Status UnloadModel(uint32_t modelId);
+  ge::Status Initialize();
+  ge::Status Finalize();
+
+  ge::Status UnloadModel(uint32_t modelId);
 
   // Get input and output descriptor
-  Status GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc, std::vector<TensorDesc> &output_desc,
-                          bool new_model_desc = false);
+  ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
+                              std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false);

@@ -84,7 +68,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario
   /// @return execute result
   ///
-  Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size);
+  ge::Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size);
 
   ///
   /// @ingroup ge

@@ -96,8 +80,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario
   /// @return execute result
   ///
-  Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
-                             uint64_t image_width);
+  ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
+                                 uint64_t image_width);
 
   ///
   /// @ingroup ge

@@ -109,8 +93,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] dynamic_dims: array of dynamic dimensions
   /// @return execute result
   ///
-  Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
-                        const std::vector<uint64_t> &dynamic_dims);
+  ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
+                            const std::vector<uint64_t> &dynamic_dims);
 
   ///
   /// @ingroup ge

@@ -120,8 +104,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] cur_dynamic_dims: current dynamic dims
   /// @return execute result
   ///
-  Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
-                           std::vector<uint64_t> &cur_dynamic_dims);
+  ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
+                               std::vector<uint64_t> &cur_dynamic_dims);
 
   ///
   /// @ingroup ge

@@ -131,7 +115,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] dynamic_type
   /// @return execute result
   ///
-  Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
+  ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
+                                 int32_t &dynamic_type);
 
   ///
   /// @ingroup ge

@@ -140,7 +125,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] batch_info
   /// @return execute result
   ///
-  Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
+  ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
 
   ///
   /// @ingroup ge

@@ -149,9 +134,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] user_designate_shape_order
   /// @return execute result
   ///
-  Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
+  ge::Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
 
-  Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);
+  ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);
 
   ///
   /// @ingroup ge

@@ -163,22 +148,22 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp
   /// @return execute result
   ///
-  Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
-                            const std::vector<kAippDynamicBatchPara> &aipp_batch_para,
-                            const kAippDynamicPara &aippParms);
+  ge::Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
+                                const std::vector<kAippDynamicBatchPara> &aippBatchPara,
+                                const kAippDynamicPara &aippParms);
 
-  Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
+  ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
 
-  Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name,
-                   std::string &attr_value);
+  ge::Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name,
+                       std::string &attr_value);
 
-  Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
+  ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
 
-  Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
+  ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
 
-  Status CommandHandle(const Command &command);
+  ge::Status CommandHandle(const ge::Command &command);
 
-  Status SetDump(const DumpConfig &dump_config);
+  ge::Status SetDump(const DumpConfig &dump_config);
 
   ///
   /// @ingroup ge

@@ -188,7 +173,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @return SUCCESS
   /// @return FAILED
   ///
-  Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size);
+  ge::Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size);
 
   ///
   /// @ingroup ge

@@ -197,7 +182,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] ModelData &model_data: Offline model memory data
   /// @return SUCCESS handle successfully / others handle failed
   ///
-  Status LoadDataFromFile(const std::string &path, ModelData &model_data);
+  ge::Status LoadDataFromFile(const std::string &path, ge::ModelData &model_data);
 
   ///
   /// @ingroup ge

@@ -210,8 +195,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] uint32_t &model_id: Corresponding identification after model loading
   /// @return SUCCESS handle successfully / others handle failed
   ///
-  Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size,
-                           void *weight_ptr, size_t weight_size);
+  ge::Status LoadModelFromData(uint32_t &model_id, const ge::ModelData &model_data, void *dev_ptr, size_t mem_size,
+                               void *weight_ptr, size_t weight_size);
 
   ///
   /// @ingroup ge

@@ -222,8 +207,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [in] output_queue_ids: input queue ids create from user.
   /// @return: 0 for success / others for fail
   ///
-  Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids,
-                        const std::vector<uint32_t> &output_queue_ids);
+  ge::Status LoadModelWithQ(uint32_t &model_id, const ge::ModelData &model_data,
+                            const std::vector<uint32_t> &input_queue_ids,
+                            const std::vector<uint32_t> &output_queue_ids);
 
   ///
   /// @ingroup ge

@@ -235,8 +221,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] domi::OutputData *output_data: Model output data
   /// @return SUCCESS handle successfully / others handle failed
   ///
-  Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data,
-                   bool async_mode = false);
+  ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data,
+                       ge::RunModelData &output_data, bool async_mode = false);
 
   ///
   /// @ingroup ge

@@ -250,9 +236,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data
   /// @return SUCCESS handle successfully / others handle failed
   ///
-  Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data,
-                   const std::vector<GeTensorDesc> &input_desc, RunModelData &run_output_data,
-                   std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
+  ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
+                       const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
+                       std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
 
   ///
   /// @ingroup ge

@@ -262,7 +248,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] size_t &weight_size Weight memory space size
   /// @return SUCCESS handle successfully / others handle failed
   ///
-  Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size);
+  ge::Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size);
 
   ///
   /// @ingroup ge

@@ -273,39 +259,39 @@ class GE_FUNC_VISIBILITY GeExecutor {
   /// @param [out] size_t &weight_size Weight memory space size
   /// @return SUCCESS handle successfully / others handle failed
   ///
-  Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size);
+  ge::Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size);
 
-  static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream,
-                             SingleOp **single_op);
+  static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream,
+                                 SingleOp **single_op);
 
-  static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream,
-                               SingleOp **single_op, const uint64_t model_id);
+  static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream,
+                                   SingleOp **single_op, const uint64_t model_id);
 
-  static Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
-                             std::vector<DataBuffer> &outputs);
+  static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
+                                 std::vector<DataBuffer> &outputs);
 
-  static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream,
-                                    DynamicSingleOp **single_op);
+  static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
+                                        DynamicSingleOp **single_op);
 
-  static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream,
-                                      DynamicSingleOp **single_op, const uint64_t model_id);
+  static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
+                                          DynamicSingleOp **single_op, const uint64_t model_id);
 
-  static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc,
-                             const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc,
-                             std::vector<DataBuffer> &outputs);
+  static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc,
+                                 const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc,
+                                 std::vector<DataBuffer> &outputs);
 
-  static Status ReleaseSingleOpResource(void *stream);
+  static ge::Status ReleaseSingleOpResource(void *stream);
 
-  static Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id);
+  static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id);
 
-  Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count);
-  Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
-  Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
-                                   std::vector<InputOutputDims> &output_dims);
-  Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);
+  ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count);
+  ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
+  ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
+                                       std::vector<InputOutputDims> &output_dims);
+  ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);
 
  private:
-  static std::atomic_bool is_inited_;
+  static bool isInit_;
 };
 }  // namespace ge
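
The net effect of the hunks above is that every GeExecutor method returns the explicitly qualified ge::Status again. A hedged caller sketch built only from signatures visible in this diff (ge::SUCCESS and the include path are assumptions):

    #include "framework/executor/ge_executor.h"

    // Sketch: load an offline model from disk, execute it once, unload it.
    ge::Status RunModelOnce(const std::string &om_path, void *stream,
                            const ge::RunModelData &input, ge::RunModelData &output) {
      ge::GeExecutor executor;
      ge::Status ret = executor.Initialize();
      if (ret != ge::SUCCESS) {
        return ret;
      }
      ge::ModelData model_data;
      ret = executor.LoadDataFromFile(om_path, model_data);
      if (ret != ge::SUCCESS) {
        return ret;
      }
      uint32_t model_id = 0U;
      ret = executor.LoadModelFromData(model_id, model_data, nullptr, 0U, nullptr, 0U);
      if (ret != ge::SUCCESS) {
        return ret;
      }
      ret = executor.ExecModel(model_id, stream, input, output, false);
      (void)executor.UnloadModel(model_id);
      return ret;
    }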




inc/framework/ge_runtime/task_info.h (+6, -1)

@@ -343,13 +343,14 @@ class FusionEndTaskInfo : public TaskInfo {
 class HcclTaskInfo : public TaskInfo {
  public:
   HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr,
-               void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num,
+               void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num,
                const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id,
                int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag)
       : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag),
         hccl_type_(hccl_type),
         input_data_addr_(input_data_addr),
         output_data_addr_(output_data_addr),
+        workspace_addr_(workspace_addr),
         workspace_size_(workspace_size),
         hccl_stream_num_(hccl_stream_num),
         private_def_(private_def),

@@ -370,6 +371,9 @@ class HcclTaskInfo : public TaskInfo {
   void *output_data_addr() const {
     return output_data_addr_;
   }
+  void *workspace_addr() const {
+    return workspace_addr_;
+  }
   int64_t workspace_size() const {
     return workspace_size_;
   }

@@ -402,6 +406,7 @@ class HcclTaskInfo : public TaskInfo {
   std::string hccl_type_;
   void *input_data_addr_;
   void *output_data_addr_;
+  void *workspace_addr_;
   int64_t workspace_size_;
   int64_t hccl_stream_num_;
   std::vector<uint8_t> private_def_;
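
Every construction site of HcclTaskInfo now has to pass the restored workspace_addr between output_data_addr and workspace_size. A sketch of an updated call; the model_runner namespace and all values here are placeholders, not taken from this diff:

    #include "framework/ge_runtime/task_info.h"
    #include <memory>
    #include <vector>

    // Sketch: build an HCCL task description with the restored parameter
    // (workspace_addr). All addresses and sizes are placeholders.
    std::shared_ptr<ge::model_runner::HcclTaskInfo> MakeHcclTask(
        void *input_addr, void *output_addr, void *workspace_addr) {
      const std::vector<uint8_t> private_def;  // serialized HCCL parameters
      return std::make_shared<ge::model_runner::HcclTaskInfo>(
          "hccl_node", 0U, "HcclAllReduce", input_addr, output_addr,
          workspace_addr,  // restored by this revert
          0 /* workspace_size */, 1 /* hccl_stream_num */, private_def,
          nullptr /* ops_kernel_store */, 1 /* count */, 0 /* root_id */,
          0 /* op_type */, 0 /* data_type */, "hccl_group", false /* dump_flag */);
    }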


third_party/fwkacllib/inc/ops/array_ops.h (+18, -50)

@@ -35,7 +35,7 @@ namespace ge {
 * @li values:A `Tensor`. Must have the same type as `sorted_x`. \n
 
 *@par Attributes:
-*out_type:An optional `DType` from: `int32, int64`.
+*@li out_type:An optional `DType` from: `int32, int64`.
 Defaults to `int32`. \n
 
 *@par Outputs:

@@ -504,7 +504,7 @@ REG_OP(Constant)
 *x: A tensor. \n
 
 *@par Outputs:
-*y: A copy of input tensor. \n
+*y: A tensor. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Snapshot.

@@ -684,9 +684,7 @@ REG_OP(ExpandDims)
 
 *@par Inputs:
 *@li x: Original tensor.
-
-*@par Attributes:
-*@li axes: List of ints indicating the dimensions to be inserted. \n
+*@li axis: List of ints. \n
 
 *@par Outputs:
 *y: Reshape tensor with same data as input. \n

@@ -757,10 +755,10 @@ REG_OP(Squeeze)
 *@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n
 
 *@par Inputs:
-*x: A Tensor of type float32, float16, int8, int16, uint16, uint8, int32, int64, uint32, uint64, bool, double. \n
+*x: A tensor. \n
 
 *@par Outputs:
-*y: A tensor. The rank of input tensor. Type is int32. \n
+*y: A tensor. The rank of input tensor. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Rank.

@@ -850,6 +848,7 @@ REG_OP(PlaceHolder)
 *x: A tensor. \n
 
 *@par Attributes:
+*@li dtype: data type of tensor.
 *@li shape: tensor shape. \n
 
 *@par Outputs:

@@ -868,13 +867,13 @@ REG_OP(PlaceholderWithDefault)
 *@brief Reads and returns the value of the input variable tensor. \n
 
 *@par Inputs:
-*x: A tensor must have numeric type. \n
+*x: A tensor. \n
 
 *@par Attributes:
 *dtype: An optional int32 or int64. The output data type. Defaults to int32. \n
 
 *@par Outputs:
-*y: A tensor must have numeric type. \n
+*y: A tensor. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ReadVariableOp.

@@ -1135,10 +1134,10 @@ This is an M-length vector.
 This is an R-length vector
 
 *@par Attributes:
-*normalize: boolean (if true, edit distances are normalized by length of truth). \n
+*@li normalize: boolean (if true, edit distances are normalized by length of truth). \n
 
 *@par Outputs:
-*output: A dense float tensor with rank R - 1. \n
+*@li output: A dense float tensor with rank R - 1. \n
 
 *@par Third-party framework compatibility
 * Compatible with TensorFlow EditDistance operator.

@@ -1155,17 +1154,18 @@ REG_OP(EditDistance)
 .OP_END_FACTORY_REG(EditDistance)
 
 /**
-* @brief sort the input tensor without returning the value of index.
+* @brief sort_v2.
 
 * @par Inputs:
-* x: An ND tensor of type float16.
+* @li x: An ND tensor of type float16.
 
 * @par Attributes:
+
 * @li axis: An optional int. The dimension to sort along. This value defaults to -1.
 * @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False.
 
 * @par Outputs:
-* y: An ND tensor of type float16.
+* @li y: An ND tensor of type float16.
 
 * @attention Constraints:
 * @li Axis should select the last dim.

@@ -1206,7 +1206,7 @@ REG_OP(Expand)
 *@Returns a tensor containing the indices of all non-zero elements of input. \n
 
 *@par Inputs:
-*x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
+*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
 
 *@par Attributes:
 * transpose: the output tensor will be transposed if true. \n

@@ -1230,15 +1230,15 @@ REG_OP(NonZero)
 
 * @par Inputs:
 * One inputs, including:
-* x: A Tensor. Must be one of the following types:
+* @li x: A Tensor. Must be one of the following types:
 * float16, float32, int32, int8 ,uint8. \n
 
 * @par Attributes:
-* shape: A required listInt to specify the shape that the input tensor expanded to. \n
+* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n
 
 
 * @par Outputs:
-* y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n
+* @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n
 
 * @par Third-party framework compatibility
 * Compatible with the ONNX operator Expand.

@@ -1249,38 +1249,6 @@ REG_OP(ExpandD)
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
     .REQUIRED_ATTR(shape, ListInt)
     .OP_END_FACTORY_REG(ExpandD)
-
-/**
-*@brief Finds unique elements in a 1D tensor. \n
-
-*@par Inputs:
-*x: 1D tensor. Must be one of the following types:
-* float16, float32, double, int64, int32, int16, uint16, int8 ,uint8. \n
-
-*@par Attributes:
-*@li return_inverse: Whether to also return the indices for where elements in the original
-* input ended up in the returned unique list.
-*@li return_inverse: Whether to also return the counts for each unique element.
-
-*@par Outputs:
-*@li y1: The output list of unique scalar elements. Has the same type as "x".
-*@li y2: Representing the indices for where elements in the original input map to in the output.
-*@li y3: Representing the number of occurrences for each unique value or tensor. \n
-
-* @par Third-party framework compatibility
-* Compatible with the troch operator _unique2.
-*/
-
-REG_OP(UniqueWithCountsAndSorting)
-    .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
-    .OUTPUT(y1, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
-    .OUTPUT(y2, TensorType({ DT_INT32, DT_INT64 }))
-    .OUTPUT(y3, TensorType({ DT_INT32, DT_INT64 }))
-    .ATTR(return_inverse, Bool, false)
-    .ATTR(return_counts, Bool, false)
-    .OP_END_FACTORY_REG(UniqueWithCountsAndSorting)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_

third_party/fwkacllib/inc/ops/control_flow_ops.h (+1, -1)

@@ -96,7 +96,7 @@ REG_OP(RefMerge)
 * Otherwise, the data is forwarded to "output_false" . \n
 
 *@par Inputs:
-*@li data: The tensor to be forwarded.
+*@li data: The tensor to be forwarded. \n
 * Must be one of the following types: float16, float32, float64,
 * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
 *@li pred: A boolean scalar. The output port that will receive data . \n


third_party/fwkacllib/inc/ops/ctc_ops.h (+3, -5)

@@ -74,7 +74,7 @@ REG_OP(CTCLoss)
 *@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n
 
 *@par Attributes:
-* merge_repeated: If True, merge repeated classes in output. \n
+*@li merge_repeated: If True, merge repeated classes in output. \n
 
 *@par Outputs:
 *@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`,

@@ -108,8 +108,6 @@ REG_OP(CTCGreedyDecoder)
 
 *@par Attributes:
 *@li merge_repeated: If True, merge repeated classes in output. \n
-*@li beam_width:A scalar >= 0 (beam search beam width).
-*@li top_paths:A scalar >= 0, <= beam_width (controls output size).
 
 *@par Outputs:
 *@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j,

@@ -164,7 +162,7 @@ REG_OP(CTCBeamSearchDecoder)
 * Compatible with Pytorch CTCLoss operator.
 
 *@par Restrictions:
-*The length of Label should in [4, 1000].
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(CTCLossV2)
     .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE}))

@@ -205,7 +203,7 @@ REG_OP(CTCLossV2)
 * Compatible with Pytorch CTCLoss operator.
 
 *@par Restrictions:
-*The limit of Label’s length is 1K.
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(CTCLossV2Grad)
     .INPUT(grad_out, TensorType({DT_FLOAT, DT_DOUBLE}))


third_party/fwkacllib/inc/ops/data_flow_ops.h (+2, -39)

@@ -1201,8 +1201,6 @@ REG_OP(TensorArraySize)
 *@brief A queue implementation that dequeues elements in a random order. \n
 
 *@par Attributes:
-*@li component_types:A list of fully-defined Tensortype objects with
-the same length as shapes, or None.
 *@li shapes: (Optional.) A list of fully-defined TensorShape objects with
 the same length as dtypes, or None.
 *@li capacity: An integer. The upper bound on the number of elements that may

@@ -1283,7 +1281,6 @@ The length of this attr must be either 0 or the same as the length of
 elements are not constrained, and only one element may be dequeued at a time.
 *@li container: An optional string. Defaults to "". If non-empty, this queue
 is placed in the given container. Otherwise, a default container is used.
-*@li capacity:An integer. The upper bound on the number of elements that may be stored in this queue.
 *@li shared_name: An optional string. Defaults to "". If non-empty, this
 queue will be shared under the given name across multiple sessions. \n

@@ -1438,7 +1435,7 @@ REG_OP(OrderedMapClear)
 
 *@par Inputs:
 *Including:
-* resource: A Tensor of type DT_RESOURCE.
+* @li resource: A Tensor of type DT_RESOURCE.
 
 *@par Outputs:
 *handle: A Tensor of type DT_STRING ref. \n

@@ -1529,7 +1526,7 @@ REG_OP(OrderedMapPeek)
 
 *@par Inputs:
 *Including:
-* indices: A Tensor of type DT_INT32. \n
+* @li indices: A Tensor of type DT_INT32. \n
 
 *@par Attributes:
 *@li capacity: An optional int that is >= 0. Defaults to "0".

@@ -2335,40 +2332,6 @@ REG_OP(CacheAllIndexToLocal)
 .OP_END_FACTORY_REG(CacheAllIndexToLocal)
 
-/**
-*@brief LRUCacheV2, aicore LRUCache.
-*@par Inputs:
-*index_list: exchange index list
-*data: host data
-*cache: gm cache
-*tag: cache's tag
-*is_last_call: if is last call write all cache to data
-*@par Outputs:
-*data: output data
-*cache: gm cache
-*tag: cache's tag
-*index_offset_list: index_offset_list
-*not_in_cache_index_list: output not in cache's index_list
-*not_in_cache_number: scalar
-*@par Attributes:
-*pre_route_count: types of all outputs
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
-*/
-REG_OP(LRUCacheV2)
-    .INPUT(index_list, TensorType::BasicType())
-    .INPUT(data, TensorType::BasicType())
-    .INPUT(cache, TensorType::BasicType())
-    .INPUT(tag, TensorType::BasicType())
-    .INPUT(is_last_call, TensorType::BasicType())
-    .OUTPUT(data, TensorType::BasicType())
-    .OUTPUT(cache, TensorType::BasicType())
-    .OUTPUT(tag, TensorType::BasicType())
-    .OUTPUT(index_offset_list, TensorType::BasicType())
-    .OUTPUT(not_in_cache_index_list, TensorType::BasicType())
-    .OUTPUT(not_in_cache_number, TensorType::BasicType())
-    .REQUIRED_ATTR(pre_route_count, Int)
-    .OP_END_FACTORY_REG(LRUCacheV2)
-
 /**
 *@brief DynamicGetNext, dynamic get next data
 *@par Inputs:
 *x: the iterator, all types are available


+ 74
- 92
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -624,9 +624,9 @@ REG_OP(Log1p)


*@attention Constraints: *@attention Constraints:
*@li x2: The input data does not support 0 *@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form *requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent *on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8


@@ -2066,9 +2066,9 @@ REG_OP(FloorDiv)


*@attention Constraints: *@attention Constraints:
*@li x2: The input data does not support 0 *@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form *requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent *on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8


@@ -2200,9 +2200,9 @@ REG_OP(Tan)


*@attention Constraints: *@attention Constraints:
*@li x2: The input data does not support 0 *@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form *requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent *on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8


@@ -2467,11 +2467,11 @@ REG_OP(Eltwise)


*@par Inputs: *@par Inputs:
*One inputs, including: *One inputs, including:
* input_x: A tensor. Must be one of the following types:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n * float16, float32. \n


*@par Outputs: *@par Outputs:
*output_y: A Tensor with the same type and shape of input_x's. \n
*y: A Tensor with the same type and shape of input_x's. \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with the Pytorch operator Erfinv. \n *Compatible with the Pytorch operator Erfinv. \n
@@ -3154,13 +3154,13 @@ REG_OP(FusedMulAddNL2loss)
*@brief Tests whether the input exceeds a threshold. \n *@brief Tests whether the input exceeds a threshold. \n


*@par Inputs: *@par Inputs:
* x: A Tensor with any format. Must be one of the following types: float16, float32. \n
*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n


*@par Attributes: *@par Attributes:
* threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n
*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n


*@par Outputs: *@par Outputs:
* y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with the Caffe operator Threshold. * Compatible with the Caffe operator Threshold.
*/ */
@@ -3175,7 +3175,7 @@ REG_OP(FusedMulAddNL2loss)
*@brief Returns the index number corresponding to the maximum value entered. \n *@brief Returns the index number corresponding to the maximum value entered. \n


*@par Inputs: *@par Inputs:
*x: A tensor. Must be one of the following types: float16, float32. \n
*@li x: A tensor. Must be one of the following types: float16, float32. \n


*@par Attributes: *@par Attributes:
*@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000 *@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000
@@ -3203,11 +3203,12 @@ REG_OP(ArgMaxWithK)
*@brief Multiply tensor with scale. \n *@brief Multiply tensor with scale. \n


*@par Inputs: *@par Inputs:
*One input, including:
*x: A Tensor. Must be one of the following types:int32,int16, float16, float32.
*Five inputs, including:
* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
* @li x2: A scale. Must be float. \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type and shape as "x1". \n
*@li y: A Tensor. Has the same type and shape as "x1". \n


*@par Third-party framework compatibility: *@par Third-party framework compatibility:
* Compatible with the Pytorch operator muls. * Compatible with the Pytorch operator muls.
@@ -3222,11 +3223,12 @@ REG_OP(Muls)
*@brief Fill tensor with scale. \n *@brief Fill tensor with scale. \n


*@par Inputs: *@par Inputs:
*One input, including:
*x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
*Five inputs, including:
* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
* @li x2: A scale. Must be float. \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type and shape as "x1". \n
*@li y: A Tensor. Has the same type and shape as "x1". \n


*@par Third-party framework compatibility: *@par Third-party framework compatibility:
* Compatible with the Pytorch operator fills. * Compatible with the Pytorch operator fills.
@@ -3376,7 +3378,7 @@ REG_OP(TensorMove)


*@par Inputs:
*One input, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n


*@par Outputs:
*output_x: A Tensor. Has the same type as "x". \n
@@ -3395,7 +3397,7 @@ REG_OP(TensorRedirect)
* multiplies the result by the scalar value and adds it to tensor input_data


* @par Inputs:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32.
* @li x1: A mutable input Tensor of the same type as input_data.
@@ -3404,7 +3406,7 @@ REG_OP(TensorRedirect)
* float16, float32, int32. \n


* @par Outputs:
* @li y: A mutable Tensor. Has the same type as "x1". \n


* @par Third-party framework compatibility
* Compatible with the Pytorch operator Addcdiv.
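As a reading aid, a scalar reference of these semantics, assuming the torch.addcdiv rule y = input_data + value * (x1 / x2) with equal shapes (the real op also broadcasts); AddcdivRef is a hypothetical name:

#include <vector>

std::vector<float> AddcdivRef(const std::vector<float> &input_data,
                              const std::vector<float> &x1,
                              const std::vector<float> &x2, float value) {
  std::vector<float> y(input_data.size());
  for (size_t i = 0; i < y.size(); ++i) {
    // Element-wise divide, scale by the scalar, then add to input_data.
    y[i] = input_data[i] + value * (x1[i] / x2[i]);
  }
  return y;
}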
@@ -3418,12 +3420,12 @@ REG_OP(Addcdiv)
.OP_END_FACTORY_REG(Addcdiv)


/**
* @brief Performs the element-wise multiplication of tensor x1 by tensor x2,
* multiplies the result by the scalar value and adds it to tensor input_data




* @par Inputs:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32, int8, int32, uint8.
* @li x1: A mutable input Tensor of the same type as input_data.
@@ -3431,7 +3433,7 @@ REG_OP(Addcdiv)
* @li value: A tensor which includes only one element of the same type as x1. \n


* @par Outputs:
* @li y: A mutable output Tensor. Has the same type as "x1". \n


* @par Third-party framework compatibility
* Compatible with the Pytorch operator Addcmul.
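Likewise, a scalar reference assuming the torch.addcmul rule y = input_data + value * x1 * x2 and equal shapes (AddcmulRef is a hypothetical name; the brief's x2/x3 correspond to x1/x2 in the input list):

#include <vector>

std::vector<float> AddcmulRef(const std::vector<float> &input_data,
                              const std::vector<float> &x1,
                              const std::vector<float> &x2, float value) {
  std::vector<float> y(input_data.size());
  for (size_t i = 0; i < y.size(); ++i) {
    // Element-wise multiply, scale by the scalar, then add to input_data.
    y[i] = input_data[i] + value * x1[i] * x2[i];
  }
  return y;
}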
@@ -3453,7 +3455,7 @@ REG_OP(Addcmul)
* @li alpha: A scalar tensor of type float16, float32. \n


* @par Outputs:
* @li y: An ND tensor with the same shape and type as "x1". \n


* @par Third-party framework compatibility
* Compatible with the Pytorch operator Axpy.
@@ -3466,6 +3468,25 @@ REG_OP(AxpyV2)
.OP_END_FACTORY_REG(AxpyV2)
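For orientation, the axpy semantics in a minimal host-side sketch (hypothetical reference, equal shapes assumed; the op also broadcasts):

#include <vector>

// y[i] = x1[i] + alpha * x2[i]
std::vector<float> AxpyRef(const std::vector<float> &x1,
                           const std::vector<float> &x2, float alpha) {
  std::vector<float> y(x1.size());
  for (size_t i = 0; i < y.size(); ++i) {
    y[i] = x1[i] + alpha * x2[i];
  }
  return y;
}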


/**
* @brief Computes the result of x1 - x2.

* @par Inputs:
* @li x1: An ND tensor of type float16, float, int32.
* @li x2: An ND tensor of type float16, float, int32. \n

* @par Outputs:
* @li y: An ND tensor with the same type as "x1". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Sub.
*/
REG_OP(PtSub)
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OP_END_FACTORY_REG(PtSub)

/**
* @brief Add the partial values of two tensors in format NC1HWC0.


* @par Inputs:
@@ -3481,7 +3502,7 @@ REG_OP(AxpyV2)
* the difference between C1 and offset in "x1" and "x2". \n


* @par Outputs:
* @li y: A Tensor of the same type as "x1", and the same shape as "x1",
* except for the C1 value. Record the result after adding. \n
*/
REG_OP(StrideAdd)
@@ -3502,7 +3523,7 @@ REG_OP(StrideAdd)
* @li input_y: A Tensor. The second tensor. \n


* @par Outputs:
* @li output_z: A Tensor of type bool containing the comparison result of the two inputs. \n


* @par Third-party framework compatibility
* Compatible with the Pytorch equal operator. \n
@@ -3514,21 +3535,21 @@ REG_OP(TensorEqual)
.OP_END_FACTORY_REG(TensorEqual)
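A minimal sketch of the documented behavior, assuming a single scalar bool output and that a shape mismatch compares unequal (TensorEqualRef is a hypothetical name):

#include <vector>

bool TensorEqualRef(const std::vector<float> &x, const std::vector<float> &y) {
  if (x.size() != y.size()) return false;  // different shapes compare unequal
  for (size_t i = 0; i < x.size(); ++i) {
    if (x[i] != y[i]) return false;
  }
  return true;
}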


/**
* @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support).
* All inputs and outputs must have the same data type. This operator supports multidirectional
* (i.e., Numpy-style) broadcasting
*
* @par Inputs:
* one input, including:
* @li x: A dynamic input Tensor. Must be one of the following types: float32, float16, double, int32, int64
*
* @par Outputs:
* one output, including:
* @li y: A Tensor of the same type as x
*
*/
REG_OP(MaxN)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
.OP_END_FACTORY_REG(MaxN)
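For orientation, the reduction over a dynamic number of inputs in a host-side sketch (hypothetical reference for same-shaped inputs; broadcasting is not shown):

#include <algorithm>
#include <vector>

// y[i] = max over k of inputs[k][i]
std::vector<float> MaxNRef(const std::vector<std::vector<float>> &inputs) {
  std::vector<float> y = inputs.at(0);
  for (size_t k = 1; k < inputs.size(); ++k) {
    for (size_t i = 0; i < y.size(); ++i) {
      y[i] = std::max(y[i], inputs[k][i]);
    }
  }
  return y;
}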


@@ -3613,16 +3634,16 @@ REG_OP(DataCompare)
*which Hardmax will be performed. The output tensor has the same shape and contains the Hardmax values of the
*corresponding input.
*
*@par Inputs:
*one input, including:
*@li x: An input Tensor. Must be one of the following types: float32, float16
*
*@par Attributes:
*@li axis: A required int attribute that decides which dimension will be used to compute the hardmax
*
*@par Outputs:
*one output, including:
*@li y: A Tensor of the same type as x
*
*/
REG_OP(HardMax)
@@ -3640,7 +3661,7 @@ REG_OP(HardMax)
* @li input_y: A Tensor. The second tensor must be 1-D. \n


* @par Outputs:
* @li output: A Tensor. The result of the two inputs, must be 1-D. \n


* @par Third-party framework compatibility
* Compatible with the Pytorch dot operator. \n
@@ -3650,7 +3671,7 @@ REG_OP(Dot)
.INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
.OP_END_FACTORY_REG(Dot)
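The 1-D dot product in a minimal host-side sketch (hypothetical reference; equal lengths assumed):

#include <vector>

// sum over i of x[i] * y[i]
float DotRef(const std::vector<float> &x, const std::vector<float> &y) {
  float acc = 0.0f;
  for (size_t i = 0; i < x.size(); ++i) {
    acc += x[i] * y[i];
  }
  return acc;
}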
/**
*@brief Returns a new tensor with boolean elements representing \n
*if each element of input is “close” to the corresponding element of other \n
@@ -3698,7 +3719,7 @@ REG_OP(IsClose)
*
*@attention Constraints:
*@li indices: only supports int32, and its shape must be the same as "updates"
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li y: A Tensor with the same type and shape as "var" \n


*@par Third-party framework compatibility
@@ -3733,7 +3754,7 @@ REG_OP(ArgMaxGrad)


*@attention Constraints:
*@li indices: only supports int32, and its shape must be the same as "updates"
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li y: A Tensor with the same type and shape as "var" \n


*@par Third-party framework compatibility
@@ -3784,15 +3805,15 @@ REG_OP(AddMatMatElements)


*@par Inputs:
*Two inputs, including:
* @li input_x1: A tensor. Must be one of the following types: float32.
* @li input_x2: A tensor. Must be one of the following types: float32. \n


* @par Attributes:
* @li dim: The type is Int and the default value is 1.
* @li eps: The type is Float and the default value is 1e-8. \n


*@par Outputs:
*@li output_y: A Tensor with the same type as "input_x1". \n


*@par Third-party framework compatibility
*Compatible with the Pytorch operator CosineSimilarity. \n
@@ -3805,45 +3826,6 @@ REG_OP(CosineSimilarity)
.ATTR(eps, Float, 1e-8)
.OP_END_FACTORY_REG(CosineSimilarity)
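A minimal sketch of the formula over a flat vector (the real op reduces along the `dim` attribute; CosineSimilarityRef is a hypothetical name):

#include <algorithm>
#include <cmath>
#include <vector>

// cos(x1, x2) = dot(x1, x2) / max(||x1|| * ||x2||, eps)
float CosineSimilarityRef(const std::vector<float> &x1,
                          const std::vector<float> &x2, float eps = 1e-8f) {
  float dot = 0.0f, n1 = 0.0f, n2 = 0.0f;
  for (size_t i = 0; i < x1.size(); ++i) {
    dot += x1[i] * x2[i];
    n1 += x1[i] * x1[i];
    n2 += x2[i] * x2[i];
  }
  return dot / std::max(std::sqrt(n1) * std::sqrt(n2), eps);
}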


/**
*@brief Computes the Adam optimizer update. \n

*@par Inputs:
*Eleven inputs, including:
* @li var: A Tensor. Support float16/float32.\n
* @li m: A Tensor. Datatype and shape are same as exp_avg.\n
* @li v: A Tensor. Datatype and shape are same as exp_avg.\n
* @li lr: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li beta1: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li beta2: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li epsilon: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li grad: A Tensor. Datatype and shape are same as exp_avg.\n
* @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n

*@par Outputs:
*Three outputs, including:
* @li var: A Tensor. Datatype and shape are same as exp_avg.\n
* @li m: A Tensor. Datatype and shape are same as exp_avg.\n
* @li v: A Tensor. Datatype and shape are same as exp_avg.\n
*/
REG_OP(ApplyAdamV2)
.INPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(lr, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(beta1, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(beta2, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(epsilon, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(grad, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OP_END_FACTORY_REG(ApplyAdamV2)
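The header does not spell out the update rule; as orientation only, a sketch of a standard Adam step with L2-style weight decay (a hypothetical reference, not the device implementation; gradient clipping via max_grad_norm/global_grad_norm and bias correction are omitted):

#include <cmath>
#include <vector>

void AdamStepRef(std::vector<float> &var, std::vector<float> &m,
                 std::vector<float> &v, const std::vector<float> &grad,
                 float lr, float beta1, float beta2, float epsilon,
                 float weight_decay) {
  for (size_t i = 0; i < var.size(); ++i) {
    m[i] = beta1 * m[i] + (1.0f - beta1) * grad[i];                 // first moment
    v[i] = beta2 * v[i] + (1.0f - beta2) * grad[i] * grad[i];       // second moment
    var[i] -= lr * (m[i] / (std::sqrt(v[i]) + epsilon) + weight_decay * var[i]);
  }
}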
} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_

+ 3
- 0
third_party/fwkacllib/inc/ops/functional_ops.h

@@ -163,6 +163,9 @@ REG_OP(Case)
* if it is not a scalar, non-empty means True and empty means False.
*@li body: A subgraph that takes 'input' and returns another list of tensors . \n


*@par Attributes:
*parallel_iterations: An optional int, defaults to 10 . \n

*@par Outputs:
*output: The output tensors returned by "body". Has the same type as "input" . \n




+ 31
- 280
third_party/fwkacllib/inc/ops/image_ops.h

@@ -28,7 +28,7 @@ namespace ge {
*@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . \n


*@par Inputs:
*@li contents: A Tensor of type string. 0-D. The GIF-encoded image. \n


*@par Outputs:
*image: A Tensor of type uint8. \n
@@ -128,8 +128,8 @@ crops from the input image tensor and resizes them using bilinear sampling or
nearest neighbor sampling to a common output size specified by crop_size . \n


*@par Inputs:
*Input images must be a 4-D tensor. Inputs include:
*@li images: A Tensor. Must be one of the following types: uint8, uint16, int8,
int16, int32, int64, float16, float, double. A 4-D tensor of shape
[batch, image_height, image_width, depth]. The format must be NHWC.
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4].
@@ -266,9 +266,8 @@ depth] containing the original image size. Both image_height and image_width
need to be positive . \n


*@par Attributes:
*method: A string specifying the interpolation method. Only 'bilinear' is
supported for now . \n


*@par Outputs:
*y: A 4-D tensor of shape [batch, image_height, image_width, depth]. The format
@@ -586,11 +585,9 @@ REG_OP(ResizeNearestNeighborV2GradD)
channels], The image tensor that was resized . \n


*@par Attributes:
*align_corners: An optional bool. If true, the centers of
the 4 corner pixels of the input and grad tensors are aligned. Defaults to
false . \n


*@par Outputs:
*y: A Tensor. Has the same type as original_image . \n
@@ -620,10 +617,9 @@ REG_OP(ResizeBilinearV2Grad)
size for the images . \n


*@par Attributes:
*align_corners: If true, the centers of the 4 corner pixels of the input and
output tensors are aligned, preserving the values at the corner pixels.
Defaults to false . \n


*@par Outputs:
*y: 4-D with shape [batch, new_height, new_width, channels] . \n
@@ -688,9 +684,6 @@ be non-negative. In the case of 0, the cropped area does not need to overlap
any of the bounding boxes supplied .
*@li aspect_ratio_range: The cropped area of the image must have an aspect
ratio = width / height within this range.
*@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
cropped area of the image must contain a fraction of the supplied image
within this range.
*@li max_attempts: Number of attempts at generating a cropped region of the
image of the specified constraints. After max_attempts failures, return the
entire image.
@@ -747,9 +740,6 @@ generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: A second seed to avoid seed collision.
*@li aspect_ratio_range: The cropped area of the image must have an aspect
ratio = width / height within this range.
*@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
cropped area of the image must contain a fraction of the supplied image
within this range.
*@li max_attempts: Number of attempts at generating a cropped region of the
image of the specified constraints. After max_attempts failures, return the
entire image.
@@ -797,10 +787,9 @@ REG_OP(SampleDistortedBoundingBoxExt2)
The new size for the images . \n


*@par Attributes:
*align_corners: If true, the centers of the 4 corner pixels of the input and
output tensors are aligned, preserving the values at the corner pixels.
Defaults to false . \n


*@par Outputs:
*y: 4-D with shape [batch, new_height, new_width, channels] . \n
@@ -1010,6 +999,10 @@ deciding whether boxes overlap too.
*@li score_threshold: A 0-D float tensor representing the threshold for
deciding when to remove boxes based on score . \n


*@par Attributes:
*pad_to_max_output_size: If true, the output selected_indices is padded
to be of length max_output_size. Defaults to false . \n

*@par Outputs:
*selected_indices: A 1-D integer tensor of shape [M] representing the
selected indices from the boxes tensor, where M <= max_output_size . \n
@@ -1101,8 +1094,8 @@ REG_OP(EncodePng)
*contents: 0-D. PNG-decoded image .


*@par Attributes:
*channels: graph channels \n
*dtype: type of image


*@par Outputs:
*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels]
@@ -1123,10 +1116,10 @@ REG_OP(DecodePng)
*@brief Bmp-decode an image. \n


*@par Inputs:
*@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n


*@par Attributes:
*@li channels: Decode the desired number of color channels of the image. \n


*@par Outputs:
*image: A Tensor dtype of uint8.
@@ -1260,7 +1253,6 @@ REG_OP(KeepRatioResizeBilinear)
No default value.
*@li align_corners: An optional bool. If "true", the centers of the corner
pixels of the input and output tensors are aligned. Defaults to "false" . \n
*@li half_pixel_centers: An optional bool. Defaults to False . \n


*@par Outputs:
*y: A Tensor with the same type and format as input "images" . \n
@@ -1389,7 +1381,6 @@ REG_OP(NonMaxSuppressionV5)
*@li scale: A `Tensor` of type `float32`.
*@li translation: A `Tensor` of type `float32` . \n


*@par Attributes:
*@li kernel_type: type is string, default lanczos3
*@li antialias: type is bool, default true \n


@@ -1420,7 +1411,6 @@ REG_OP(ScaleAndTranslate)
*@li scale: A `Tensor` of type `float32`.
*@li translation: A `Tensor` of type `float32` . \n


*@par Attributes:
*@li kernel_type: type is string, default lanczos3
*@li antialias: type is bool, default true


@@ -1470,10 +1460,9 @@ if they fall beyond [0, 1]. If false, do not do clipping and output the box
coordinates as it is. If not specified, defaults to true . \n


*@par Outputs:
*@li nmsed_boxes: type is float
*@li nmsed_scores: type is float
*@li nmsed_classes: type is float
*@li valid_detections: type is INT32 \n


*@par Third-party framework compatibility
* Compatible with tensorflow CombinedNonMaxSuppression operator.
@@ -1519,9 +1508,6 @@ REG_OP(IMGWarp)


*@par Outputs:
*map_img: A Tensor after resize. \n

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(Remap)
.INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
@@ -1538,7 +1524,7 @@ and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bott
*@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point.


*@par Outputs:
*remap_img: A Tensor after ResizeBilinear, a 4-D tensor of shape `[n, c, h, w]`. \n
*/
REG_OP(IMGWarpResize)
.INPUT(img, TensorType({DT_FLOAT32}))
@@ -1573,39 +1559,6 @@ REG_OP(SpatialTransformerD)
.OP_END_FACTORY_REG(SpatialTransformerD)


/**
*@brief Function spatial transformer . \n

*@par Inputs:
*@li x: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64.
*@li theta: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64,
auxiliary coefficients . \n

*@par Attributes:
*@li output_size: A tuple output size.
*@li default_theta: A tuple default theta
*@li use_default_theta: List use default theta

*@par Outputs:
*y: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64,
should be same shape and type as x.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(SpatialTransformer)
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16,
DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64}))
.OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,
DT_UINT16,DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16,
DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64}))
.ATTR(output_size, ListInt, {-1, -1})
.ATTR(default_theta, ListFloat, {})
.ATTR(align_corners, Bool, false)
.ATTR(use_default_theta, ListInt, {})
.OP_END_FACTORY_REG(SpatialTransformer)

/**
* @brief Resize the input tensor. \n
currently, only resizing image tensors with nearest neighbor and linear interpolation is supported.


@@ -1670,7 +1623,7 @@ REG_OP(Resize)
*@brief Parses an image from string to int. \n


*@par Inputs:
*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n


*@par Attributes:
*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image.
@@ -1715,7 +1668,7 @@ REG_OP(DenseImageWarp)


*@par Inputs:
*One input, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n


*@par Attributes:
@@ -1760,7 +1713,7 @@ REG_OP(ResizeD)


*@par Inputs:
*One input, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32. \n


*@par Attributes:
@@ -1809,8 +1762,8 @@ REG_OP(ResizeGradD)
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n


*@par Outputs:
*grad_image: Returns 4-D with the same shape and dtype as `image`.
*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
*/
REG_OP(DenseImageWarpGrad)
.INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -1864,12 +1817,12 @@ REG_OP(GridSampler2D)
*@li assist: Assist matrix, a 4-D tensor of type float16.


*@par Attributes:
*@li align_corners: An optional bool. If "true", the centers of the corner
pixels of the input and output tensors are aligned. Defaults to "false" .


*@par Outputs:
*diff: Returns 4-D Tensor with the same shape and dtype as `grid`.
*position: Returns 4-D Tensor with the same shape as `grid`.
*/
REG_OP(GridUnnormal)
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1887,13 +1840,10 @@ REG_OP(GridUnnormal)
*@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`.


*@par Attributes:
*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .


*@par Outputs:
*y: Returns 4-D Tensor with the same dtype as `x`.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ImageUnfold)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1986,204 +1936,5 @@ REG_OP(GridSampler3DGrad)
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(GridSampler3DGrad)


/**
*@brief Upsample the 3-D data with the nearest neighbor interpolation algorithm. \n

*@par Inputs:
*One input, including:
*x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
* float16, float32, float64. \n

*@par Attributes:
*@li output_size: An optional listInt. Defaults to none.
contain 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size'
should be the same as the rank of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
*@li scales: An optional listFloat. Defaults to none.
The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
The number of elements of 'scales' should be the same as the rank of input 'x'. One of 'scales' and
'output_size' MUST be specified and it is an error if both are specified. \n

*@par Outputs:
*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
*/

REG_OP(UpsampleNearest3d)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(output_size, ListInt, {})
.ATTR(scales, ListFloat, {})
.OP_END_FACTORY_REG(UpsampleNearest3d)
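For orientation, the index mapping usually assumed by nearest-neighbor upsampling, shown for one axis (a hypothetical sketch; the op applies the same mapping to D, H and W, and the exact rounding of the device kernel may differ):

#include <cstdint>

// Each output coordinate maps back to floor(out_coord / scale) in the input,
// clamped to the valid input range.
int64_t NearestSourceIndex(int64_t out_coord, float scale, int64_t in_size) {
  int64_t src = static_cast<int64_t>(out_coord / scale);
  return src < in_size ? src : in_size - 1;
}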

/**
*@brief Upsample the 3-D data with the trilinear interpolation algorithm. \n

*@par Inputs:
*One input, including:
*x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
* float16, float32, float64. \n

*@par Attributes:
*@li output_size: An optional listInt. Defaults to none.
contain 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' should
be the same as the rank of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
*@li scales: An optional listFloat. Defaults to none.
The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
The number of elements of 'scales' should be the same as the rank of input 'x'.
One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
*@li align_corners: An optional bool. Defaults to false.
If true, the input and output tensors are aligned by the center points of their corner pixels, preserving the
values at the corner pixels. If false, the input and output tensors are aligned by the corner points of their
corner pixels, and the interpolation use edge value padding for out of boundary values. \n

*@par Outputs:
*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
*/

REG_OP(UpsampleTrilinear3d)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(output_size, ListInt, {})
.ATTR(scales, ListFloat, {})
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(UpsampleTrilinear3d)

/**
*@brief Upsample the 3-D gradient data with the nearest neighbor interpolation algorithm. \n

*@par Inputs:
*One inputs, including:
*grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
* float16, float32, float64. \n

*@par Attributes:
*@li input_size: A required listInt.
contain 5 elements: [min_batch, channels, depth, height, width]. Must:
input_size[0] == grad_output_tensor_size[0]
input_size[1] == grad_output_tensor_size[1]. \n
*@li output_size: An optional listInt. Defaults to none.
contain 3 elements: depth, height, width. The number of elements of 'output_size' should
be the same as the rank of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
*@li scales: An optional listFloat. Defaults to none.
The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
The number of elements of 'scales' should be the same as the rank of input 'grad_output'.
One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n

*@par Outputs:
*y: A 5-D tensor. Has the same type as input grad_output, shape depends on Attributes:input_size. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(UpsampleNearest3dGrad)
.INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(input_size, ListInt)
.ATTR(output_size, ListInt, {})
.ATTR(scales, ListFloat, {})
.OP_END_FACTORY_REG(UpsampleNearest3dGrad)

/**
*@brief Upsample the 3-D gradient data with the trilinear interpolation algorithm. \n

*@par Inputs:
*One input, including:
*grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
* float16, float32, float64. \n

*@par Attributes:
*@li input_size: A required listInt.
contain 5 elements: [min_batch, channels, depth, height, width]. Must:
input_size[0] == grad_output_tensor_size[0]
input_size[1] == grad_output_tensor_size[1]. \n
*@li output_size: An optional listInt. Defaults to none.
contain 3 elements: depth, height, width. The number of elements of 'output_size' should
be the same as the rank of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
*@li scales: An optional listFloat. Defaults to none.
The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
The number of elements of 'scales' should be the same as the rank of input 'grad_output'.
One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n

*@par Outputs:
*y: A Tensor with shape depends on intput_size and output_size/scales. Must be one of the following
types: float16, float32, float64. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(UpsampleTrilinear3dGrad)
.INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(input_size, ListInt)
.ATTR(output_size, ListInt, {})
.ATTR(scales, ListFloat, {})
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(UpsampleTrilinear3dGrad)


/**
*@brief Upsample the 1-D data with the nearest neighbor interpolation algorithm. \n

*@par Inputs:
*x: A 3-D input tensor [N, C, W]. Must be one of the following types:
* float16, float32, float64. \n

*@par Attributes:
*@li output_size: A required listInt that contains output_width.
*@li scales: An optional listFloat that contains scale_width. Defaults to zero. \n

*@par Outputs:
*y: A 3-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
*/

REG_OP(UpsampleNearest1d)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(output_size, ListInt)
.ATTR(scales, ListFloat, {})
.OP_END_FACTORY_REG(UpsampleNearest1d)

/**
*@brief Upsample the 1-D gradient data with the nearest neighbor interpolation algorithm. \n

*@par Inputs:
*grad_output: A 3-D input tensor [N, C, W]. Must be one of the following types:
* float16, float32, float64. \n

*@par Attributes:
*@li output_size: A required listInt that contains output_width.
*@li scales: An optional listFloat that contains scale_width. Defaults to zero.
*@li input_size: A required listInt that contains input_width. \n

*@par Outputs:
*y: A 3-D tensor. Has the same type as input grad_output, shape depends on Attributes:input_size. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
*/

REG_OP(UpsampleNearest1dGrad)
.INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(input_size, ListInt)
.REQUIRED_ATTR(output_size, ListInt)
.ATTR(scales, ListFloat, {})
.OP_END_FACTORY_REG(UpsampleNearest1dGrad)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_

+ 2
- 15
third_party/fwkacllib/inc/ops/linalg_ops.h

@@ -347,9 +347,6 @@ REG_OP(SelfAdjointEig)
.OP_END_FACTORY_REG(SelfAdjointEig)


/**
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.

*@brief Computes the sign and the log of the absolute value of the determinant
of one or more square matrices . \n


@@ -385,10 +382,9 @@ REG_OP(Slogdet)
*x: Tensor of shape [..., M, N]. Let P be the minimum of M and N . \n


*@par Attributes:
*compute_uv: If True then left and right singular vectors will be computed and
returned in u and v, respectively. Otherwise, only the singular values will
be computed, which can be significantly faster . \n


*@par Outputs:
*@li sigma: Singular values. Shape is [..., P]. The values are sorted in
@@ -431,9 +427,6 @@ denotes the lower triangular factor `L` with unit diagonal.
*@li p: upper triangular part denotes the upper triangular factor `U`. Permutation
of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n


*@par Attributes:
*output_idx_type: An optional DType from: int32, int64.

*@par Third-party framework compatibility
* Compatible with TensorFlow Lu operator.
*/
@@ -474,12 +467,6 @@ left-hand side . \n
*@par Outputs:
y: Tensor of shape `[..., M, K]` containing the solutions \n


*@par Attributes:
*partial_pivoting: Whether to perform partial pivoting. `True` by default.
Partial pivoting makes the procedure more stable, but slower. Partial
pivoting is unnecessary in some cases, including diagonally dominant and
symmetric positive definite matrices

*@par Third-party framework compatibility
* Compatible with TensorFlow TridiagonalSolve operator.
*/


+ 32
- 32
third_party/fwkacllib/inc/ops/list_ops.h

@@ -35,10 +35,10 @@ namespace ge {
*@li max_num_elements: The maximum number of elements. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li handle: An empty tensor list . \n


*@par Third-party framework compatibility.
*Compatible with tensorflow EmptyTensorList operator.
@@ -59,10 +59,10 @@ and the other elements of the given list in `input_handle`. \n
*@li tensor: The tensor to put on the list. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output_handle: A list with the elements of the old list followed by tensor. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListPushBack operator.
@@ -86,7 +86,7 @@ list with all but that element. \n
*@li element_shape: A shape compatible with that of elements in the list. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output_handle: A list with the elements of the old list followed by tensor.
@@ -110,10 +110,10 @@ REG_OP(TensorListPopBack)
*@brief The number of tensors in the input tensor list. \n


*@par Inputs:
*@li input_handle: The input list. \n


*@par Outputs:
*@li length: The number of tensors in the list. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListLength operator.
@@ -127,13 +127,13 @@ REG_OP(TensorListLength)
*@brief The shape of elements in the input tensor list. \n


*@par Inputs:
*@li input_handle: The input list. \n


*@par Attributes:
*@li shape_type: The type of shape in the list. \n


*@par Outputs:
*@li element_shape: A shape compatible with that of elements in the list. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListElementShape operator.
@@ -156,7 +156,7 @@ REG_OP(TensorListElementShape)
*@li shape_type: The type of shape in the list. \n


*@par Outputs:
*@li handle: An output tensor list . \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListReserve operator.
@@ -178,10 +178,10 @@ REG_OP(TensorListReserve)
*@li element_shape: A shape compatible with that of elements in the list. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li item: An output tensor value of index position . \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListGetItem operator.
@@ -206,10 +206,10 @@ REG_OP(TensorListGetItem)
*@li item: The element to be assigned to that position. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output_handle: An output tensor list . \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListSetItem operator.
@@ -233,10 +233,10 @@ REG_OP(TensorListSetItem)
*@li tensor: The tensor to push into the tensor list. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output_handles: The output tensor lists. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListPushBackBatch operator.
@@ -263,7 +263,7 @@ REG_OP(TensorListPushBackBatch)
*@li num_elements: The number of elements in the list. \n


*@par Outputs:
*@li tensor: The tensor of list. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListStack operator.
@@ -293,7 +293,7 @@ the leading dim of input_handle.element_shape or the element_shape input arg
is not already set. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li tensor: The concatenated result.
@@ -324,10 +324,10 @@ REG_OP(TensorListConcatV2)
*@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output_handle: The list. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListSplit operator.
@@ -351,10 +351,10 @@ REG_OP(TensorListSplit)
*@li element_shape: The shape of elements in the list. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output_handle: An output tensor list . \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListFromTensor operator.
@@ -377,7 +377,7 @@ REG_OP(TensorListFromTensor)
*@li size: size of the output list. \n


*@par Outputs:
*@li output_handle: The output tensor list. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListResize operator.
@@ -397,10 +397,10 @@ REG_OP(TensorListResize)
*@li element_shape: The shape of elements in the list. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li values: The tensor. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListGather operator.
@@ -429,10 +429,10 @@ the largest index in indices. If -1, the list is just large enough to include
the largest index in indices. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output_handle: The TensorList. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListScatterV2 operator.
@@ -458,10 +458,10 @@ REG_OP(TensorListScatterV2)
*@li indices: The indices used to index into the list. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output_handle: The TensorList. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListScatterIntoExistingList operator.
@@ -485,10 +485,10 @@ REG_OP(TensorListScatterIntoExistingList)
*@li input_b: The input tensor list B. \n


*@par Attributes:
*@li element_dtype: The type of elements in the list. \n


*@par Outputs:
*@li output: The output list. \n


*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListConcatLists operator.


+ 2
- 2
third_party/fwkacllib/inc/ops/lookup_ops.h

@@ -77,8 +77,8 @@ REG_OP(LookupTableInsert)
*handle: A Tensor of type resource. Handle to the table . \n


*@par Attributes:
*@li Tkeys: A DType of keys.
*@li Tvalues: A DType of values . \n


*@par Outputs:
*@li keys: A Tensor of type Tkeys.


+ 60
- 75
third_party/fwkacllib/inc/ops/math_ops.h

@@ -227,10 +227,10 @@ REG_OP(Bucketize)


*@par Inputs:
*One input, including:
* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n


*@par Outputs:
*y: A tensor with the same type and shape as input_x \n


*@par Third-party framework compatibility
*Compatible with the Pytorch operator Trunc. \n
@@ -298,7 +298,7 @@ REG_OP(SparseSegmentMean)


*@par Inputs:
*The input grad must be of type float or double. Inputs include:
*@li grad: A Tensor. Must be one of the following types: float, double.
gradient propagated to the SparseSegmentMean op.
*@li indices: A Tensor. Must be one of the following types: int32, int64.
indices passed to the corresponding SparseSegmentMean op.
@@ -365,7 +365,6 @@ REG_OP(InitData)
component of an element of this dataset.
*@li output_shapes: A nested structure of TensorShape objects corresponding
to each component of an element of this dataset.
*@li output_num:output of nums.
*@li channel_name: A string. Default "" . \n


*@par Outputs:
@@ -539,11 +538,11 @@ REG_OP(NextAfter)


*@par Inputs:
*One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n


*@par Attributes:
*@li p: An optional float. Defaults to 2. \n


*@par Outputs:
*y: A Tensor with the same type and shape as input_x. \n
@@ -561,10 +560,10 @@ REG_OP(Pdist)
*@brief Compute element-wise finiteness, return a boolean tensor.


*@par Inputs:
*x: A Tensor of type float16, float32, double.


*@par Outputs:
*y: A Tensor. Has the same shape as x. Returns which elements of x are finite.


*@par Third-party framework compatibility.
*Compatible with tensorflow IsFinite operator.
@@ -578,10 +577,10 @@ REG_OP(IsFinite)
*@brief Compute element-wise infiniteness, return a boolean tensor.


*@par Inputs:
*x: A Tensor of type float16, float32, double.


*@par Outputs:
*y: A Tensor. Has the same shape as x. Returns which elements of x are isinf.


*@par Third-party framework compatibility.
*Compatible with tensorflow IsInf operator.
@@ -595,11 +594,7 @@ REG_OP(IsInf)
*@brief Computes the complex absolute value of a tensor.


*@par Inputs:
*x: A Tensor of complex numbers; this operation returns a tensor of type
float or double that is the absolute value of each element in x .

* @par Attributes:
* Tout: representing the output type.


*@par Outputs:
*y: A tensor of type `float` or `double` that is the absolute value of each element in `x`.
@@ -617,10 +612,10 @@ REG_OP(ComplexAbs)
*@brief Returns which elements of x are NaN.


*@par Inputs:
*x: A Tensor of type float16, float32, double.


*@par Outputs:
*y: A Tensor. Has the same shape as x. Returns which elements of x are isnan.


*@par Third-party framework compatibility.
*Compatible with tensorflow IsNan operator.
@@ -634,10 +629,7 @@ REG_OP(IsNan)
*@brief Returns the real part of a complex number.


*@par Inputs:
*input:A Tensor. Must have numeric type.

*@par Attributes:
*Tout: Type of outputs. \n
*input:A Tensor.


*@par Outputs:
*output:A Tensor. Has the same shape as input.
@@ -678,8 +670,7 @@ REG_OP(Conj)
*@li weight: A Tensor dtype of float32 . \n


*@par Attributes:
*@li reduction: An optional attribute. Defaults to "mean" .
*@li ignore_index: An optional attribute. Defaults to -100 . \n
*reduction: An optional attribute. Defaults to "mean" . \n


*@par Outputs:
*@li y: A Tensor dtype of float32.
@@ -709,8 +700,7 @@ REG_OP(NLLLoss)
*@li total_weight:A Tensor dtype of float32 . \n


*@par Attributes:
*@li reduction: An optional attribute. Defaults to "mean" .
*@li ignore_index: An optional attribute. Defaults to -100 . \n
*reduction: An optional attribute. Defaults to "mean" . \n


*@par Outputs:
*x_grad: A Tensor. Must be the following type: float32 . \n
@@ -730,24 +720,24 @@ REG_OP(NLLLossGrad)
.OP_END_FACTORY_REG(NLLLossGrad)


/**
*@brief IFMR(Input Feature Map Reconstruction). \n
*@brief The ifmr . \n


*@par Inputs:
*@li data: A Tensor of feature map.
*@li data_min: A Tensor of min value of feature map.
*@li data_max: A Tensor of max value of feature map.
*@li cumsum: A Tensor of cumsum bin of data . \n
*@li data:A Tensor of feature map
*@li data_min:A Tensor of min value of feature map.
*@li data_max:A Tensor of max value of feature map.
*@li cumsum:A Tensor of cumsum bin of data . \n


*@par Attributes:
*@li min_percentile: min init percentile.
*@li max_percentile: max init percentile.
*@li search_range: search range.
*@li search_step: step size of searching.
*@li with_offset: whether to use offset . \n
*min_percentile: min init percentile.
*max_percentile: max init percentile.
*search_range: search range.
*search_step: step size of searching.
*with_offset: whether to use offset . \n


*@par Outputs:
*@li scale: optimal scale.
*@li offset: optimal offset . \n
*scale: optimal scale.
*offset: optimal offset . \n


*@par Third-party framework compatibility
*Compatible with mindspore
@@ -768,16 +758,16 @@ REG_OP(IFMR)
.OP_END_FACTORY_REG(IFMR)


/**
*@brief Weights Adaptive Range Quantization. \n
*@brief weights adaptive range quantization. \n


*@par Inputs:
*@li w: A Tensor of weights. \n
*@li w_min: A Tensor of weights reduce_min. \n
*@li w_max: A Tensor of weights reduce_max. \n
*@li w:A Tensor of weights. \n
*@li w_min:A Tensor of weights reduce_min. \n
*@li w_max:A Tensor of weights reduce_max. \n


*@par Attributes:
*@li num_bits: the number of bits used for quantization.
*@li offset_flag: whether to use offset. \n
*num_bits: the number of bits used for quantization.
*offset_flag: whether to use offset. \n


*@par Outputs:
*y: fake quantized weights. \n
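As a hedged illustration only: one common way such a weights-quantization op derives its fake-quantized output is a symmetric quantize-dequantize from [w_min, w_max] with num_bits levels. The sketch below is our assumption of that scheme, not the kernel itself:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> fake_quant(const std::vector<float>& w,
                              float w_min, float w_max, int num_bits) {
  const float qmax = std::pow(2.0f, num_bits - 1) - 1.0f;  // e.g. 127 for 8 bits
  const float scale = std::max(std::fabs(w_min), std::fabs(w_max)) / qmax;
  std::vector<float> y(w.size());
  for (std::size_t i = 0; i < w.size(); ++i) {
    float q = std::round(w[i] / scale);             // quantize
    q = std::min(std::max(q, -qmax - 1.0f), qmax);  // clamp to the integer range
    y[i] = q * scale;                               // dequantize
  }
  return y;
}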
@@ -799,22 +789,22 @@ REG_OP(WtsARQ)
.OP_END_FACTORY_REG(WtsARQ)


/**
*@brief Activations Universal Linear Quantization. \n
*@brief The acts_ulq. \n


*@par Inputs:
*@li x: A Tensor of feature map.
*@li clamp_min: A Tensor of min clamp value of feature map.
*@li clamp_max: A Tensor of max clamp value of feature map.
*@li x:A Tensor of feature map
*@li clamp_min:A Tensor of min clamp value of feature map.
*@li clamp_max:A Tensor of max clamp value of feature map.


*@par Attributes:
*@li fixed_min: fix min to zero.
*@li num_bits: quant bits. \n
*fixed_min: fix min to zero.
*num_bits: quant bits. \n


*@par Outputs:
*@li y: output fake quant feature map.
*@li clamp_min_mask: where x > clamp_min.
*@li clamp_max_mask: where x < clamp_max.
*@li x_clamped_loss: clamp loss. \n
*y: output fake quant feature map.
*clamp_min_mask: where x > clamp_min
*clamp_max_mask: where x < clamp_max
*x_clamped_loss: clamp loss. \n


*@par Third-party framework compatibility
*Compatible with mindspore
@@ -836,12 +826,12 @@ REG_OP(ActsULQ)
.OP_END_FACTORY_REG(ActsULQ)


/**
*@brief The gradient of Activations Universal Linear Quantization. \n
*@brief The acts_ulq_input_grad. \n


*@par Inputs:
*@li y_grad: A Tensor of gradient.
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li y_grad: A Tensor of gradient
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed


*@par Outputs:
*x_grad: The gradient of inputs. \n
@@ -861,10 +851,10 @@ REG_OP(ActsULQInputGrad)
.OP_END_FACTORY_REG(ActsULQInputGrad)


/**
*@brief The gradient of Activations Universal Linear Quantization clamp max. \n
*@brief The act_ulq_clamp_max_grad. \n


*@par Inputs:
*@li y_grad: A Tensor of gradient.
*@li y_grad: A Tensor of gradient
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. \n


@@ -886,10 +876,10 @@ REG_OP(ActULQClampMaxGrad)
.OP_END_FACTORY_REG(ActULQClampMaxGrad)


/**
*@brief The gradient of Activations Universal Linear Quantization clamp min. \n
*@brief The act_ulq_clamp_min_grad. \n


*@par Inputs:
*@li y_grad: A Tensor of gradient.
*@li y_grad: A Tensor of gradient
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. \n


@@ -914,7 +904,7 @@ REG_OP(ActULQClampMinGrad)
* @brief Computes Lp norm.


* @par Inputs:
* x: An ND tensor of type float16, float32. \n
* @li x: An ND tensor of type float16, float32. \n
*
* @par Attributes:
* @li p: Int, "inf" or "-inf", default value is 2.
@@ -923,7 +913,7 @@ REG_OP(ActULQClampMinGrad)
* @li epsilon: Float, default is 1e-12. \n


* @par Outputs:
* y: An ND tensor of type float16, float32. The shape of y depends
* @li y: An ND tensor of type float16, float32. The shape of y depends
* on axes and keepdim. \n


* @par Third-party framework compatibility
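A hedged sketch of the reduced Lp norm described above, over a flat buffer (axes/keepdim handling omitted; names are ours):

#include <cmath>
#include <vector>

float lp_norm(const std::vector<float>& x, float p) {
  float acc = 0.0f;
  for (float v : x) acc += std::pow(std::fabs(v), p);  // sum of |x_i|^p
  return std::pow(acc, 1.0f / p);                      // p-th root
}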
@@ -942,13 +932,11 @@ REG_OP(LpNorm)
* @brief get complex.


* @par Inputs:
* @li real: An ND tensor of type float32 double, representing the real part of a complex number.
* @li imag: An ND tensor of type float32 double, representing the imaginary part of a complex number. \n
* @li real: An ND tensor of type float32. double
* @li imag: An ND tensor of type float32. double \n
*
* @par Attributes:
* Tout: representing the type of the output.
* @par Outputs:
* out: An ND tensor of type complex64, complex128 \n
* @li out: An ND tensor of type complex64, complex128 \n
*/
REG_OP(Complex)
.INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE}))
@@ -961,13 +949,10 @@ REG_OP(Complex)
* @brief Returns the imaginary part of a complex tensor.


* @par Inputs:
* input: An ND tensor of type complex64, complex128 \n

* @par Attributes:
* Tout: representing the type of the output.

* @li input: An ND tensor of type complex64, complex128 \n
*
* @par Outputs:
* output: An ND tensor of type float32, double \n
* @li output: An ND tensor of type float32, double \n
*/
REG_OP(Imag)
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
@@ -1003,7 +988,7 @@ REG_OP(Angle)
* float16, float32. \n


*@par Attributes:
* reduction: Specifies the reduction to apply to the output:
* @li reduction: Specifies the reduction to apply to the output:
* 'none' | 'mean' | 'sum'. Default: 'mean'. \n


*@par Outputs:


+ 48 - 184 third_party/fwkacllib/inc/ops/matrix_calculation_ops.h

@@ -61,28 +61,21 @@ REG_OP(MatMul)
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n


*@par Inputs: *@par Inputs:
*Four inputs, including:
* @li x1: A matrix Tensor. 2D. Must be one of the following types: float32,
float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ].
* @li x2: A matrix Tensor. 2D. Must be one of the following types: float32,
float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ].
* @li bias: A 1D Tensor. Must be one of the following types: float32,
float16, int32. Has format [ND, NHWC].
* @li offset_w: A Optional 1D Tensor for quantized inference. Type is int8.
Reserved. \n
*Two inputs, including:
* @li x1: A matrix Tensor. 2D. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
* @li bias: A 1D Tensor. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC] . \n


*@par Attributes: *@par Attributes:
* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to
[M, K].
* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to
[K, N].
* @li offset_x: An optional integer for quantized MatMulV2.
* The negative offset added to the input x1 for int8 type. Ensure offset_x
within the effective range of int8 [-128, 127]. Defaults to "0". \n
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n


*@par Outputs: *@par Outputs:
*y: The result matrix Tensor. 2D. Must be one of the following types: float32,
float16, int32. Has format [ND, NHWC, FRACTAL_NZ]. \n
*y: The result matrix Tensor. 2D. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchMatmul. * Compatible with the TensorFlow operator BatchMatmul.
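A hedged sketch of the transpose_x1/transpose_x2 semantics described above, for plain row-major 2D buffers (function and parameter names are ours, not the op's):

#include <cstddef>
#include <vector>

std::vector<float> matmul(const std::vector<float>& a, const std::vector<float>& b,
                          std::size_t m, std::size_t k, std::size_t n,
                          bool transpose_x1, bool transpose_x2) {
  std::vector<float> y(m * n, 0.0f);
  for (std::size_t i = 0; i < m; ++i)
    for (std::size_t j = 0; j < n; ++j)
      for (std::size_t t = 0; t < k; ++t) {
        // x1 is [M, K] normally, [K, M] when transposed; x2 is [K, N] or [N, K].
        const float av = transpose_x1 ? a[t * m + i] : a[i * k + t];
        const float bv = transpose_x2 ? b[j * k + t] : b[t * n + j];
        y[i * n + j] += av * bv;
      }
  return y;
}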
@@ -102,27 +95,19 @@ REG_OP(MatMulV2)
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n


*@par Inputs:
*Five inputs, including:
*Two inputs, including:
* @li x1: A matrix Tensor. 2D. Must be one of the following types: int8.
* @li x2: A matrix Tensor. 2D. Must be one of the following types: int8.
* @li compress_index: A compress index matrix of type int8.
* @li bias: An optional Tensor. 1D. Must be one of the following types: int32,
float16.
* @li offset_w: An optional matrix Tensor. 2D. Must be one of the following
types: int8. \n
* @li bias: A 1D Tensor. Must be one of the following types: int32, float16.


*@par Attributes:
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to
[M, K].
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to
[K, N].
*@li offset_x: An optional integer for quantized MatMulV2Compress.
*The negative offset added to the input x1 for int8 type. Ensure offset_x
within the effective range of int8 [-128, 127]. Defaults to "0". \n
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n


*@par Outputs:
*y: The result matrix Tensor. 2D. Must be one of the following types: int32,
* float16. \n
*y: The result matrix Tensor. 2D. Must be one of the following types: float16,
* int32. \n


*/
REG_OP(MatMulV2Compress)
@@ -503,13 +488,13 @@ REG_OP(ScatterElements)


*@par Inputs:
* Three inputs, including:
*@li var: An ND Tensor .
*@li var: An ND Tensor . \n


*Must be one of the following types: float16, float32, int32, int8, uint8
*@li indices: An ND Tensor of type int32 or int64




*@li updates: A Tensor. format:NCHW, NHWC .
*@li updates: A Tensor. format:NCHW, NHWC . \n


*Must be one of the following types: float16, float32, int32, int8, uint8


@@ -532,61 +517,6 @@ REG_OP(ScatterAdd)
.OP_END_FACTORY_REG(ScatterAdd)
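A hedged sketch of the scatter-add semantics documented above, in its 1-D form var[indices[i]] += updates[i] (names are ours):

#include <cstddef>
#include <cstdint>
#include <vector>

void scatter_add(std::vector<float>& var,
                 const std::vector<int32_t>& indices,
                 const std::vector<float>& updates) {
  for (std::size_t i = 0; i < indices.size(); ++i) {
    var[indices[i]] += updates[i];  // duplicate indices accumulate
  }
}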


/**
*@brief Use a scalar to modify the tensor. \n

*@par Inputs:
*One input, including:
*@li index: An ND Tensor . \n

*Must be one of the following types: float16, float32, int32, int8, uint8

*@par Attributes:
* dim: the axis along which to index.
* value: the source element(s) to scatter. \n

*@par Outputs:
*y: A Tensor. Has the same type and format as input "index" . \n

*@par Third-party framework compatibility
* Compatible with the Pytorch operator ScatterScalar.
*/
REG_OP(ScatterScalar)
.INPUT(index, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.REQUIRED_ATTR(dim, Int)
.REQUIRED_ATTR(value, Float)
.OP_END_FACTORY_REG(ScatterScalar)

/**
*@brief Use a tensor to modify the tensor . \n

*@par Inputs:
* Two inputs, including:
*@li index: An ND Tensor . \n

*Must be one of the following types: float16, float32, int32, int8, uint8

*@li src: An ND Tensor . \n

*Must be one of the following types: float16, float32, int32, int8, uint8

*@par Attributes:
* dim: the axis along which to index. \n

*@par Outputs:
*y: A Tensor. Has the same type and format as input "index" . \n

*@par Third-party framework compatibility
* Compatible with the Pytorch operator ScatterTensor.
*/
REG_OP(ScatterTensor)
.INPUT(index, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.REQUIRED_ATTR(dim, Int)
.OP_END_FACTORY_REG(ScatterTensor)

/**
*@brief Divides a variable reference by sparse updates . \n


*@par Inputs:
@@ -600,7 +530,7 @@ REG_OP(ScatterTensor)
*Must be one of the following types: float16, float, int32, int8, uint8


*@par Attributes:
*use_locking: An optional bool. Defaults to "False". If "True",
*@li use_locking: An optional bool. Defaults to "False". If "True",
* the operation will be protected by a lock . \n


*@par Outputs:
@@ -822,12 +752,10 @@ REG_OP(DiagPart)


*@par Attributes:
*@li num_output: Reserved.
*@li transpose: A bool, specifying whether to transpose input w, either "true" or "false". Defaults to "false".
*@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false".
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1.
* The product of the subsequent dimensions starting from the first dimension or the second dimension is "K".
*@li offset_x: An optional integer for quantized FullyConnection.
*The negative offset added to the input image for int8 type. Ensure offset_x within the
*effective range of int8 [-128, 127]. Defaults to "0". \n
*@li offset_x: Reserved . \n


*@par Outputs:
*y: The result tensor of type float16, int32, float32 . \n
@@ -851,34 +779,27 @@ REG_OP(FullyConnection)
.OP_END_FACTORY_REG(FullyConnection)


/**
*@brief Also known as a "fully-connected-compress" layer, computes an inner
product with a set of learned weights, and (optionally) adds biases . \n
*@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n


*@par Inputs:
* Five inputs, including:
* Four inputs, including:
*@li x: A Tensor of type uint8, int8.
*@li w: A weight matrix of type int8.
*@li compress_index: A compress index matrix of type int8.
*@li b: A Tensor of type int32.
*@li offset_w: A Tensor of type int8.
*@li w: A weight matrix of type int8, int8.
*@li w: A compress index matrix of type int8, int8.
*@li b: A Tensor of type float16, int32, int32.
*@li offset_w: A Tensor of type int8.


*@par Attributes:
*@li num_output: An int, specifying the number of outputs.
*@li transpose: A bool, specifying whether to transpose input w, either "true"
or "false". Defaults to "false".
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K"
starts from. Defaults to "1".
* The product of the subsequent dimensions starting from the first dimension or the
second dimension is "K".
*@li offset_x: An optional integer for quantized FullyConnectionCompress.
*The negative offset added to the input image for int8 type. Ensure offset_x
within the effective range of int8 [-128, 127]. Defaults to "0". \n
*@li num_output: Reserved.
*@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false".
*@li axis: Reserved.
*@li offset_x: Reserved . \n


*@par Outputs:
*y: The result tensor of type int32. \n
*y: The result tensor of type int32 . \n


*@par Third-party framework compatibility
* Compatible with the Caffe operator InnerProduct. \n
* Compatible with the Caffe operator InnerProduct . \n


*@par Quantization supported or not
* Yes
@@ -1004,13 +925,13 @@ REG_OP(ScatterMin)


*@par Inputs:
* Three inputs, including:
*@li var: An ND Tensor .
*@li var: An ND Tensor . \n


*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An NCHW, NHWC, or ND Tensor . \n


*Must be one of the following types: int32 or int64
*@li updates: An NCHW, NHWC, or ND Tensor .
*@li updates: An NCHW, NHWC, or ND Tensor . \n


*Must be one of the following types: float16, float, int32, int8, uint8


@@ -1037,13 +958,13 @@ REG_OP(ScatterMax)


*@par Inputs:
* Three inputs, including:
*@li var: An ND Tensor .
*@li var: An ND Tensor . \n


*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor . \n


*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor .
*@li updates: An ND Tensor . \n


*Must be one of the following types: float16, float, int32, int8, uint8


@@ -1192,46 +1113,14 @@ REG_OP(IndexAdd)
.OP_END_FACTORY_REG(IndexAdd)


/**
* @brief According to the index number of indexes, replace the value
*corresponding to X1 with the value in x2.

* @par Inputs:
* Three inputs, including:
* @li x1: A Tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8.
* @li x2: A Tensor of the same type as "x1".
* @li indices: A Tensor of the indices, type should be int32.

* @par Attributes:
* @li accumulate: Whether to accumulate instead of replace. Defaults to 0.

* @par Outputs:
* @li y: A Tensor. Same as input "x1".

* @par Third-party framework compatibility
* Compatible with the Pytorch operator index_put.

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(IndexPut)
.INPUT(x1, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
.INPUT(x2, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
.INPUT(indices, TensorType({DT_INT64, DT_INT32}))
.OUTPUT(y, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
.ATTR(accumulate, Int, 0)
.OP_END_FACTORY_REG(IndexPut)

/**
*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n


*@par Inputs:
*x: A Tensor. Must be one of the following types:
*float16, float32, double, int32, uint8, int16, int8, complex64, int64,
*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n

*@par Attributes:
*diagonal: An optional attribute indicating the diagonal to consider. \n
* Two inputs, including:
*@li x: A Tensor. Must be one of the following types:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
*@li diagonal: (int, optional) - the diagonal to consider. \n


*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n
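A hedged sketch of the upper-triangular extraction with a diagonal offset, as documented above (square 2-D case; names are ours):

#include <cstddef>
#include <vector>

std::vector<float> triu(const std::vector<float>& x, std::size_t n, int diagonal) {
  std::vector<float> y(n * n, 0.0f);
  for (std::size_t i = 0; i < n; ++i)
    for (std::size_t j = 0; j < n; ++j)
      if (static_cast<int>(j) - static_cast<int>(i) >= diagonal)
        y[i * n + j] = x[i * n + j];  // keep elements on/above the shifted diagonal
  return y;
}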
@@ -1249,12 +1138,11 @@ REG_OP(Triu)
*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n


*@par Inputs:
*x: A Tensor. Must be one of the following types:
*float16, float32, double, int32, uint8, int16, int8, complex64, int64,
*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n

*@par Attributes:
*diagonal: An optional attribute indicating the diagonal to consider. \n
* Two inputs, including:
*@li x: A Tensor. Must be one of the following types:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
*@li diagonal: (int, optional) - the diagonal to consider. \n


*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n
@@ -1325,30 +1213,6 @@ REG_OP(Eye)
.ATTR(dtype, Int, 0)
.OP_END_FACTORY_REG(Eye)


/**
*@brief: Fill diagonal of at least 2 dimension tensors with value . \n

*@par Inputs:
*x: A Tensor. Must be one of the following types:
* float32, int32, int64 . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Attributes:
*fill_value: The value to fill in.
*wrap: An optional bool. Defaults to "False". If "True", Use recursive fill. \n

*@par Third-party framework compatibility
* Compatible with the Pytorch operator FillDiagonal.
*/
REG_OP(FillDiagonal)
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT64}))
.REQUIRED_ATTR(fill_value, Float)
.ATTR(wrap, Bool, false)
.OP_END_FACTORY_REG(FillDiagonal)

} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_

+ 368 - 331 third_party/fwkacllib/inc/ops/nn_calculation_ops.h
File diff suppressed because it is too large


+ 42 - 111 third_party/fwkacllib/inc/ops/nn_detect_ops.h

@@ -153,42 +153,6 @@ REG_OP(Iou)
.OP_END_FACTORY_REG(Iou)


/**
*@brief First calculate the smallest enclosing (closure) area of the two boxes and the IoU,
* then compute the proportion of the closure area that belongs to neither box,
* and finally subtract this proportion from the IoU to get the GIoU . \n

*@par Inputs:
* Two inputs, including:
*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (N, 4). "N" indicates the number of bounding boxes, and the value
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (M, 4). "M" indicates the number of ground truth boxes, and
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n

*@par Attributes:
*@li trans: An optional bool, true for 'xywh', false for 'xyxy'.
*@li is_cross: An optional bool, control whether the output shape is [M, N] or [1, N]
*@li mode: Computation mode, a character string with the value range of [iou, iof] . \n

*@par Outputs:
* overlap: A 2D Tensor of type float16 or float32 with shape [M, N] or [1, N],
* specifying the IoU or IoF ratio . \n

*@attention Constraints:
* Only computation of float16 data is supported. To avoid overflow, the input
* length and width are scaled by 0.2 internally.
*/
REG_OP(GIoU)
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(trans, Bool, false)
.ATTR(is_cross, Bool, true)
.ATTR(mode, String, "iou")
.OP_END_FACTORY_REG(GIoU)
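A hedged sketch of the GIoU formula outlined in the brief above, for two axis-aligned boxes in [x1, y1, x2, y2] form (the helper name is ours, not the kernel):

#include <algorithm>

float giou(const float a[4], const float b[4]) {
  const float area_a = (a[2] - a[0]) * (a[3] - a[1]);
  const float area_b = (b[2] - b[0]) * (b[3] - b[1]);
  const float iw = std::max(0.0f, std::min(a[2], b[2]) - std::max(a[0], b[0]));
  const float ih = std::max(0.0f, std::min(a[3], b[3]) - std::max(a[1], b[1]));
  const float inter = iw * ih;                     // intersection area
  const float uni = area_a + area_b - inter;       // union area
  const float cw = std::max(a[2], b[2]) - std::min(a[0], b[0]);
  const float ch = std::max(a[3], b[3]) - std::min(a[1], b[1]);
  const float closure = cw * ch;                   // smallest enclosing box
  return inter / uni - (closure - uni) / closure;  // IoU minus closure penalty
}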

/**
*@brief Performs the backpropagation of ROIAlign for training scenarios . \n


*@par Inputs:
@@ -453,7 +417,7 @@ REG_OP(PSROIPooling)
*@brief Returns detection result . \n


*@par Inputs:
* Five inputs, including:
* Four inputs, including:
*@li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
*@li bbox_delta: An NCHWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
*@li score: An NCHWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
@@ -495,7 +459,7 @@ REG_OP(FSRDetectionOutput)
*@brief Returns detection result . \n


*@par Inputs:
* Three inputs, including:
* Four inputs, including:
*@li bbox_delta: An ND tensor of type float16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
*@li score: An ND tensor of type float16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput.
*@li anchors: An ND tensor of type float16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
@@ -510,6 +474,7 @@ REG_OP(FSRDetectionOutput)
*@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3
*@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1
*@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold
*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output".
*@par Outputs:
*@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
*@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box.
@@ -1024,26 +989,26 @@ REG_OP(SPP)
* feature map . \n


*@attention Constraints:
* For the feature map input:
*@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
*@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
*@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
*@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
*@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
*@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
*@li For the feature map input:
(1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
(2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
(3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
(4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
(5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
(6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
(7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
(8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
(9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
(10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
(11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
(12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
(13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
(14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
(15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
(16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
(17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
(18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
(19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
@@ -1257,7 +1222,9 @@ REG_OP(RpnProposalsD)
* @li box_filter: bool, mark of box_filter. Defaults to "true"
* @li core_max_num: int, max number of core. Defaults to "8"
*@par Outputs:
*sorted_box: A Tensor. Must be float16. N-D with shape [N, 1].
* @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
* @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
* @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
*/
REG_OP(RpnProposalPostProcessing)
.INPUT(sorted_proposal, TensorType({DT_FLOAT16}))
@@ -1415,7 +1382,7 @@ REG_OP(BatchMultiClassNonMaxSuppression)
* @li shape_hw: A 1D Tensor of type int32 . \n


* @par Attributes:
* reversed_box: An optional bool, specifying whether the last two dims are "4,num" or
* @li reversed_box: An optional bool, specifying whether the last two dims are "4,num" or
* "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false" . \n


* @par Outputs:
@@ -1462,9 +1429,9 @@ REG_OP(NormalizeBBox)
* @li anchors: A Tensor. Must be int32.
*
*@par Attributes:
* @li scales: optional, listfloat.
* @li scales: optional, listfloat, .
* @li decode_clip: optional, float, threshold of decode process.
* @li reversed_boxes: optional, bool.
* @li reversed_boxes: optional, bool,.
*
*@par Outputs:
* y: A Tensor. Must have the same type as box_predictions.
@@ -1479,16 +1446,16 @@ REG_OP(DecodeBboxV2)
.OP_END_FACTORY_REG(DecodeBboxV2)


/**
*@brief sort the input tensor and return the value of index.
*@brief Computes sort function.
*
*@par Inputs:
*Inputs include:
* x: A Tensor. Dtype support: float16, float, int16, int8,
* x: A Tensor. Dtype support: flaot16, flaot, int16, int8,
uint8, int32, int64.
*
*@par Attributes:
* @li axis: An optional attribute indicates the sorting axis.
* @li descending: An optional attribute indicates descending sort or not.
* @li axis: optional, int.
* @li descending: optional, bool.
*
*@par Outputs:
* @li y1: A Tensor. Must have the same type as x.
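A hedged sketch of sorting values while returning the original indices, as documented above (1-D case; names are ours):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

std::pair<std::vector<float>, std::vector<int>> sort_with_indices(
    const std::vector<float>& x, bool descending) {
  std::vector<int> idx(x.size());
  std::iota(idx.begin(), idx.end(), 0);  // 0, 1, 2, ...
  std::stable_sort(idx.begin(), idx.end(), [&](int a, int b) {
    return descending ? x[a] > x[b] : x[a] < x[b];
  });
  std::vector<float> vals(x.size());
  for (std::size_t i = 0; i < idx.size(); ++i) vals[i] = x[idx[i]];
  return {vals, idx};
}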
@@ -1601,18 +1568,16 @@ deciding when to remove boxes based on score . \n
the last dim representing (batch_id,class_id,index_id) . \n


*@par Attributes:
*@li center_point_box: Integer indicating the format of the box data.
*center_point_box: Integer indicating the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
(i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
1 - the box data is supplied as [x_center, y_center, width, height].
Mostly used for Pytorch models. \n
*@li max_boxes_size: An optional attribute integer representing the real maximum
*number of boxes to be selected by non max suppression . \n


*@par Outputs:
*selected_indices: A 2-D integer tensor of shape [M] representing the
*@li selected_indices: A 2-D integer tensor of shape [M] representing the
selected indices from the boxes tensor, where M <= max_output_size. \n


*@attention Constraints:
@@ -1638,7 +1603,7 @@ REG_OP(NonMaxSuppressionV7)
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n


*@par Inputs:
* Two inputs, including:
* Three inputs, including:
*@li features: A 5HD Tensor list of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1".
@@ -1795,7 +1760,7 @@ REG_OP(AnchorResponseFlags)
* "N" indicates the number of ROIs. \n * "N" indicates the number of ROIs. \n


*@par Attributes: *@par Attributes:
*performance_mode: select performance mode, "high_precision" or "high_performance".
*@li performance_mode: select performance mode, "high_precision" or "high_performance".
* select "high_precision" when input type is float32, the output tensor precision * select "high_precision" when input type is float32, the output tensor precision
* will be smaller than 0.0001, select "high_performance" when input type is float32, * will be smaller than 0.0001, select "high_performance" when input type is float32,
* the ops will be best performance, but precision will be only smaller than 0.005. * the ops will be best performance, but precision will be only smaller than 0.005.
@@ -1830,12 +1795,12 @@ REG_OP(YoloBoxesEncode)
*@li num_gts: A Tensor. Support int32. real k. shape (1, )


*@par Attributes:
*@li pos_iou_thr: float. IOU threshold for positive bboxes.
*@li min_pos_iou: float. minimum iou for a bbox to be considered as a positive bbox
*@li gt_max_assign_all: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt.
*@li output_dim: float. IOU threshold for positive bboxes.
*@li group_size: float. minimum iou for a bbox to be considered as a positive bbox
*@li spatial_scale: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt.


*@par Outputs:
* assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
*@li assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
*/
REG_OP(GridAssignPositive)
.INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 }))
@@ -1851,40 +1816,6 @@ REG_OP(GridAssignPositive)
.REQUIRED_ATTR(min_pos_iou, Float)
.REQUIRED_ATTR(gt_max_assign_all, Bool)
.OP_END_FACTORY_REG(GridAssignPositive)

/**
*@brief GIoUGrad . \n

*@par Inputs:
*@li dy : data of grad increment, a 1D Tensor of type float16 or float32 with
* shape (N,).
*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (4, N). "N" indicates the number of bounding boxes, and the value
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (4, M). "M" indicates the number of ground truth boxes, and
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n

*@par Attributes:
*@li trans: An optional attr, true for 'xywh', false for 'xyxy', only support true now.
*@li is_cross: An optional attr, if false M equals N, only support false now.
*@li mode: An optional attr, a character string with the value range of ['iou', 'iof'],
* only support 'iou' now. \n

*@par Outputs:
*@li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N].
*@li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M].
*/
REG_OP(GIoUGrad)
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(trans, Bool, false)
.ATTR(is_cross, Bool, true)
.ATTR(mode, String, "iou")
.OP_END_FACTORY_REG(GIoUGrad)
} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_


+ 121 - 54 third_party/fwkacllib/inc/ops/nn_norm_ops.h

@@ -54,16 +54,15 @@ REG_OP(LogSoftmaxGrad)
*@par Inputs:
*Two inputs, including:
* @li features: A Tensor. Must be one of the following types: half, float32, double.
*A "batch_size * num_classes" matrix.
* A "batch_size * num_classes" matrix.
* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'.
*batch_size vector with values in [0, num_classes).
*This is the label for the given minibatch entry. \n
* batch_size vector with values in [0, num_classes).
* This is the label for the given minibatch entry.




*@par Outputs:
*@li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features".
*@li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix).
Has the same type as "features" . \n
*loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features".
*backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n


*@par Third-party framework compatibility
*Compatible with the TensorFlow operator SparseSoftmaxCrossEntropyWithLogits.
@@ -85,8 +84,8 @@ REG_OP(SparseSoftmaxCrossEntropyWithLogits)
* @li labels: A Tensor of the same type as "features". A "batch_size * num_classes" matrix . \n


*@par Outputs:
* @li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features".
* @li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n
*loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features".
*backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n


*@par Third-party framework compatibility
*Compatible with the TensorFlow operator SoftmaxCrossEntropyWithLogits.
@@ -128,13 +127,12 @@ REG_OP(SoftmaxGrad)
*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n


*@par Inputs:
* Three inputs, including:
* Two inputs, including:
*@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value.
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value .
*@li dout: A multi-dimensional Tensor of float16 or float32, specifying the gradient transferred from the upper layer. \n
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n


*@par Outputs:
*gradient: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n
*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n


*@par Third-party framework compatibility
* Compatible with the scenario where "reduction" is set to "none" of PyTorch operator SigmoidCrossEntropyWithLogitsGrad.
@@ -150,12 +148,13 @@ REG_OP(SigmoidCrossEntropyWithLogitsGrad)
*@brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . \n


*@par Inputs:
* Two inputs, including:
* Three inputs, including:
*@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value.
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. \n
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value.
*@li dout: A multi-dimensional Tensor of float16 or float32, specifying the gradient transferred from the upper layer . \n


*@par Outputs:
*loss: Return loss. Has the same dimensions and type as "predict" . \n
*gradient: Return gradient. Has the same dimensions and type as "predict" . \n


*@par Third-party framework compatibility
* Compatible with the scenario where "reduction" is set to "none" of PyTorch operator SigmoidCrossEntropyWithLogits.
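A hedged sketch of a numerically stable element-wise sigmoid cross entropy (the reduction="none" case mentioned above; names are ours):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> sigmoid_cross_entropy(const std::vector<float>& predict,
                                         const std::vector<float>& target) {
  std::vector<float> loss(predict.size());
  for (std::size_t i = 0; i < predict.size(); ++i) {
    const float x = predict[i], z = target[i];
    // max(x, 0) - x*z + log(1 + exp(-|x|)) avoids overflow for large |x|.
    loss[i] = std::max(x, 0.0f) - x * z + std::log1p(std::exp(-std::fabs(x)));
  }
  return loss;
}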
@@ -573,7 +572,7 @@ REG_OP(LayerNorm)


*@par Inputs:
*One input, including:
* x: A Tensor. Must be one of the following types: float16, float32 . \n
* @li x: A Tensor. Must be one of the following types: float16, float32 . \n


*@par Attributes:
* @li p: Specify L_p norm, the type is float.
@@ -582,7 +581,7 @@ REG_OP(LayerNorm)


*@par Outputs:
*One output, including:
* y: shape and dtype of output, should be same shape and type as input.
* @li y: shape and dtype of output, should be same shape and type as input.
*/
REG_OP(Renorm)
.INPUT(x, TensorType::BasicType())
@@ -812,7 +811,7 @@ REG_OP(LayerNormBetaGammaBackpropV2)
* shape of "keep_prob" should be (1,) or [1,]. * shape of "keep_prob" should be (1,) or [1,].
* Has the same type as "x" . \n * Has the same type as "x" . \n


*@par Outputs:
*@par Output:
*y: A mutable Tensor. Has the same type as "x". *y: A mutable Tensor. Has the same type as "x".
*/ */
REG_OP(DropOutDoMask) REG_OP(DropOutDoMask)
@@ -840,7 +839,7 @@ REG_OP(DropOutDoMask)
* shape of "keep_prob" should be (1,) or [1,]. * shape of "keep_prob" should be (1,) or [1,].
* Has the same type as "x" . \n * Has the same type as "x" . \n


*@par Outputs:
*@par Output:
*y: A mutable Tensor. Has the same type as "x". *y: A mutable Tensor. Has the same type as "x".
*@par Restrictions: *@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -1011,7 +1010,7 @@ REG_OP(LRNGrad)
*@li grads: A Tensor. Has the same type as acts.


*@par Attributes:
*blank_label: An optional attribute. Defaults to 0.
*@li blank_label: An optional attribute. Defaults to 0.


*@par Third-party framework compatibility
* Compatible with TensorFlow RNNTLoss operator.
@@ -1199,11 +1198,13 @@ REG_OP(INInferV2D)
* @li epsilon: An attribute of type Float. \n


* @par Outputs:
* Three outputs, including:
*Three outputs, including:
* @li y: A Tensor. Has the same type as "x". \n
* @li mean: A Tensor. Has the same type as "x". \n
* @li variance: A Tensor. Has the same type as "x". \n


* @par Third-party framework compatibility
* Can be used by onnx InstanceNormalization
*/
REG_OP(InstanceNorm)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1217,22 +1218,24 @@ REG_OP(InstanceNorm)
.OP_END_FACTORY_REG(InstanceNorm)


/**
* @brief InstanceNormGrad operator interface implementation.
*@brief InstanceNormGrad operator interface implementation.


* @par Inputs:
* Five inputs, including:
*@par Inputs:
*Five inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li variance: A Tensor. Must be one of the following types: float16, float32.
* @li mean: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n


* @par Outputs:
* Three outputs, including:
*@par Outputs:
*Three outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.


*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormGrad)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -1246,6 +1249,58 @@ REG_OP(InstanceNormGrad)
.OP_END_FACTORY_REG(InstanceNormGrad)


/**
*@brief InstanceNormXBackprop operator interface implementation.

*@par Inputs:
*Five inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li variance: A Tensor. Must be one of the following types: float16, float32.
* @li mean: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*Two outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
* @li res_for_gamma: A Tensor. Must be one of the following types: float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormXBackprop)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(InstanceNormXBackprop)

/**
*@brief InstanceNormBetaGammaBackprop operator interface implementation.

*@par Inputs:
*Two inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n

*@par Outputs:
*Two outputs, including:
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormBetaGammaBackprop)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(res_for_gamma, TensorType({DT_FLOAT}))
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop)

/**
* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n


* @par Inputs:
@@ -1285,10 +1340,10 @@ REG_OP(KlDivLossGrad)
* @li label: A Tensor. Has the same type as "grads". Required. \n * @li label: A Tensor. Has the same type as "grads". Required. \n


* @par Attributes: * @par Attributes:
* reduction: An optional attribute of type String. Defaults to "mean". \n
* @li reduction: An optional attribute of type String. Defaults to "mean". \n


* @par Outputs: * @par Outputs:
* y: A Tensor. Has the same type as "x". \n
* @li y: A Tensor. Has the same type as "x". \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator L1LossGrad. * Compatible with the Pytorch operator L1LossGrad.
@@ -1313,7 +1368,7 @@ REG_OP(L1LossGrad)
* @li reduction: An optional string. Defaults to "mean". \n


* @par Outputs:
* y: An ND tensor with the same shape and type as "predict". \n
* @li y: An ND tensor with the same shape and type as "predict". \n


* @par Third-party framework compatibility
* Compatible with the Pytorch operator LpLoss.
@@ -1335,10 +1390,10 @@ REG_OP(LpLoss)
* @li dout: An ND tensor of type float16, float32. \n

* @par Attributes:
- * reduction: An optional string. Defaults to "mean". \n
+ * @li reduction: An optional string. Defaults to "mean". \n

* @par Outputs:
- * y: An ND tensor with the same shape and type as "predict". \n
+ * @li y: An ND tensor with the same shape and type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator MseLossGrad.
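A sketch of the MSE gradient chain documented here, assuming the standard formula y = 2 * (predict - label) * dout, divided by N for the "mean" reduction (helper name is ours):

#include <string>
#include <vector>

std::vector<float> MseLossGradRef(const std::vector<float>& predict,
                                  const std::vector<float>& label,
                                  const std::vector<float>& dout,
                                  const std::string& reduction = "mean") {
  std::vector<float> y(predict.size());
  const float scale = (reduction == "mean") ? 1.0f / predict.size() : 1.0f;
  for (size_t i = 0; i < y.size(); ++i)
    y[i] = 2.0f * (predict[i] - label[i]) * dout[i] * scale;  // d(mse)/dpredict
  return y;
}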
@@ -1359,10 +1414,10 @@ REG_OP(MseLossGrad)
* @li label: An ND Tensor of dtype float16 or float32.\n
*
* @par Attributes:
- * reduction: An optional str from sum, none, mean. Defaults to "mean".\n
+ * @li reduction: An optional str from sum, none, mean. Defaults to "mean".\n
*
* @par Outputs:
- * y: when reduction=sum/mean, y is a scalar; when reduction=none, y has
+ * @li y: when reduction=sum/mean, y is a scalar; when reduction=none, y has
* same type and shape as "predict".\n
*/
REG_OP(MseLoss)
@@ -1390,7 +1445,7 @@ REG_OP(MseLoss)
* Must be one of the following: "none", "mean", "sum". \n

* @par Outputs:
- * gradient: A Tensor. Has the same type as "predict". \n
+ * @li gradient: A Tensor. Has the same type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SmoothL1LossBackward.
@@ -1425,7 +1480,7 @@ REG_OP(SmoothL1LossGradV2)
* the output, 'sum': the output will be summed. Default: 'mean'. \n

* @par Outputs:
- * loss: Indicates the loss between the predictive value and target value.
+ * @li loss: Indicates the loss between the predictive value and target value.
* Has the same dimensions as "predict". \n

* @par Third-party framework compatibility
@@ -1443,12 +1498,12 @@ REG_OP(SmoothL1LossV2)
* @brief Computes Centralization. result = x - mean(x, axes)

* @par Inputs:
- * x: An ND tensor of type float16, float32.
+ * @li x: An ND tensor of type float16, float32.
* @par Attributes:
- * axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
+ * @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
* Must be in the range [-rank(x), rank(x)).
* @par Outputs:
- * y: A Tensor. Has the same type as "x". \n
+ * @li y: A Tensor. Has the same type as "x". \n

* @par Third-party framework compatibility
* custom operator \n
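The brief above is the whole computation: result = x - mean(x, axes). A sketch for the all-axes case, which is one legal value of "axes":

#include <numeric>
#include <vector>

std::vector<float> CentralizationRef(const std::vector<float>& x) {
  const float mean = std::accumulate(x.begin(), x.end(), 0.0f) / x.size();
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) y[i] = x[i] - mean;  // subtract the mean
  return y;
}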
@@ -1466,7 +1521,7 @@ REG_OP(Centralization)

*@par Inputs:
*One input, including:
- * x: A tensor. Must be one of the following types:
+ * @li x: A tensor. Must be one of the following types:
* float16, float32, int32, uint32, int8, uint8. \n

*@par Attributes:
@@ -1491,14 +1546,14 @@ REG_OP(Roll)
logistic loss between input_x and input_y (containing 1 or -1). \n

*@par Inputs:
- *Tow inputs, including:
+ *One inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_y: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
- *reduction: An optional string. Defaults to "mean". \n
+ *@li lambd: An optional string. Defaults to "mean". \n

*@par Outputs:
*output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n
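A reference sketch of the loss the brief describes, assuming the standard soft-margin formula loss_i = log(1 + exp(-input_y_i * input_x_i)); reduction == "none" would return the per-element vector, omitted here for brevity:

#include <cmath>
#include <string>
#include <vector>

float SoftMarginLossRef(const std::vector<float>& input_x,
                        const std::vector<float>& input_y,
                        const std::string& reduction = "mean") {
  float total = 0.0f;
  for (size_t i = 0; i < input_x.size(); ++i)
    total += std::log1p(std::exp(-input_y[i] * input_x[i]));  // logistic loss
  return (reduction == "mean") ? total / input_x.size() : total;  // else "sum"
}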
@@ -1525,10 +1580,10 @@ REG_OP(SoftMarginLoss)
* @li pos_weight: An optional ND tensor of type float16, float32. \n

* @par Attributes:
- * reduction: An optional string. Defaults to "mean". \n
+ * @li reduction: An optional string. Defaults to "mean". \n

* @par Outputs:
- * gradient: An ND tensor with the same shape and type as "predict". \n
+ * @li gradient: An ND tensor with the same shape and type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad.
@@ -1548,14 +1603,24 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2)

* @par Inputs:
* Two inputs, including:
- * @li input_x: A tensor. Must be one of the following types: float16, float32.
- * @li target: A tensor. Must be one of the following types: float16, float32. \n
+ * @li input_x: A tensor. Must be one of the following types:
+ * float16, float32. \n
+ *
+ * @par Inputs:
+ * @li target: A tensor. Must be the following types:
+ * float16, float32. \n

* @par Attributes:
* four Attributes, including:
- * @li log_input: An optional bool. Defaults to "True"
- * @li full: An optional bool. Defaults to "False"
- * @li eps: An optional float. Defaults to "1e-8"
+ * @li log_input: An optional bool. Defaults to "True" \n
+ *
+ * @par Attributes:
+ * @li full: An optional bool. Defaults to "False" \n
+ *
+ * @par Attributes:
+ * @li eps: An optional float. Defaults to "1e-8" \n
+ *
+ * @par Attributes:
* @li reduction: An optional string. Defaults to "mean" \n

* @par Outputs:
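A sketch of the Poisson negative log likelihood these attributes parameterize, assuming PyTorch-style semantics (log_input=true: loss_i = exp(x_i) - target_i * x_i; log_input=false: loss_i = x_i - target_i * log(x_i + eps); the "full" Stirling term is omitted for brevity):

#include <cmath>
#include <vector>

float PoissonNllLossRef(const std::vector<float>& input_x,
                        const std::vector<float>& target,
                        bool log_input = true, float eps = 1e-8f) {
  float total = 0.0f;
  for (size_t i = 0; i < input_x.size(); ++i) {
    total += log_input ? std::exp(input_x[i]) - target[i] * input_x[i]
                       : input_x[i] - target[i] * std::log(input_x[i] + eps);
  }
  return total / input_x.size();  // reduction == "mean"
}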
@@ -1576,14 +1641,14 @@ REG_OP(PoissonNllLoss)
/**
*@brief rnn_gen_mask
* @par Inputs:
- * seq_length: A ND Tensor of type int32. Record the current length of each batch.\n
+ * @li seq_length: A ND Tensor of type int32. Record the current length of each batch.\n
*
* @par Attributes:
* @li num_step: A required int.\n
* @li hidden_size: A required int. \n
*
*
- * @par Ouputs:
+ * @par Output:
* y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
*
*/
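A sketch of the mask this op generates, assuming the natural reading of the doc: position (t, b, :) is 1.0 while t < seq_length[b], otherwise 0.0 (the helper name is ours):

#include <cstdint>
#include <vector>

std::vector<float> RnnGenMaskRef(const std::vector<int32_t>& seq_length,
                                 int num_step, int hidden_size) {
  const int batch = static_cast<int>(seq_length.size());
  std::vector<float> y(static_cast<size_t>(num_step) * batch * hidden_size, 0.0f);
  for (int t = 0; t < num_step; ++t)
    for (int b = 0; b < batch; ++b)
      if (t < seq_length[b])  // valid time step for this batch entry
        for (int h = 0; h < hidden_size; ++h)
          y[(static_cast<size_t>(t) * batch + b) * hidden_size + h] = 1.0f;
  return y;
}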
@@ -1601,16 +1666,18 @@ REG_OP(RnnGenMask)
* @par Inputs:
* Two inputs, including:
* @li x: A tensor. Must be one of the following types:
- * float16, float32.
+ * float16, float32. \n
+ *
+ * @par Inputs:
* @li target: A tensor. Must be the following types:
* int32. \n

* @par Attributes:
- * reduction: An optional string. Defaults to "mean" \n
+ * @li reduction: An optional string. Defaults to "mean" \n

* @par Outputs:
- * @li y: A Tensor has same element type as input x. \n
- * @li is_target: A Tensor has same element type as input target. \n
+ * y: A Tensor has same element type as input x. \n
+ * is_target: A Tensor has same element type as input target. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator MultiLabelMarginLoss. \n


+14 -14 third_party/fwkacllib/inc/ops/nn_ops.h

@@ -106,16 +106,16 @@ REG_OP(FusedBatchNormV2)
.OP_END_FACTORY_REG(FusedBatchNormV2)

/**
- * @brief Large amount of data sort. First operator of TopK.
+ * @brief: Large amount of data sort. First operator of TopK.
* @par Inputs:
* two input, including:
* @li input_data: A Tensor. Data to be sorted. Support float16
* @li input_index: A Tensor. Range(0, 2048). Datatype and format is same as input_data.
* @par Attributes:
- * k_num: Int. Number to be sorted.
+ * @li k_num: Int. Number to be sorted.
* @par Outputs:
- * One output, including:
- * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+ * 1 output, including:
+ * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
*/
REG_OP(SegmentSort)
.INPUT(input_data, TensorType({DT_FLOAT16}))
@@ -127,13 +127,13 @@ REG_OP(SegmentSort)
/**
* @brief: Large amount of data sort. Second operator of TopK.
* @par Inputs:
- * One input, including:
- * input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+ * two input, including:
+ * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16
* @par Attributes:
- * k_num: Int. Number to be sorted.
+ * @li k_num: Int. Number to be sorted.
* @par Outputs:
- * One output, including:
- * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+ * 1 output, including:
+ * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
*/
REG_OP(MultiMerge)
.INPUT(input_proposal, TensorType({DT_FLOAT16}))
@@ -142,14 +142,14 @@ REG_OP(MultiMerge)
.OP_END_FACTORY_REG(MultiMerge)

/**
- * @brief Large amount of data sort. Third operator of TopK.
+ * @brief: Large amount of data sort. Third operator of TopK.
* @par Inputs:
- * One input, including:
- * input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+ * two input, including:
+ * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16
* @par Attributes:
- * k_num: Int. Number to be sorted.
+ * @li k_num: Int. Number to be sorted.
* @par Outputs:
- * Two output, including:
+ * 2 output, including:
* @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted.
* @li output_index: A Tensor. int32. Data index.
*/
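Taken together, SegmentSort, MultiMerge, and this third operator implement a chunked TopK: sort each chunk, merge the sorted proposals, then emit the final values and indices. A single-threaded sketch of that idea (the helper is ours, not part of this header; chunk must be > 0):

#include <algorithm>
#include <functional>
#include <utility>
#include <vector>

std::vector<std::pair<float, int>> ChunkedTopKRef(const std::vector<float>& data,
                                                  size_t chunk, size_t k_num) {
  std::vector<std::pair<float, int>> proposals;  // (value, original index)
  for (size_t i = 0; i < data.size(); ++i)
    proposals.emplace_back(data[i], static_cast<int>(i));
  // SegmentSort stage: order each chunk independently (descending).
  for (size_t base = 0; base < proposals.size(); base += chunk) {
    auto last = proposals.begin() + std::min(base + chunk, proposals.size());
    std::sort(proposals.begin() + base, last, std::greater<>());
  }
  // MultiMerge + final stage, collapsed: merge all chunks and keep the top k.
  std::sort(proposals.begin(), proposals.end(), std::greater<>());
  if (proposals.size() > k_num) proposals.resize(k_num);
  return proposals;
}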


+27 -70 third_party/fwkacllib/inc/ops/nn_pooling_ops.h

@@ -29,7 +29,7 @@ namespace ge {
/**
*@brief Performs pooling on the input.
*@par Inputs:
- * x: An NCHW tensor of type float16, float32, int8.
+ *@li x: An NCHW tensor of type float16, float32, int8.
*@par Attributes:
*@li mode: An optional int32, specifying the pooling algorithm, either "0" (max pooling) or "1" (avg pooling). Defaults to "0".
*@li global_pooling: An optional bool. Defaults to "false".
@@ -50,7 +50,6 @@ namespace ge {
*dilation[2]: An optional int32, specifying the left dilation. Defaults to "1".
*dilation[3]: An optional int32, specifying the right dilation. Defaults to "1".
*@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0".
- *@li data_format: An optional string, Specify the data format of the input and output data. With the default format "NCHW".
*@par Outputs:
*y: An NCHW tensor of type float16, float32, int32.
*@attention Constraints:
@@ -205,7 +204,7 @@ REG_OP(AvgPool3D)
*y: The average pooled output tensor . \n

*@attention Constraints:
- *"ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+ *@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3D.
@@ -282,10 +281,10 @@ REG_OP(AvgPool3DGrad)
* @li data_format: A string, format of input data . \n

* @par Outputs:
- * output: The average pooled output tensor . \n
+ * @output: The average pooled output tensor . \n

* @attention Constraints:
- * "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+ * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3DGradD.
@@ -432,47 +431,6 @@ REG_OP(MaxPool3D)
.OP_END_FACTORY_REG(MaxPool3D)

/**
- * @brief Performs max pooling3d on both max values and indices.
- *
- * @par Inputs:
- * One input:
- * x: An 6D tensor. Supported type: float16. Format as NDC1HWC0.
- * @par Attributes:
- * @li ksize: A required list of int32 values,
- * specifying the size of the window for each dimension of the input tensor.
- * No default value.
- * @li strides: A required list of int32 values,
- * specifying the stride of the sliding window for each dimension of
- * the input tensor. No default value.
- * @li pads: A required 3*2-dimension-list of int32 values.
- * specifying the pad of three dimension of input, implement with 0.
- * @li dilation: dilation of kernel. default value is {1,1,1,1,1}.
- * @li ceil_mode: default value is false.
- * @li data_format: the format of torch input, default value is "NCDHW".
- * @li argmax_type: the function of this field is to determine the type of
- * output argmax, "bitmask" is the default value, the argmax will return
- * a img2col bitmask. "index_int32" and "index_int64" represent the torch
- * output indices.
- * @par Outputs:
- * y: An 6D tensor. the maxpool3d output(max value), format as NDoC1HoWoC0.
- * @par Outputs:
- * argmax: A 5D uint16 tensor. the indice output.
- * format as NC1HWC0, actually it represent N, Do, C1*ksize, Ho*Wo//16, 16.
- */
- REG_OP(MaxPool3DWithArgmax)
- .INPUT(x, TensorType::RealNumberType())
- .OUTPUT(y, TensorType::RealNumberType())
- .OUTPUT(argmax, TensorType::IndexNumberType())
- .REQUIRED_ATTR(ksize, ListInt)
- .REQUIRED_ATTR(strides, ListInt)
- .REQUIRED_ATTR(pads, ListInt)
- .ATTR(dilation, ListInt, {1, 1, 1, 1, 1})
- .ATTR(ceil_mode, Bool, false)
- .ATTR(data_format, String, "NCDHW")
- .ATTR(argmax_type, String, "bitmask")
- .OP_END_FACTORY_REG(MaxPool3DWithArgmax)
-
- /**
*@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n
* The output is of size H x W, for any input size.


@@ -564,7 +522,8 @@ REG_OP(MaxPool3DGradGrad)
* y: A mutable tensor. Has the same shape and type as "x1" . \n

* @attention Constraints:
- * @li ksize is limited by buffer with full tiling.
+ * @li Computing gradients of global pooling is not supported, which means
+ * "ksize < x1".
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

* @par Third-party framework compatibility
@@ -609,7 +568,7 @@ REG_OP(MaxPoolGrad)
* @li Other dimensions of ksize and strides is 1 . \n

* @par Outputs:
- * y: Has the same type and format as input "x1" . \n
+ * @li y: Has the same type and format as input "x1" . \n

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator MaxPoolGradGrad.
@@ -629,7 +588,7 @@ REG_OP(MaxPoolGradGrad)
*@brief Performs max_pool_ext2 on the input . \n

*@par Inputs:
- * Three inputs:
+ * Two inputs:
*@li x: An NC1HWC0 Tensor of type float16.
*@li strides: A required type of int32 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li ksize: A required type of int32 values, specifying the size of the window for each dimension of the input tensor. No default value.
@@ -676,8 +635,7 @@ REG_OP(MaxPoolV2)
*@li strides: A required list of int8, int16, int32, or int64 values,
* specifying the stride of the sliding window for each dimension of
* the input tensor. No default value.
- *@li padding: A required string. No default value .
- *@li Targmax: An optional int with default value 7 . \n
+ *@li padding: A required string. No default value . \n

*@par Outputs:
*@li y: A Tensor. Has the same type and format as input "x".
@@ -687,7 +645,7 @@ REG_OP(MaxPoolV2)
* ksize[1] * ksize[2] <= 255.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1,
* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
- *@li "padding" is either "SAME" or "VALID" .
+ *@li "padding" is either "SAME" or "VALID" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolWithArgmax.
@@ -752,15 +710,14 @@ REG_OP(MaxPoolGradWithArgmax)
*@brief Performs transform mask to argmax . \n

*@par Inputs:
- * Two inputs:
- *@li x: An NC1HWC0 Tensor of type float16.
- *@li mask: An NC1HWC0 Tensor of type uint16 . \n
+ * Two input:
+ *x: An NC1HWC0 Tensor of type float16.
+ *mask: An NC1HWC0 Tensor of type uint16 . \n

*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value.
*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
- *@li padding: A required string. No default value .
- *@li originshape: A required list of int8, int16, int32, or int64 values. No default value. \n
+ *@li padding: A required string. No default value . \n

*@par Outputs:
*argmax: An NC1HWC0 Tensor of type int32 . \n
@@ -797,7 +754,7 @@ REG_OP(Mask2Argmax)
* @li strides: A required list, specifying the stride of the sliding window.
* @li padding: A required string, window sliding mode. Either SAME or VALID.
* @par Outputs:
- * y: Result tensor. Supported type: float, double, int32,
+ * @li y: Result tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64

* @attention Constraints:
@@ -810,7 +767,7 @@ REG_OP(Mask2Argmax)
* (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16), else failed . \n

* @par Third-party framework compatibility
- * Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax.
+ * @li Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax.
*/
REG_OP(MaxPoolGradGradWithArgmax)
.INPUT(x, TensorType::RealNumberType())
@@ -974,11 +931,11 @@ REG_OP(AvgPoolV2GradD)
.OP_END_FACTORY_REG(AvgPoolV2GradD)

/**
- *@brief upsample the layer, similar to the nearest-neighbor difference scaling algorithm.
+ *@brief :upsample the layer

*@par Inputs:
* one input, including:
- * x: A tensor of type float16 or float32.
+ *@li x: A tensor of type float16 or float32.
*@par Attributes:
*@li scale: A optional float32, scale factor of x. Defaults to "1.0".
*@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2".
@@ -1462,7 +1419,7 @@ REG_OP(MaxPoolV3)
* the floor function will be used. Default False \n

* @par Outputs:
- * out_grad: A mutable tensor. Has the same shape and type as "x1" . \n
+ * y: A mutable tensor. Has the same shape and type as "x1" . \n

* @attention Constraints:
* @li Computing gradients of global pooling is not supported, which means
@@ -1490,8 +1447,8 @@ REG_OP(MaxPoolV3Grad)
*@brief Performs Dilation2D on the input . \n

*@par Inputs:
- *@li x: A tensor of shape is 4d, format is support NHWC.
- *@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n
+ *x: A tensor of shape is 4d, format is support NHWC.
+ *filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n

*@par Attributes:
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
@@ -1523,9 +1480,9 @@ REG_OP(Dilation2D)
*@brief Performs Dilation2DBackpropFilter on the input. \n

*@par Inputs:
- *@li x: A tensor of shape is 4d, format is support NHWC.
- *@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x.
- *@li out_backprop: Has the same type and format as input x and the c dimension is same with x. \n
+ *x: A tensor of shape is 4d, format is support NHWC.
+ *filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x.
+ *out_backprop: Has the same type and format as input x and the c dimension is same with x. \n

*@par Attributes
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1.
@@ -1562,9 +1519,9 @@ REG_OP(Dilation2DBackpropFilter)
*@brief Performs Dilation2DBackpropInput on the input. \n

*@par Inputs:
- *@li x: A tensor of shape is 4d, format is support NHWC.
- *@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x.
- *@li out_backprop: Has the same type and format as input x and the c dimension is same with x. \n
+ *x: A tensor of shape is 4d, format is support NHWC.
+ *filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x.
+ *out_backprop: Has the same type and format as input x and the c dimension is same with x. \n

*@par Attributes
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1.


+4 -7 third_party/fwkacllib/inc/ops/nn_training_ops.h

@@ -289,8 +289,7 @@ REG_OP(SparseApplyAdagradV2D)
* Should be from a Variable().
*@li lr: A scalar. Has the same type as "var".
*@li grad: A tensor for the gradient. Has the same type as "var".
- *@li momentum: Momentum. Must be a scalar.
-
+ *
*@par Attributes:
*@li use_nesterov: An optional bool. Defaults to "False".
* If "True", the tensor passed to compute grad will be
@@ -702,7 +701,7 @@ REG_OP(ApplyPowerSignD)
/**
*@brief Updates "var" as FOBOS algorithm with fixed learning rate.
* prox_v = var - alpha * delta
- * var = sign(prox_v)/(1+alpha * l2) * max{|prox_v|-alpha * l1,0}
+ * var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
*
*@attention Constraints:
* the input tensors must have the same shape.
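Element-wise, the update quoted above works out to the following (a direct transcription, with sign and abs spelled out):

#include <algorithm>
#include <cmath>

float ApplyProximalGradientDescentRef(float var, float alpha, float l1,
                                      float l2, float delta) {
  const float prox_v = var - alpha * delta;  // plain gradient step
  const float sign = (prox_v > 0.0f) ? 1.0f : ((prox_v < 0.0f) ? -1.0f : 0.0f);
  // Soft-threshold by alpha*l1, then shrink by the l2 factor.
  return sign / (1.0f + alpha * l2) *
         std::max(std::fabs(prox_v) - alpha * l1, 0.0f);
}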
@@ -2129,12 +2128,10 @@ REG_OP(FusedMulApplyMomentumExtern)
* otherwise the behavior is undefined, but may exhibit less contention.
*
*@par Outputs:
- * @li var: A mutable tensor. Has the same type as input "var".
- * @li accum: A mutable tensor. Has the same type as input "accum".
+ * var: A mutable tensor. Has the same type as input "var".
*
*@attention Constraints:
- * @li var: A mutable tensor. Has the same type as input "var".
- * @li accum: A mutable tensor. Has the same type as input "accum".
+ * The input tensors must have the same shape.
*
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ResourceApplyKerasMomentum.


+41 -44 third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h

@@ -28,8 +28,8 @@ namespace ge {
*@brief Computes the for the gelu of "x" . \n

*@par Inputs:
- *One input, including:
- *x: A Tensor. Must be one of the following types: float16, float32
+ *Two inputs, including:
+ * @li x: A Tensor. Must be one of the following types: float16, float32

*@par Outputs:
*y: A Tensor. Has the same type as "x".
@@ -66,8 +66,8 @@ REG_OP(GeluGrad)
*@brief Computes the for the fast_gelu of "x" . \n

*@par Inputs:
- *One input, including:
- *x: A Tensor. Must be one of the following types: float16, float32
+ *Two inputs, including:
+ * @li x: A Tensor. Must be one of the following types: float16, float32

*@par Outputs:
*y: A Tensor. Has the same type as "x".
@@ -83,7 +83,7 @@ REG_OP(FastGelu)
*@brief Computes the gradient for the fast_gelu of "x" . \n

*@par Inputs:
- *Two inputs, including:
+ *Three inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32
* @li x: A Tensor of the same type as "dy" . \n


@@ -169,7 +169,7 @@ REG_OP(Relu)
* x: A Tensor of type RealNumberType . \n

* @par Outputs:
- * y: A Tensor with the same type as x . \n
+ * y: A Tensor of type RealNumberType . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Relu6.
@@ -209,12 +209,8 @@ REG_OP(Relu6D)
* backprops = gradients * (features > 0) * (features < 6) . \n

* @par Inputs:
- * @li gradients: A Tensor of type RealNumberType. The backpropagated
- gradients to the corresponding Relu6 operation.
- * @li features: A Tensor with the same type as gradients. The features passed
- as input to the corresponding Relu6 operation, or its output;
- using either one produces the same result. \n
-
+ * @li features: A Tensor of type RealNumberType.
+ * @li gradients: A Tensor of type RealNumberType . \n

* @par Outputs:
* backprops: A Tensor of type RealNumberType . \n
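The formula in the brief is the entire kernel; element-wise it reads:

#include <vector>

std::vector<float> Relu6GradRef(const std::vector<float>& gradients,
                                const std::vector<float>& features) {
  std::vector<float> backprops(features.size());
  for (size_t i = 0; i < features.size(); ++i)
    // Gradient passes through only where the forward input was inside (0, 6).
    backprops[i] = gradients[i] * (features[i] > 0.0f) * (features[i] < 6.0f);
  return backprops;
}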
@@ -232,7 +228,7 @@ REG_OP(Relu6Grad)
*Applies the element-wise function:
* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha .
*@par Inputs:
- *Two inputs, including:
+ *One inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32.
* @li activations: A tensor. Must be one of the following types:
@@ -242,7 +238,7 @@ REG_OP(Relu6Grad)
*y: A Tensor with the same type and shape of grads's.
*
*@par Attributes:
- *alpha: scalar parameter, default value = 1.0
+ *@li alpha: scalar parameter, default value = 1.0
*/
REG_OP(EluGradV2)
.INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -543,9 +539,13 @@ REG_OP(Elu)
*x: A float16, float32, for the input data type . \n

*@par Attributes:
- *@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
- *@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
- *@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
+ *alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
+
+ *@par Attributes:
+ *alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
+
+ *@par Attributes:
+ *alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n

*@par Outputs:
*y: A float16, float32, for the normalized result . \n
@@ -706,8 +706,8 @@ REG_OP(Mish)
* @li x: A Tensor. Must be one of the following types: float16, float32
* @li tanhx: A Tensor. shape, datatype and format is same as x
* @par Outputs:
- * One output, including:
- * x_grad: A Tensor. shape, datatype and format is same as x
+ * 1 output, including:
+ * @li x_grad: A Tensor. shape, datatype and format is same as x
*/

REG_OP(MishGrad)
@@ -721,20 +721,20 @@ REG_OP(MishGrad)
* @brief pytorch hardtanh_backward operator.
*
* @par Inputs:
- * Two inputs, including:
+ * 2 inputs, including:
* @li result, minimum tensor of the linear region range,
* datatype: float16/float32, format:ND/5HD.
* @li grad, maximum tensor of the linear region range,
* datatype:float16/float32, format:ND/5HD. \n

* @par Attributes:
- * Two attributes, including:
+ * 2 attributes, including:
* @li min_val, minimum value of the linear region range, datatype:float.
* @li max_val, maximum value of the linear region range, datatype:float. \n

* @par Outputs:
- * One output, including:
- * y, hardtanh_backward output tensor, datatype and format is same as
+ * 1 output, including:
+ * @li y, hardtanh_backward output tensor, datatype and format is same as
* input result. \n

* @attention Constraints:
@@ -756,7 +756,7 @@ REG_OP(HardtanhGrad)

* @par Inputs:
* One inputs, including:
- * x: A mutable Tensor. Must be one of the following types:
+ * @li x: A mutable Tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
@@ -765,7 +765,7 @@ REG_OP(HardtanhGrad)
* @li threshold: An optional float. Defaults to "20.0" \n

* @par Outputs:
- * y: A mutable Tensor. Has the same type as "x" \n
+ * @li y: A mutable Tensor. Has the same type as "x" \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Softplus.
@@ -792,7 +792,7 @@ REG_OP(SoftplusV2)
* @li threshold: An optional float. Defaults to "20.0" \n

* @par Outputs:
- * output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n
+ * @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SoftplusGrad.
@@ -809,16 +809,13 @@ REG_OP(SoftplusV2Grad)
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor)
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
*
- * @par Inputs:
+ * @par inputs
* one input including:
- * x: input A Tensor. Must be one of the following types: float32, float16
+ * @li x: input A Tensor. Must be one of the following types: float32, float16
*
- * @par Attributes:
- * alpha: An optional float. Defaults to 1.0. \n
-
- * @par Outputs:
+ * @par output
* one output including:
- * y: A Tensor of the same type as x
+ * @li y: A Tensor of the same type as x
*
*/
REG_OP(ThresholdedRelu)
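Element-wise form of the function stated in the brief (alpha = 1.0 is the default the removed attribute text documented):

float ThresholdedReluRef(float x, float alpha = 1.0f) {
  return x > alpha ? x : 0.0f;  // y = x for x > alpha, y = 0 otherwise
}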
@@ -832,14 +829,14 @@ REG_OP(ThresholdedRelu)

* @par Inputs:
* One inputs, including:
- * input_x: A tensor. Must be one of the following types:
+ * @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
- * lambd: An optional float. Defaults to 0.5. \n
+ * @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
- * output_y: A Tensor with the same dtype and shape of input_x's. \n
+ * y: A Tensor with the same dtype and shape of input_x's. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardshrink. \n
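Element-wise, HardShrink zeroes the band [-lambd, lambd] and passes everything else through, matching the PyTorch operator cited above:

#include <cmath>

float HardShrinkRef(float x, float lambd = 0.5f) {
  return std::fabs(x) > lambd ? x : 0.0f;  // keep only |x| > lambd
}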
@@ -866,7 +863,7 @@ REG_OP(HardShrink)
*backprops: A Tensor with the same type and shape of features's. \n
*
*@par Attributes:
- *lambd: An optional float. Defaults to 0.5. \n
+ *@li lambd: An optional float. Defaults to 0.5. \n
*
*@par Third-party framework compatibility
*Compatible with the Pytorch operator Hardshrink_backward. \n
@@ -883,7 +880,7 @@ REG_OP(HardShrink)

* @par Inputs:
* One inputs, including:
- * input_x: A tensor. Must be one of the following types:
+ * @li input_x: A tensor. Must be one of the following types:
* float16, float32, int32. \n

* @par Attributes:
@@ -908,11 +905,11 @@ REG_OP(HardSigmoid)

* @par Inputs:
* One inputs, including:
- * input_x: A tensor. Must be one of the following types:
+ * @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
- * lambd: An optional float. Defaults to 0.5. \n
+ * @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
* y: A Tensor with the same dtype and shape of input_x's. \n
@@ -936,7 +933,7 @@ REG_OP(SoftShrink)
* @li input_x: A tensor of the same dtype as "input_grad". \n

* @par Attributes:
- * lambd: An optional float. Defaults to 0.5. \n
+ * @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
* y: A Tensor of the same dtype and shape as "input_grad". \n
@@ -979,12 +976,12 @@ REG_OP(LogSigmoidGrad)

*@par Inputs:
*One inputs, including:
- * x: A tensor. Must be one of the following types:
+ * @li x: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Outputs:
*One outputs, including:
- * y: A tensor with the same type and shape of x's. \n
+ * @li y: A tensor with the same type and shape of x's. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator LogSigmoid. \n
@@ -1006,7 +1003,7 @@ REG_OP(LogSigmoid)

*@par Outputs:
*One outputs, including:
- * y: A tensor with the same type and shape of x's. \n
+ * @li y: A tensor with the same type and shape of x's. \n

* @par Attributes:
* @li alpha: An optional float. Defaults to 0.16666666. \n


+8 -14 third_party/fwkacllib/inc/ops/pad_ops.h

@@ -33,8 +33,8 @@ namespace ge {

*@li value: A 0D scalar. Specifies the value to fill the returned tensor.
* Must be one of the following types:
- * float16, float32, double, int32, uint8, int16, int8, complex64, int64, bool,
- * qint8, quint8, qint32, qint16, quint16, uint16, complex128, uint32, uint64.
+ * float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+ * qint8, quint8, qint32, uint16, complex128, uint32, uint64.
*
*@par Outputs:
* y: A tensor. Has the same type as "value".
@@ -46,14 +46,8 @@ namespace ge {
*/
REG_OP(Fill)
.INPUT(dims, TensorType::IndexNumberType())
- .INPUT(value, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16,
- DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8,
- DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16,
- DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64}))
- .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16,
- DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8,
- DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16,
- DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64}))
+ .INPUT(value, TensorType::BasicType())
+ .OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(Fill)
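The contract is simple: produce a tensor of shape "dims" with every element equal to "value". A host-side sketch over a flattened, row-major buffer:

#include <cstdint>
#include <vector>

std::vector<float> FillRef(const std::vector<int64_t>& dims, float value) {
  size_t count = 1;
  for (int64_t d : dims) count *= static_cast<size_t>(d);  // product of dims
  return std::vector<float>(count, value);                 // every element == value
}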


/** /**
@@ -219,11 +213,11 @@ REG_OP(PadV2)
*@brief Pads a tensor . \n

*@par Inputs:
- *@li x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
- *@li constant_values: A Tensor. Must have the same type as input.
+ *x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
+ *constant_values: A Tensor. Must have the same type as input.

*@par Attributes:
- *paddings: A required Attribute.
+ *paddings: An optional "vector<vector<int>>". Defaults to "{}".
* For each dimension D of input, paddings[D, 0] indicates how many
* values to add before the contents of tensor in that dimension,
* and paddings[D, 1] indicates how many values to add after the
@@ -467,7 +461,7 @@ REG_OP(FillV2)
* @li dims: A required listInt to specify the shape that the value to fill.

* @par Outputs:
- * y: A Tensor. Has the shape specified by attr shape, and full of the value specified by attr value.
+ * @li y: A Tensor. Has the shape specified by attr shape, and full of the value specified by attr value.

* @par Third-party framework compatibility
* Compatible with the ONNX operator ConstantOfShape.


+67 -68 third_party/fwkacllib/inc/ops/parsing_ops.h

@@ -54,26 +54,27 @@ REG_OP(StringToNumber)
/**
*@brief Convert serialized tensorflow.TensorProto prototype to Tensor.
*@brief Parse an Example prototype.
- *@par Inputs:
- *@li serialized: A Tensor of type string.
- *@li dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n
+ *@par Input:
+ *serialized: A Tensor of type string.
+ *dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n

*@par Attributes:
- *@li num_sparse: type int num of inputs sparse_indices, sparse_values, sparse_shapes
- *@li sparse_keys: ListString
- *@li sparse_types: types of sparse_values
- *@li dense_keys: ListString
- *@li Tdense: output of dense_defaults type
- *@li dense_shapes: output of dense_defaults shape \n
+ *num_sparse: type int num of inputs sparse_indices, sparse_values, sparse_shapes
+ *out_type: output type
+ *sparse_keys: ListString
+ *sparse_types: types of sparse_values
+ *dense_keys: ListString
+ *dense_shapes: output of dense_defaults shape
+ *dense_types: output of dense_defaults type \n

*@par Outputs:
- *@li sparse_indices: A Tensor of type string.
- *@li sparse_values: Has the same type as sparse_types.
- *@li sparse_shapes: A Tensor of type int64
- *@li dense_values: Has the same type as dense_defaults.
+ *sparse_indices: A Tensor of type string.
+ *sparse_values: Has the same type as sparse_types.
+ *sparse_shapes: A Tensor of type int64
+ *dense_values: Has the same type as dense_defaults.

*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- */
+ **/
REG_OP(ParseSingleExample)
.INPUT(serialized, TensorType({DT_STRING}))
.DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
@@ -91,16 +92,16 @@ REG_OP(ParseSingleExample)

/**
*@brief Decodes raw file into tensor . \n
- *@par Inputs:
+ *@par Input:
*bytes: A Tensor of type string.

*@par Attributes:
- *@li little_endian: bool true
- *@li out_type: output type
+ *little_endian: bool true
+ *out_type: output type

*@par Outputs:
*Output: A Tensor
- */
+ **/
REG_OP(DecodeRaw)
.INPUT(bytes, TensorType({DT_STRING}))
.OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT,
@@ -146,20 +147,18 @@ REG_OP(ParseTensor)

*@par Inputs:
*Inputs include:
- *@li records: Each string is a record/row in the csv and all records should have the
+ *records: Each string is a record/row in the csv and all records should have the
*same format. \n
- *@li record_defaults: One tensor per column of the input record, with either a
+ *record_defaults: One tensor per column of the input record, with either a
*scalar default value for that column or an empty vector if the column is
*required. \n

*@par Attributes:
- *@li OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n
- *@li field_delim: char delimiter to separate fields in a record. \n
- *@li use_quote_delim: If false, treats double quotation marks as regular characters
+ *OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n
+ *field_delim: char delimiter to separate fields in a record. \n
+ *use_quote_delim: If false, treats double quotation marks as regular characters
*inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n
- *@li na_value: Additional string to recognize as NA/NaN. \n
- *@li select_cols: Optional sorted list of column indices to select. If specified,
- only this subset of columns will be parsed and returned.
+ *na_value: Additional string to recognize as NA/NaN. \n

*@par Outputs:
*output: A Tensor. Has the same type as x . \n
@@ -187,25 +186,25 @@ REG_OP(DecodeCSV)
/**
*@brief Convert serialized tensorflow.TensorProto prototype to Tensor.
*@brief Parse an Example prototype.
- *@par Inputs:
- *@li serialized: A Tensor of type string. \n
- *@li name: A Tensor of type string. \n
- *@li sparse_keys: Dynamic input tensor of string. \n
- *@li dense_keys: Dynamic input tensor of string \n
- *@li dense_defaults: Dynamic input tensor type as string, float, int64. \n
+ *@par Input:
+ *serialized: A Tensor of type string. \n
+ *name: A Tensor of type string. \n
+ *sparse_keys: Dynamic input tensor of string. \n
+ *dense_keys: Dynamic input tensor of string \n
+ *dense_defaults: Dynamic input tensor type as string, float, int64. \n

*@par Attributes:
- *@li Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n
- *@li Ndense: Number of dense_keys \n
- *@li sparse_types: types of sparse_values \n
- *@li Tdense: Type of dense_defaults dense_defaults and dense_values \n
- *@li dense_shapes: output of dense_defaults shape \n
+ *Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n
+ *Ndense: Number of dense_keys \n
+ *sparse_types: types of sparse_values \n
+ *Tdense: Type of dense_defaults dense_defaults and dense_values \n
+ *dense_shapes: output of dense_defaults shape \n

*@par Outputs:
- *@li sparse_indices: A Tensor of type string. \n
- *@li sparse_values: Has the same type as sparse_types. \n
- *@li sparse_shapes: A Tensor of type int64 \n
- *@li dense_values: Has the same type as dense_defaults. \n
+ *sparse_indices: A Tensor of type string. \n
+ *sparse_values: Has the same type as sparse_types. \n
+ *sparse_shapes: A Tensor of type int64 \n
+ *dense_values: Has the same type as dense_defaults. \n
*@par Third-party framework compatibility \n
*@li compatible with tensorflow StringToNumber operator. \n
*/
@@ -229,37 +228,37 @@ REG_OP(ParseExample)
/**
*@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed
*tensors.
- *@par Inputs:
- *@li serialized: A Tensor of type string. \n
- *@li feature_list_dense_missing_assumed_empty: A Tensor of type string. \n
- *@li context_sparse_keys: Dynamic input tensor of string. \n
- *@li context_dense_keys: Dynamic input tensor of string \n
- *@li feature_list_sparse_keys: Dynamic input tensor of string \n
- *@li feature_list_dense_keys: Dynamic input tensor of string \n
- *@li context_dense_defaults: Dynamic input tensor of string, float, int64 \n
- *@li debug_name: A Tensor of type string. \n
+ *@par Input:
+ *serialized: A Tensor of type string. \n
+ *feature_list_dense_missing_assumed_empty: A Tensor of type string. \n
+ *context_sparse_keys: Dynamic input tensor of string. \n
+ *context_dense_keys: Dynamic input tensor of string \n
+ *feature_list_sparse_keys: Dynamic input tensor of string \n
+ *feature_list_dense_keys: Dynamic input tensor of string \n
+ *context_dense_defaults: Dynamic input tensor of string, float, int64 \n
+ *debug_name: A Tensor of type string. \n

*@par Attributes:
- *@li Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n
- *@li Ncontext_dense: Number of context_dense_keys \n
- *@li Nfeature_list_sparse: Number of feature_list_sparse_keys \n
- *@li Nfeature_list_dense: Number of feature_list_dense_keys \n
- *@li context_sparse_types: Types of context_sparse_values \n
- *@li Tcontext_dense: Number of dense_keys \n
- *@li feature_list_dense_types: Types of feature_list_dense_values \n
- *@li context_dense_shapes: Shape of context_dense \n
- *@li feature_list_sparse_types: Type of feature_list_sparse_values \n
- *@li feature_list_dense_shapes: Shape of feature_list_dense \n
+ *Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n
+ *Ncontext_dense: Number of context_dense_keys \n
+ *Nfeature_list_sparse: Number of feature_list_sparse_keys \n
+ *Nfeature_list_dense: Number of feature_list_dense_keys \n
+ *context_sparse_types: Types of context_sparse_values \n
+ *Tcontext_dense: Number of dense_keys \n
+ *feature_list_dense_types: Types of feature_list_dense_values \n
+ *context_dense_shapes: Shape of context_dense \n
+ *feature_list_sparse_types: Type of feature_list_sparse_values \n
+ *feature_list_dense_shapes: Shape of feature_list_dense \n

*@par Outputs:
- *@li context_sparse_indices: Dynamic output tensor of type int64. \n
- *@li context_sparse_values: Dynamic output tensor of type string, float, int64. \n
- *@li context_sparse_shapes: Dynamic output tensor of type int64 \n
- *@li context_dense_values: Dynamic output tensor of type string, float, int64. \n
- *@li feature_list_sparse_indices: Dynamic output tensor of type int64. \n
- *@li feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n
- *@li feature_list_sparse_shapes: Dynamic output tensor of type int64 \n
- *@li feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n
+ *context_sparse_indices: Dynamic output tensor of type int64. \n
+ *context_sparse_values: Dynamic output tensor of type string, float, int64. \n
+ *context_sparse_shapes: Dynamic output tensor of type int64 \n
+ *context_dense_values: Dynamic output tensor of type string, float, int64. \n
+ *feature_list_sparse_indices: Dynamic output tensor of type int64. \n
+ *feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n
+ *feature_list_sparse_shapes: Dynamic output tensor of type int64 \n
+ *feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n
*@par Third-party framework compatibility \n
*@li compatible with tensorflow StringToNumber operator. \n
*/


+4 -6 third_party/fwkacllib/inc/ops/quantize_ops.h

@@ -63,11 +63,10 @@ REG_OP(Dequantize)
/**
*@brief Quantizes the input . \n
*@par Inputs:
- *@li x: shape and dtype of input_x. \n
- *@li scales: shape and dtype of input_scales. \n
- *@li zero_points: shape and dtype of input_zero_points \n
+ *x: shape and dtype of input_x. \n
+ *scales: shape and dtype of input_scales. \n
+ *zero_points: shape and dtype of input_zero_points \n
*@par Attributes:
- *@li dtype: required, type.
*@li axis: the processed dim. \n
*@par Outputs:
*y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n
@@ -92,8 +91,7 @@ REG_OP(Quantize)
*@li offset: A required float16, specifying the offset.
*@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
*@li round_mode: An optional string, specifying the float16 to int8 cast type.
- * The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" .
- *@li dst_type: An optional int32, specifying the output data type. Defaults to "DT_INT8" . \n
+ * The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round" . \n

*@par Outputs:
*y: The quantized output tensor of type int8 and with format NC1HWC0 . \n


+2 -7 third_party/fwkacllib/inc/ops/ragged_array_ops.h

@@ -37,18 +37,13 @@ namespace ge {
*deprecated name. *deprecated name.
*@li indices: Indices in the outermost dimension of `params` of the values that should be *@li indices: Indices in the outermost dimension of `params` of the values that should be
*gathered. *gathered.

*@par Attributes:
*@li PARAMS_RAGGED_RANK:The ragged rank of the params_nested_splits.
*@li Tsplits:A type of output_nested_splits.
*@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain *@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain
*this number of `row_splits` tensors. This value should equal *this number of `row_splits` tensors. This value should equal
*`indices.shape.ndims + params.ragged_rank - 1` . \n *`indices.shape.ndims + params.ragged_rank - 1` . \n


*@par Outputs: *@par Outputs:
*@li output_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the
*returned RaggedTensor.
*@li output_dense_values: The `flat_values` for the returned RaggedTensor. \n
*y: Returns the `nested_row_splits` tensors that define the row-partitioning for the
*returned RaggedTensor, together with its `flat_values` . \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with tensorflow RaggedGather operator. * Compatible with tensorflow RaggedGather operator.
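For context, a plausible registration matching the comment above (hypothetical name; input/output names assumed from the TensorFlow RaggedGather signature, not copied from the header):

REG_OP(RaggedGatherSketch)  // hypothetical name, for illustration only
    .DYNAMIC_INPUT(params_nested_splits, TensorType({DT_INT32, DT_INT64}))
    .INPUT(params_dense_values, TensorType::BasicType())
    .INPUT(indices, TensorType({DT_INT32, DT_INT64}))
    .DYNAMIC_OUTPUT(output_nested_splits, TensorType({DT_INT32, DT_INT64}))
    .OUTPUT(output_dense_values, TensorType::BasicType())
    .REQUIRED_ATTR(PARAMS_RAGGED_RANK, Int)
    .REQUIRED_ATTR(OUTPUT_RAGGED_RANK, Int)
    .ATTR(Tsplits, Type, DT_INT64)
    .OP_END_FACTORY_REG(RaggedGatherSketch)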


+ 2
- 1
third_party/fwkacllib/inc/ops/ragged_conversion_ops.h

@@ -61,6 +61,7 @@ REG_OP(RaggedTensorToSparse)
*@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n *@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n


*@par Inputs: *@par Inputs:
*Six inputs, including:
*@li shape:A `Tensor`. Must be one of the following types: `int64`, `int32`. *@li shape:A `Tensor`. Must be one of the following types: `int64`, `int32`.
*@li values:A 1D tensor representing the values of the ragged tensor. *@li values:A 1D tensor representing the values of the ragged tensor.
*@li default_value:A `Tensor`. Must have the same type as `values`. *@li default_value:A `Tensor`. Must have the same type as `values`.
@@ -77,7 +78,7 @@ The types of the row partition tensors. At present, these can be:
is preceded by "FIRST_DIM_SIZE" . \n is preceded by "FIRST_DIM_SIZE" . \n


*@par Outputs: *@par Outputs:
*result: A `Tensor`. Has the same type as `values`.
*@li result: A `Tensor`. Has the same type as `values`.
*/ */
REG_OP(RaggedTensorToTensor) REG_OP(RaggedTensorToTensor)
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) .INPUT(shape, TensorType({DT_INT32, DT_INT64}))


+ 1
- 5
third_party/fwkacllib/inc/ops/ragged_math_ops.h

@@ -35,11 +35,7 @@ namespace ge {
*@li deltas: The deltas of each range . \n *@li deltas: The deltas of each range . \n


*@par Outputs: *@par Outputs:
*@li rt_dense_values:The `flat_values` for the returned `RaggedTensor`.
*@li rt_nested_splits:The `row_splits` for the returned `RaggedTensor`. \n

*@par Attributes:
*Tsplits:A type of rt_nested_splits.
*y: Returns the `row_splits` for the returned `RaggedTensor`, followed by its `flat_values` . \n


*@attention Constraints: *@attention Constraints:
*The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. *The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors.


+ 21
- 122
third_party/fwkacllib/inc/ops/random_ops.h

@@ -148,32 +148,6 @@ REG_OP(RandomGamma)
.OP_END_FACTORY_REG(RandomGamma) .OP_END_FACTORY_REG(RandomGamma)


/** /**
*@brief Returns the random permutation of integers from 0 to n-1. \n

*@par Attributes:
*@li n: A required int.
*@li dtype: An optional str. Defaults to int64 .
*@li layout: An optional int. Defaults to 0 . \n

*@par Outputs:
*out: A Tensor. Must be one of the following types:
float16, float32, double, int8, uint8, int16, int32, int64. \n

*@attention Constraints:
*The implementation for Randperm on Ascend uses AICPU, with bad performance.

*@par Third-party framework compatibility
*@li compatible with Pytorch Randperm operator.
*/
REG_OP(Randperm)
.OUTPUT(out, TensorType({DT_INT64, DT_INT32, DT_INT16,
DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.REQUIRED_ATTR(n, Int)
.ATTR(layout, Int, 0)
.ATTR(dtype, Type, DT_INT64)
.OP_END_FACTORY_REG(Randperm)

/**
*@brief Outputs random values from the Poisson distribution(s) described by rate . \n *@brief Outputs random values from the Poisson distribution(s) described by rate . \n


*@par Inputs: *@par Inputs:
@@ -183,12 +157,11 @@ REG_OP(Randperm)


*@par Attributes: *@par Attributes:
*@li dtype: An optional type from: half, float32, float64, int32, int64. Defaults to int64. *@li dtype: An optional type from: half, float32, float64, int32, int64. Defaults to int64.
*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero,
the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n
*@li seed: An optional int. Defaults to 0.
*@li seed2: An optional int. Defaults to 0 . \n


*@par Outputs: *@par Outputs:
*y: A Tensor of type dtype float16, float, double, int32, int64. \n
*y: A Tensor of type dtype . \n


*@attention Constraints: *@attention Constraints:
*The implementation for RandomPoisson on Ascend uses AICPU, with bad performance. *The implementation for RandomPoisson on Ascend uses AICPU, with bad performance.
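The seed/seed2 pair documented here follows the registry's usual two-optional-Int pattern. A plausible sketch (input names assumed from the TensorFlow op; not the header's verbatim registration):

REG_OP(RandomPoissonSketch)  // hypothetical name, for illustration only
    .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
    .INPUT(rate, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
    .ATTR(dtype, Type, DT_INT64)
    .ATTR(seed, Int, 0)
    .ATTR(seed2, Int, 0)
    .OP_END_FACTORY_REG(RandomPoissonSketch)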
@@ -215,13 +188,11 @@ REG_OP(RandomPoisson)
*x: A Tensor. The tensor to be shuffled . \n *x: A Tensor. The tensor to be shuffled . \n


*@par Attributes: *@par Attributes:
*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero,
the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n
*@li seed: An optional int. Defaults to 0.
*@li seed2: An optional int. Defaults to 0 . \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as x . A Tensor of type float16, float,
*double, int32, int64, int16, uint16, int8, uint8, int32,int64. \n
*y: A Tensor. Has the same type as x . \n


*@attention Constraints: *@attention Constraints:
*The implementation for RandomShuffle on Ascend uses AICPU, with bad performance. *The implementation for RandomShuffle on Ascend uses AICPU, with bad performance.
@@ -249,12 +220,11 @@ REG_OP(RandomShuffle)


*@par Attributes: *@par Attributes:
*@li dtype: A type from: half, float16, float32, float64. The type of the output. *@li dtype: A type from: half, float16, float32, float64. The type of the output.
*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero,
the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n
*@li seed: An optional int. Defaults to 0.
*@li seed2: An optional int. Defaults to 0 . \n


*@par Outputs: *@par Outputs:
*y: A Tensor of type float32, float16, double. \n
*y: A Tensor of type dtype . \n


*@attention Constraints: *@attention Constraints:
*The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance. *The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance.
@@ -271,28 +241,6 @@ REG_OP(RandomStandardNormal)
.OP_END_FACTORY_REG(RandomStandardNormal) .OP_END_FACTORY_REG(RandomStandardNormal)


/** /**
*@brief Output random value from separate normal distribution. \n

*@par Inputs:
*Inputs include:
*mean: A tensor containing the mean of each output element's normal distribution .
*std: A tensor containing the standard deviation of each output element's normal distribution. \n
*@par Outputs:
*y: A Tensor of type dtype . \n

*@attention Constraints:
*The implementation for Normal on Ascend uses AICPU, with bad performance.

*@par Third-party framework compatibility
*@li compatible with Pytorch Normal operator.
*/
REG_OP(Normal)
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(std, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(Normal)

/**
*@brief Outputs random integers from a uniform distribution . \n *@brief Outputs random integers from a uniform distribution . \n


*@par Inputs: *@par Inputs:
@@ -302,9 +250,8 @@ REG_OP(Normal)
* @li max: A Tensor. Must have the same type as minval. 0-D . \n * @li max: A Tensor. Must have the same type as minval. 0-D . \n


*@par Attributes: *@par Attributes:
*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero,
the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n
*@li seed: An optional int. Defaults to 0.
*@li seed2: An optional int. Defaults to 0 . \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as min . \n *y: A Tensor. Has the same type as min . \n
@@ -333,9 +280,8 @@ REG_OP(RandomUniformInt)


*@par Attributes: *@par Attributes:
*@li dtype: A type from: half, float16, float32, float64. The type of the output. *@li dtype: A type from: half, float16, float32, float64. The type of the output.
*@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero,
the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n
*@li seed: An optional int. Defaults to 0.
*@li seed2: An optional int. Defaults to 0 . \n


*@par Outputs: *@par Outputs:
*y: A Tensor of type dtype . \n *y: A Tensor of type dtype . \n
@@ -362,14 +308,11 @@ REG_OP(RandomUniform)
*shape: A Tensor. Must be one of the following types: int32, int64 . \n *shape: A Tensor. Must be one of the following types: int32, int64 . \n


*@par Attributes: *@par Attributes:
*@li seed: An optional int. Defaults to 0.If either `seed` or `seed2`
are set to be non-zero, the random number generator is seeded by the given
seed. Otherwise, it is seeded by a random seed.
*@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n
*@li seed: An optional int. Defaults to 0.
*@li seed2: An optional int. Defaults to 0 . \n


*@par Outputs: *@par Outputs:
*y: A Tensor of types: float16, float32, double . A tensor of the specified shape
filled with random truncated normal values. \n
*size: A Tensor of types: float16, float32, double . \n


*@attention Constraints: *@attention Constraints:
*The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance. *The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance.
@@ -562,15 +505,15 @@ REG_OP(RandomChoiceWithMask)


*@par Inputs: *@par Inputs:
*Inputs including: *Inputs including:
* x: A required Tensor. Must be one of the following types:
float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
* @li x: A required Tensor. Must be one of the following types:
float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n


*@par Attributes: *@par Attributes:
* group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n
*@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n


*@par Outputs: *@par Outputs:
* y: A required Tensor. Has same type and shape as "x". Must be one of the following types:
float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
*y: A required Tensor. Has same type and shape as "x". Must be one of the following types:
float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n


*@attention Constraints: *@attention Constraints:
*@li "group" must be greater than 0 and must evenly divide the channel dimension size. *@li "group" must be greater than 0 and must evenly divide the channel dimension size.
@@ -641,50 +584,6 @@ REG_OP(DropoutV2)
.OUTPUT(seed, TensorType({ DT_FLOAT })) .OUTPUT(seed, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(p, Float) .REQUIRED_ATTR(p, Float)
.OP_END_FACTORY_REG(DropoutV2) .OP_END_FACTORY_REG(DropoutV2)

/**
* @brief The Bernoulli distribution with probability . \n

* @par Inputs:
* @li x: A ND Tensor. Must be one of the following data types:
int8, uint8, int16, int32, int64, bool, float32, float64 .
* @li p: A ND Tensor. The per-element probability of drawing a 1.
Must be one of the following data types: float32, float64. \n

* @par Attributes:
* seed: An Integer, the seed of the random generator. Default value -1
to use current timestamp, otherwise it should be a positive integer.

* @par Outputs:
* y: A tensor with the same shape and type as "x".
*/

REG_OP(Bernoulli)
.INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
.INPUT(p, TensorType({ DT_FLOAT, DT_DOUBLE }))
.OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
.ATTR(seed, Int, -1)
.OP_END_FACTORY_REG(Bernoulli)

/**
* @brief: Fill the input tensor with values drawn from the uniform distribution U(from, to). \n
* @par Inputs:
* x: A Tensor. Must be one of the following types: float16, float, double. \n

* @par Attributes:
* @li from: The lower bound of the uniform distribution. Defaults to 0.0.
* @li to: The upper bound of the uniform distribution. Defaults to 1.0. \n

* @par Outputs:
* y: A Tensor has the same type as x. \n
*/
REG_OP(Uniform)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(from, Float, 0.0)
.ATTR(to, Float, 1.0)
.OP_END_FACTORY_REG(Uniform)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_

+ 17
- 90
third_party/fwkacllib/inc/ops/reduce_ops.h

@@ -576,7 +576,7 @@ REG_OP(ReduceAll)
*@li axis: A mutable Tensor. The dimensions to reduce . \n *@li axis: A mutable Tensor. The dimensions to reduce . \n


*@par Attributes: *@par Attributes:
*keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n
*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type and format as input "x" . \n *y: A Tensor. Has the same type and format as input "x" . \n
@@ -967,9 +967,9 @@ REG_OP(EuclideanNormD)
Defaults to "0.00001" . \n Defaults to "0.00001" . \n


*@par Outputs: *@par Outputs:
*@li y: A Tensor of type float16 or float32 for the normalized "x".
*@li batch_mean: A Tensor of type float32 for the result mean.
*@li batch_variance: A Tensor of type float32 for the result variance . \n
*y: A Tensor of type float16 or float32 for the normalized "x".
*batch_mean: A Tensor of type float32 for the result mean.
*batch_variance: A Tensor of type float32 for the result variance . \n


*@attention Constraints: *@attention Constraints:
*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. *For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction.
@@ -987,7 +987,7 @@ REG_OP(INInferV2)
.OP_END_FACTORY_REG(INInferV2) .OP_END_FACTORY_REG(INInferV2)


/** /**
*@brief Performs reduce instance normalization. \n
*@brief Performs reduced instance normalization . \n


*@par Inputs: *@par Inputs:
*x: A Tensor of type float16 or float32. \n *x: A Tensor of type float16 or float32. \n
@@ -1008,31 +1008,32 @@ REG_OP(INTrainingReduceV2)




/** /**
*@brief Performs update instance normalization. \n
*@brief Performs update instance normalization . \n


*@par Inputs: *@par Inputs:
* Seven inputs, including:
* Seven inputs, including: (NC1HWC0 supported)
*@li x: A Tensor of type float16 or float32. *@li x: A Tensor of type float16 or float32.
*@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. *@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. *@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li gamma: A Tensor of type float32, for the scaling gamma. *@li gamma: A Tensor of type float32, for the scaling gamma.
*@li beta: A Tensor of type float32, for the scaling beta. *@li beta: A Tensor of type float32, for the scaling beta.
*@li mean: A Tensor of type float32, for the updated mean. *@li mean: A Tensor of type float32, for the updated mean.
*@li variance: A Tensor of type float32, for the updated variance. \n
*@li variance: A Tensor of type float32, for the updated variance . \n


*@par Attributes: *@par Attributes:
*@li momentum: A required float32, specifying the momentum to update mean and var. *@li momentum: A required float32, specifying the momentum to update mean and var.
*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. \n
*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n


*@par Outputs: *@par Outputs:
* Three outputs * Three outputs
*@li y: A Tensor of type float16 or float32, for normalized "x". *@li y: A Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A Tensor of type float32, for the updated mean. *@li batch_mean: A Tensor of type float32, for the updated mean.
*@li batch_variance: A Tensor of type float32, for the updated variance. \n
*@li batch_variance: A Tensor of type float32, for the updated variance . \n


*@attention Constraints: *@attention Constraints:
* This operator is an InstanceNorm fusion operator for updating the moving averages for training.
*@li This operator is an InstanceNorm fusion operator for updating the moving averages for training.
* This operator is used in conjunction with INTrainingReduceV2. * This operator is used in conjunction with INTrainingReduceV2.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
*/ */
REG_OP(INTrainingUpdateV2) REG_OP(INTrainingUpdateV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -1051,80 +1052,6 @@ REG_OP(INTrainingUpdateV2)




/** /**
*@brief Performs the backpropagation of InstanceNorm. \n

*@par Inputs:
* Seven inputs, including:
*@li dy: A Tensor of type float16 or float32.
*@li x: A Tensor of type float16 or float32.
*@li variance: A Tensor of type float32, for the variance of "x".
*@li mean: A Tensor of type float32, for the mean of "x".
*@li res_gamma: A Tensor of type float32.
*@li res_beta: A Tensor of type float32.
*@li gamma: A Tensor of type float32. \n

*@par Outputs:
*pd_x: A Tensor of type float16 or float32, for the gradient of "x". \n

*@attention Constraints:
* The preceding layer of this operator must be INTrainingUpdateGrad. \n
*/
REG_OP(INTrainingReduceGrad)
.INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(variance, TensorType({DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT}))
.INPUT(res_gamma, TensorType({DT_FLOAT}))
.INPUT(res_beta, TensorType({DT_FLOAT}))
.INPUT(gamma, TensorType({DT_FLOAT}))
.OUTPUT(pd_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(INTrainingReduceGrad)

/**
*@brief Performs the backpropagation of InstanceNorm. \n

*@par Inputs:
* Four inputs, including:
*@li dy: A Tensor of type float16 or float32, for the gradient.
*@li x: A Tensor of type float16 or float32.
*@li variance: A Tensor of type float32, for the variance of "x".
*@li mean: A Tensor of type float32, for the mean of "x". \n

*@par Outputs:
*@li res_gamma: A Tensor of type float32.
*@li res_beta: A Tensor of type float32. \n

*/
REG_OP(INTrainingUpdateGrad)
.INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(variance, TensorType({DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT}))
.OUTPUT(res_gamma, TensorType({DT_FLOAT}))
.OUTPUT(res_beta, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(INTrainingUpdateGrad)

/**
*@brief Performs the backpropagation of InstanceNorm. \n

*@par Inputs:
* Two inputs, including:
*@li res_gamma: A Tensor of type float32.
*@li res_beta: A Tensor of type float32. \n

*@par Outputs:
*@li pd_gamma: A Tensor of type float32.
*@li pd_beta: A Tensor of type float32. \n

*/
REG_OP(INTrainingUpdateGradGammaBeta)
.INPUT(res_gamma, TensorType({DT_FLOAT}))
.INPUT(res_beta, TensorType({DT_FLOAT}))
.OUTPUT(pd_gamma, TensorType({DT_FLOAT}))
.OUTPUT(pd_beta, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(INTrainingUpdateGradGammaBeta)
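Taken together, the three registrations removed above form the InstanceNorm backward chain: INTrainingUpdateGrad turns (dy, x, variance, mean) into (res_gamma, res_beta); INTrainingReduceGrad consumes those plus gamma to produce pd_x; INTrainingUpdateGradGammaBeta reduces them to (pd_gamma, pd_beta). A wiring sketch, assuming the ge::op wrapper classes and set_input_<name> setters that REG_OP conventionally generates (dy, x, variance, mean, gamma are upstream ge::Operator handles):

ge::op::INTrainingUpdateGrad update_grad("in_update_grad");
update_grad.set_input_dy(dy).set_input_x(x)
           .set_input_variance(variance).set_input_mean(mean);

ge::op::INTrainingReduceGrad reduce_grad("in_reduce_grad");  // yields pd_x
reduce_grad.set_input_dy(dy).set_input_x(x)
           .set_input_variance(variance).set_input_mean(mean)
           .set_input_res_gamma(update_grad, "res_gamma")
           .set_input_res_beta(update_grad, "res_beta")
           .set_input_gamma(gamma);

ge::op::INTrainingUpdateGradGammaBeta gb("in_grad_gamma_beta");  // yields pd_gamma, pd_beta
gb.set_input_res_gamma(update_grad, "res_gamma")
  .set_input_res_beta(update_grad, "res_beta");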

/**
*@brief Performs reduced group normalization . \n *@brief Performs reduced group normalization . \n


*@par Inputs: *@par Inputs:
@@ -1136,7 +1063,7 @@ REG_OP(INTrainingUpdateGradGammaBeta)




*@par Attributes: *@par Attributes:
*num_groups: A required Int, specifying the number of groups; must match GNTrainingUpdate . \n
*@li num_groups: A required Int, specifying the number of groups; must match GNTrainingUpdate . \n


*@attention Constraints: *@attention Constraints:
* This operator is a GroupNorm fusion operator for updating the moving averages for training. * This operator is a GroupNorm fusion operator for updating the moving averages for training.
@@ -1154,7 +1081,7 @@ REG_OP(GNTrainingReduce)
*@brief Performs update group normalization . \n *@brief Performs update group normalization . \n


*@par Inputs: *@par Inputs:
* Seven inputs, including: (NCHW, NHWC supported)
* Eight inputs, including: (NCHW, NHWC supported)
*@li x: A Tensor of type float16 or float32. *@li x: A Tensor of type float16 or float32.
*@li sum: A 5D Tensor of type float32, *@li sum: A 5D Tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
@@ -1218,8 +1145,8 @@ include:
*@li keep_dims: An optional bool. Defaults to False. If True, retains reduced dimensions with length 1. *@li keep_dims: An optional bool. Defaults to False. If True, retains reduced dimensions with length 1.
*@li separator: An optional string. The separator to use when joining. *@li separator: An optional string. The separator to use when joining.


*@par Outputs:
*output: A Tensor of type string.
*@par Outputs:
*@li output: A Tensor of type string.
*/ */
REG_OP(ReduceJoin) REG_OP(ReduceJoin)
.INPUT(input, TensorType({DT_STRING})) .INPUT(input, TensorType({DT_STRING}))
@@ -1233,7 +1160,7 @@ REG_OP(ReduceJoin)
* @brief Calculates the standard deviation and average value of Tensors. * @brief Calculates the standard deviation and average value of Tensors.


* @par Inputs: * @par Inputs:
* x: A Tensor. Must be one of the following types:
* @li x: A Tensor. Must be one of the following types:
* float16, float32. \n * float16, float32. \n


* @par Attributes: * @par Attributes:


+ 13
- 15
third_party/fwkacllib/inc/ops/resource_variable_ops.h

@@ -33,12 +33,10 @@ namespace ge {
*y:A Tensor of type resource. \n *y:A Tensor of type resource. \n


*@par Attributes: *@par Attributes:
* @li container: optional, string. the container this
variable is placed in.
* @li shared_name: optional, string.the name by which
this variable is referred to.
* @li dtype: required, type. the output of type.
* @li shape: optional, ListInt. the output of shape. \n
* @li container: optional, string.
* @li shared_name: optional, string.
* @li dtype: required, type.
* @li shape: optional, ListInt. \n


*@see VarHandleOp. *@see VarHandleOp.
*/ */
@@ -55,11 +53,11 @@ REG_OP(VarHandleOp)
*@brief Assigns a new value to a variable. \n *@brief Assigns a new value to a variable. \n


*@par Inputs: *@par Inputs:
*@li resource:Handle to the resource in which to store the variable.
*@li value:The value to assign to the variable. \n
*resource:Handle to the resource in which to store the variable.
*value:The value to assign to the variable. \n


*@par Attributes: *@par Attributes:
* dtype: required, type. \n
* @li dtype: required, type. \n


*@see AssignVariableOp. *@see AssignVariableOp.
*/ */
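A registration sketch consistent with the AssignVariableOp comment above (hypothetical name; DT_RESOURCE for the handle assumed from the "Tensor of type resource" wording used by VarHandleOp):

REG_OP(AssignVariableOpSketch)  // hypothetical name, for illustration only
    .INPUT(resource, TensorType({DT_RESOURCE}))
    .INPUT(value, TensorType::BasicType())
    .REQUIRED_ATTR(dtype, Type)
    .OP_END_FACTORY_REG(AssignVariableOpSketch)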
@@ -75,11 +73,11 @@ REG_OP(AssignVariableOp)
*@brief Adds a value to the current value of a variable. \n *@brief Adds a value to the current value of a variable. \n


*@par Inputs: *@par Inputs:
*@li resource:Handle to the resource in which to store the variable.
*@li value:The value by which the variable will be incremented. \n
*resource:Handle to the resource in which to store the variable.
*value:The value by which the variable will be incremented. \n


*@par Attributes: *@par Attributes:
* dtype: required, type. \n
* @li dtype: required, type. \n


*@see AssignAddVariableOp. *@see AssignAddVariableOp.
*/ */
@@ -95,11 +93,11 @@ REG_OP(AssignAddVariableOp)
*@brief Subtracts a value from the current value of a variable. \n *@brief Subtracts a value from the current value of a variable. \n


*@par Inputs: *@par Inputs:
*@li resource:Handle to the resource in which to store the variable.
*@li value:The value by which the variable will be decremented. \n
*resource:Handle to the resource in which to store the variable.
*value:The value by which the variable will be decremented. \n


*@par Attributes: *@par Attributes:
* dtype: required, type. \n
* @li dtype: required, type. \n


*@see AssignSubVariableOp. *@see AssignSubVariableOp.
*/ */


+ 10
- 8
third_party/fwkacllib/inc/ops/rnn.h

@@ -127,7 +127,9 @@ REG_OP(DynamicLSTM)
*@li cell_clip:A float identifying the cell clip in the op. Default to -1. *@li cell_clip:A float identifying the cell clip in the op. Default to -1.
*@li num_proj:An integer identifying the num projection in the op. Default to 0. *@li num_proj:An integer identifying the num projection in the op. Default to 0.
*@li time_major:A bool identifying the time major in the op. Default to false. *@li time_major:A bool identifying the time major in the op. Default to false.
*@li activation:A string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
*@li forget_bias:A float identifying the forget bias in the op. Default to 0. *@li forget_bias:A float identifying the forget bias in the op. Default to 0.
*@li is_training:A bool identifying whether the op is in training mode. Default to true.


*@par Outputs: *@par Outputs:
*eight outputs: \n *eight outputs: \n
@@ -489,6 +491,7 @@ REG_OP(DynamicLSTMV2)
*ten inputs: \n *ten inputs: \n
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
@@ -501,11 +504,10 @@ REG_OP(DynamicLSTMV2)




*@par Outputs: *@par Outputs:
*four outputs: \n
*eight outputs: \n
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*/ */
REG_OP(LSTMInputGrad) REG_OP(LSTMInputGrad)
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -569,13 +571,13 @@ REG_OP(DynamicLSTMGradCell)
.INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(t_state, TensorType({DT_INT32, DT_INT32}))
.INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(t_state, TensorType({DT_INT32, DT_INT32}))
.OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(forget_bias, Float, 1.0)
.ATTR(activation, String, "tanh")
.ATTR(direction, String, "UNIDIRECTIONAL")
.ATTR(forget_bias, Float, 1)
.ATTR(activation, String, "")
.ATTR(direction, String, "Forward")
.ATTR(gate_order, String, "ijfo") .ATTR(gate_order, String, "ijfo")
.OP_END_FACTORY_REG(DynamicLSTMGradCell) .OP_END_FACTORY_REG(DynamicLSTMGradCell)


@@ -1068,7 +1070,7 @@ REG_OP(GRUV2HiddenGradCell)
* If "False", "grad_weight" will not be scale by word_frequency. \n * If "False", "grad_weight" will not be scale by word_frequency. \n


* @par Outputs: * @par Outputs:
* y: A mutable output Tensor for the new word gradient. Has the same type as "grads". \n
* @li grad_weight: A mutable output Tensor for the new word gradient. Has the same type as "grads". \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator EmbeddingDenseGrad. * Compatible with the Pytorch operator EmbeddingDenseGrad.
@@ -1220,7 +1222,7 @@ REG_OP(CommonGRU)
* is equivalent to the size of indices. This matches the CSR format. \n * is equivalent to the size of indices. This matches the CSR format. \n


* @par Outputs: * @par Outputs:
* y: A mutable output Tensor for the new word gradient. Has the same type as "grads". \n
* @li grad_weight: A mutable output Tensor for the new word gradient. Has the same type as "grads". \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator EmbeddingBag. * Compatible with the Pytorch operator EmbeddingBag.


+ 3
- 3
third_party/fwkacllib/inc/ops/rpn_ops.h

@@ -28,12 +28,12 @@ namespace ge {
* iou_threshold with higher scoring box according to their * iou_threshold with higher scoring box according to their
* intersection-over-union (IoU) . \n * intersection-over-union (IoU) . \n


* @par Inputs:
* box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and
*@par Input:
* @li box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and
* corresponding confidence scores . \n * corresponding confidence scores . \n


* @par Attributes: * @par Attributes:
* iou_threshold: An optional float. The threshold for deciding whether boxes
* @li iou_threshold: An optional float. The threshold for deciding whether boxes
* overlap too much with respect to IOU . \n * overlap too much with respect to IOU . \n


* @par Outputs: * @par Outputs:


+ 4
- 10
third_party/fwkacllib/inc/ops/sdca_ops.h

@@ -45,13 +45,7 @@ namespace ge {
*corresponding weights in sparse_weights. This field maybe omitted for the dense approach.It's a dynamic input. *corresponding weights in sparse_weights. This field maybe omitted for the dense approach.It's a dynamic input.
*@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group.
*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.It's a dynamic input. *@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.It's a dynamic input.
*@li example_state_data: a list of vectors containing the example state data. \n

*@par Attributes:
*@li adaptive: the type is bool default false.
*@li num_sparse_features:The num of sparse.
*@li num_sparse_features_with_values: The num of sparse_feature_values
*@li num_dense_features:The num of dense.
*@li example_state_data: a list of vectors containing the example state data.
*@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses. *@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses.
*@li l1: Symmetric l1 regularization strength. *@li l1: Symmetric l1 regularization strength.
*@li l2: Symmetric l2 regularization strength. *@li l2: Symmetric l2 regularization strength.
@@ -59,10 +53,10 @@ namespace ge {
*@li num_inner_iterations: Number of iterations per mini-batch . \n *@li num_inner_iterations: Number of iterations per mini-batch . \n


*@par Outputs: *@par Outputs:
*@li out_example_state_data: Returns a list of vectors containing the updated example state data.
*@li out_delta_sparse_weights: a list of vectors where each value is the delta weights associated with a sparse feature group.
*@li out_delta_dense_weights: a list of vectors where the values are the delta weights associated with a dense feature group . \n
*y: Returns a list of vectors containing the updated example state data, followed by the delta
*weights associated with each sparse feature group and each dense feature group . \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with tensorflow SdcaOptimizerV2 operator. * Compatible with tensorflow SdcaOptimizerV2 operator.


+ 53
- 110
third_party/fwkacllib/inc/ops/selection_ops.h

@@ -258,7 +258,7 @@ REG_OP(GatherV2D)


REG_OP(GatherElements) REG_OP(GatherElements)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.INPUT(index, TensorType({DT_INT32, DT_INT64}))
.INPUT(index, TensorType({DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.ATTR(dim, Int, 0) .ATTR(dim, Int, 0)
.OP_END_FACTORY_REG(GatherElements) .OP_END_FACTORY_REG(GatherElements)
@@ -508,7 +508,7 @@ REG_OP(UnsortedSegmentSum)


*@par Inputs: *@par Inputs:
*One input, including: *One input, including:
* assist: A tensor. Must be one of the following types:
* @li assist: A tensor. Must be one of the following types:
* float16, float32. \n * float16, float32. \n


* @par Attributes: * @par Attributes:
@@ -970,11 +970,10 @@ REG_OP(TopKV2)
* for matrices) . \n * for matrices) . \n


* @par Attributes: * @par Attributes:
* @li sorted: Defaults to true.
* @li sorted: An optional bool. Defaults to true.
* If true, the resulting "k" elements will be sorted by the values in descending * If true, the resulting "k" elements will be sorted by the values in descending
* order. * order.
* @li largest:If true the resulting `k` elements will be sorted by the values in descending order.
* @li dim:0-D. Number of top elements to look for along the last dimension (along each row for matrices). \n
* @li T: Indicator of indices type . \n


* @par Outputs: * @par Outputs:
* @li values: A Tensor, specifying the sorted data. Has the same type as * @li values: A Tensor, specifying the sorted data. Has the same type as
@@ -983,7 +982,7 @@ REG_OP(TopKV2)


* @see TopK() * @see TopK()
* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the TensorFlow operator TopKV2.
* @li Compatible with the TensorFlow operator TopKV2.
*/ */
REG_OP(TopK) REG_OP(TopK)
.INPUT(x, TensorType::RealNumberType()) .INPUT(x, TensorType::RealNumberType())
@@ -1086,6 +1085,7 @@ REG_OP(InTopKD)
* @brief Says whether the targets are in the top "k" predictions . \n * @brief Says whether the targets are in the top "k" predictions . \n


* @par Inputs: * @par Inputs:
* Two inputs, including:
* @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. * @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor.
* @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. * @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
* @li k: A 1D Tensor of the same type as "x2". * @li k: A 1D Tensor of the same type as "x2".
@@ -1618,12 +1618,12 @@ REG_OP(UnsortedSegmentMinD)
* y: A Tensor of type RealNumberType . \n * y: A Tensor of type RealNumberType . \n


* @attention Constraints: * @attention Constraints:
* segment_ids must be non-negative tensor.
* @li segment_ids must be non-negative tensor.


* @see UnsortedSegmentSum(), UnsortedSegmentProd(), * @see UnsortedSegmentSum(), UnsortedSegmentProd(),


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the TensorFlow operator UnsortedSegmentMax.
* @li Compatible with the TensorFlow operator UnsortedSegmentMax.
*/ */
REG_OP(UnsortedSegmentMax) REG_OP(UnsortedSegmentMax)
.INPUT(x, TensorType::RealNumberType()) .INPUT(x, TensorType::RealNumberType())
@@ -1875,15 +1875,15 @@ REG_OP(Crop)


*@par Inputs: *@par Inputs:
*One input, including: *One input, including:
* x: A tensor . Must be one of the following types:
* @li x: A tensor . Must be one of the following types:
* float16, float32, int32, uint32, int8, uint8. \n * float16, float32, int32, uint32, int8, uint8. \n


*@par Attributes: *@par Attributes:
* axis: Axis along which to cummin. \n
* @li axis: Axis along which to cummin. \n


*@par Outputs: *@par Outputs:
* @li y: A Tensor with the same type and shape of x's.
* @li indices: A Tensor with the int32 type and the same shape of x's. \n
* y: A Tensor with the same type and shape of x's. \n
* indices: A Tensor with the int32 type and the same shape of x's. \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with the Pytorch operator Cummin. \n *Compatible with the Pytorch operator Cummin. \n
@@ -1968,14 +1968,17 @@ REG_OP(WriteSelect)
.OP_END_FACTORY_REG(WriteSelect) .OP_END_FACTORY_REG(WriteSelect)


/** /**
*@brief Read data by stride.
*@brief Read data by stride . \n


*@par Inputs: *@par Inputs:
*x: A Tensor. Must be one of the following types: float16, int8. \n
*One input:
*x: A Tensor. Must be one of the following types: float16, int8 . \n


*@par Attributes: *@par Attributes:
*@li axis: A required int32, specifying the index of axis to read by stride. \n
*@li stride: A required int32, specifying the value of reading stride. \n
*@li axis: A required int32, specifying the index of axis to read by stride . \n

*@par Attributes:
*@li stride: A required int32, specifying the value of reading stride . \n


*@par Outputs: *@par Outputs:
*y: A Tensor of the same type as "x". *y: A Tensor of the same type as "x".
@@ -1988,14 +1991,16 @@ REG_OP(StridedRead)
.OP_END_FACTORY_REG(StridedRead) .OP_END_FACTORY_REG(StridedRead)


/** /**
*@brief Write data by stride.
*@brief: Write data by stride . \n


*@par Inputs: *@par Inputs:
*x: A Tensor. Must be one of the following types: float16, int8. \n
*x: A Tensor. Must be one of the following types: float16, int8 . \n

*@par Attributes:
*@li axis: A required int32, specifying the index of axis to write by stride . \n


*@par Attributes: *@par Attributes:
*@li axis: A required int32, specifying the index of axis to write by stride. \n
*@li stride: A required int32, specifying the value of writing stride. \n
*@li stride: A required int32, specifying the value of writing stride . \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "x". *y: A Tensor. Has the same type as "x".
@@ -2071,10 +2076,10 @@ REG_OP(CumulativeLogsumexpD)
* @li updates: A Tensor of the same type as "var". \n * @li updates: A Tensor of the same type as "var". \n


* @par Attributes: * @par Attributes:
* axis: An required int to specify the axis to perform indices add. \n
* @li axis: An required int to specify the axis to perform indices add. \n


* @par Outputs: * @par Outputs:
* var: A Tensor. Same as input "var".
* @li var: A Tensor. Same as input "var".


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator index_add_. * Compatible with the Pytorch operator index_add_.
@@ -2099,7 +2104,7 @@ REG_OP(InplaceIndexAdd)
* @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8. * @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8.


* @par Outputs: * @par Outputs:
* y: A tensor. Must be one of the following dtypes:
* @li y: A tensor. Must be one of the following dtypes:
* float16, float32, int64, int32, int8. * float16, float32, int64, int32, int8.
*/ */
REG_OP(MaskedFill) REG_OP(MaskedFill)
@@ -2118,7 +2123,7 @@ REG_OP(MaskedFill)
* @li mask: A Tensor of dtype is bool. \n * @li mask: A Tensor of dtype is bool. \n


* @par Outputs: * @par Outputs:
* y: A tensor with the same type as x. \n
* @li y: A tensor with the same type as x. \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Numpy operator select. * Compatible with the Numpy operator select.
@@ -2129,50 +2134,13 @@ REG_OP(MaskedSelectV2)
.INPUT(mask, TensorType({DT_BOOL})) .INPUT(mask, TensorType({DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(MaskedSelectV2) .OP_END_FACTORY_REG(MaskedSelectV2)
/**
* @brief Choose the value of X with value according to mask.

* @par Inputs:
* two inputs, including:
* @li x: A Tensor of dtype is float16 or float32 or float64 or int64 or int32 or int16 or int8 or uint8.
* @li mask: A Tensor of dtype is bool. \n

* @par Outputs:
* @li y: A tensor with the same type as x. \n

*/
REG_OP(MaskedSelect)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
.INPUT(mask, TensorType({DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
.OP_END_FACTORY_REG(MaskedSelect)

/**
* @brief update the value of X with value according to mask.

* @par Inputs:
* three inputs, including:
* @li x: A Tensor of dtype is float16 or float32 or float64 or int64 or int32 or int16 or int8 or uint8.
* @li mask: A Tensor of dtype is bool.
* @li updates: A tensor with the same type as x. \n

* @par Outputs:
* @li y: A tensor with the same type as x. \n
*/
REG_OP(MaskedScatter)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
.INPUT(mask, TensorType({DT_BOOL}))
.INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
.OP_END_FACTORY_REG(MaskedScatter)


/** /**
* @brief Slice a tensor at its last dim, e.x. a[..., begin:end:stride]. \n * @brief Slice a tensor at its last dim, e.x. a[..., begin:end:stride]. \n


* @par Inputs: * @par Inputs:
* One input, including: * One input, including:
* x: A Tensor. Must be one of the following types: float16, float32, int16, int32.
* @li x: A Tensor. Must be one of the following types: float16, float32, int16, int32.


* @par Attributes: * @par Attributes:
* @li start: An attribute of type Int, start index of last dim. \n * @li start: An attribute of type Int, start index of last dim. \n
@@ -2180,7 +2148,7 @@ REG_OP(MaskedScatter)
* @li stride: An attribute of type Int, stride of slice. \n * @li stride: An attribute of type Int, stride of slice. \n


* @par Outputs: * @par Outputs:
* y: A Tensor. Has the same type as "x". \n
* @li y: A Tensor. Has the same type as "x". \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* No compatibility * No compatibility
@@ -2194,36 +2162,39 @@ REG_OP(SliceLastDim)
.OP_END_FACTORY_REG(SliceLastDim) .OP_END_FACTORY_REG(SliceLastDim)


/** /**
* @brief Extracts a strided slice of a tensor. Roughly speaking, this op
* extracts a slice of size (end-begin)/stride from the given input tensor.
* Starting at the location specified by begin the slice continues by
* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n
* extracts a slice of size (end-begin)/stride from the given input tensor. \n
* Starting at the location specified by begin the slice continues by \n
* adding stride to the index until all dimensions are not less than end. \n * adding stride to the index until all dimensions are not less than end. \n
* *
* @par Inputs: * @par Inputs:
* Five inputs, including:
* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8,
* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
* complex128, float16, uint32, uint64, complex64, complex128.
* Four inputs, including:
* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n
* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n
* complex128, float16, uint32, uint64, complex64, complex128. \n
* @li begin: A Tensor of type int32 or int64, for the index of the first value to select. * @li begin: A Tensor of type int32 or int64, for the index of the first value to select.
*
* @li end: A Tensor of type int32 or int64, for the index of the last value to select. * @li end: A Tensor of type int32 or int64, for the index of the last value to select.
*
* @li axes: A Tensor of type int32 or int64, indicate axis to be select. * @li axes: A Tensor of type int32 or int64, indicate axis to be select.
* @li strides: A Tensor of type int32 or int64, for the increment. \n
*
* @li strides: A Tensor of type int32 or int64, for the increment.
* *
* @par Attributes: * @par Attributes:
* @li begin_mask: A Tensor of type int32.
* A bitmask where a bit "i" being "1" means to ignore the begin
* @li begin_mask: A Tensor of type int32. \n
* A bitmask where a bit "i" being "1" means to ignore the begin \n
* value and instead use the largest interval possible. * value and instead use the largest interval possible.
* @li end_mask: A Tensor of type int32.
* @li end_mask: A Tensor of type int32. \n
* Analogous to "begin_mask". * Analogous to "begin_mask".
* @li ellipsis_mask: A Tensor of type int32.
* A bitmask where bit "i" being "1" means the "i"th position
* @li ellipsis_mask: A Tensor of type int32. \n
* A bitmask where bit "i" being "1" means the "i"th position \n
* is actually an ellipsis. * is actually an ellipsis.
* @li new_axis_mask: A Tensor of type int32.
* A bitmask where bit "i" being "1" means the "i"th
* @li new_axis_mask: A Tensor of type int32. \n
* A bitmask where bit "i" being "1" means the "i"th \n
* specification creates a new shape 1 dimension. * specification creates a new shape 1 dimension.
* @li shrink_axis_mask: A Tensor of type int32.
* A bitmask where bit "i" implies that the "i"th
* specification should shrink the dimensionality. \n
* @li shrink_axis_mask: A Tensor of type int32. \n
* A bitmask where bit "i" implies that the "i"th \n
* specification should shrink the dimensionality.
* *
* @par Outputs: * @par Outputs:
* y: A Tensor. Has the same type as "x". * y: A Tensor. Has the same type as "x".
@@ -2260,7 +2231,7 @@ REG_OP(StridedSliceV2)
* float16, float32, int32. \n * float16, float32, int32. \n


* @par Attributes: * @par Attributes:
* dim: A required int. Used to select the dimension of this tensor. \n
* @li dim: A required int. Used to select the dimension of this tensor. \n


*@par Outputs: *@par Outputs:
*y: A Tensor with the same type and shape of input_x's. \n *y: A Tensor with the same type and shape of input_x's. \n
@@ -2336,34 +2307,6 @@ REG_OP(MaskedFillRange)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32}))
.REQUIRED_ATTR(axis, Int) .REQUIRED_ATTR(axis, Int)
.OP_END_FACTORY_REG(MaskedFillRange) .OP_END_FACTORY_REG(MaskedFillRange)

/**
* @brief After a set of sorted data and a new set of data are re-sorted, get the first k data. \n
*
* @par Inputs:
* Six inputs, including:
* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation. Must be one of the following types: float32, float16.
* @li topk_pq_index: A Tensor of type int32, index corresponding to topk_pq_distance.
* @li topk_pq_ivf: A Tensor of type int32 , the bucket number corresponding to topk_pq_distance.
* @li pq_distance: A Tensor of type float32 or float16, the new data set will be reordered with topk_pq_distance and updated to topk_pq_distance.
* @li pq_index: A Tensor of type int32, index corresponding to pq_distance.
* @li pq_ivf: A scalar of type int32 , the bucket number corresponding to pq_distance. \n
*
* @par Attributes:
* @li order: A string, indicates the sorting method of topk_pq_distance. \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InplaceTopKDistance)
.INPUT(topk_pq_distance, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(topk_pq_index, TensorType({DT_INT32}))
.INPUT(topk_pq_ivf, TensorType({DT_INT32}))
.INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(pq_index, TensorType({DT_INT32}))
.INPUT(pq_ivf, TensorType({DT_INT32}))
.ATTR(order, String, "asc")
.OP_END_FACTORY_REG(InplaceTopKDistance)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_

+ 23
- 20
third_party/fwkacllib/inc/ops/sparse_ops.h

@@ -281,9 +281,9 @@ REG_OP(SparseSliceGrad)
* @li size: A 1D Tensor of type int64. The size of the slice . \n * @li size: A 1D Tensor of type int64. The size of the slice . \n


*@par Outputs: *@par Outputs:
*@li y_indices: A Tensor of type int64.
*@li y_values: A Tensor. Has the same type as "values".
*@li y_shape: A Tensor of type int64 . \n
*y_indices: A Tensor of type int64.
*y_values: A Tensor. Has the same type as "values".
*y_shape: A Tensor of type int64 . \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with the TensorFlow operator SparseSlice. * Compatible with the TensorFlow operator SparseSlice.
@@ -313,8 +313,8 @@ REG_OP(SparseSlice)
* @li sum_indices: A 2D Tensor of type int64. The indices of the sum SparseTensor, with size [nnz(sum), ndims] . \n * @li sum_indices: A 2D Tensor of type int64. The indices of the sum SparseTensor, with size [nnz(sum), ndims] . \n


*@par Outputs: *@par Outputs:
*@li x1_val_grad: A Tensor. Has the same type as "backprop_val_grad".
*@li x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n
*x1_val_grad: A Tensor. Has the same type as "backprop_val_grad".
*x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with the TensorFlow operator SparseAddGrad. * Compatible with the TensorFlow operator SparseAddGrad.
@@ -363,7 +363,7 @@ REG_OP(SparseFillEmptyRowsGrad)


*@par Inputs: *@par Inputs:
* @li x1_indices: A 2D Tensor of type int32 or int64. * @li x1_indices: A 2D Tensor of type int32 or int64.
*The indices of the matrix "SparseTensor", with size [nnz, 2].
* @li The indices of the matrix "SparseTensor", with size [nnz, 2].
* @li x1_values: A 1D Tensor. The values of the SparseTensor, with size [nnz]. * @li x1_values: A 1D Tensor. The values of the SparseTensor, with size [nnz].
* @li x1_shape: A 1D Tensor of type int64. The shape of the SparseTensor, with size [2]. * @li x1_shape: A 1D Tensor of type int64. The shape of the SparseTensor, with size [2].
* @li x2: A dense matrix Tensor of the same type as "x1_values". 2D . \n * @li x2: A dense matrix Tensor of the same type as "x1_values". 2D . \n
@@ -373,9 +373,9 @@ REG_OP(SparseFillEmptyRowsGrad)


*@par Attributes: *@par Attributes:
*@li adjoint_a: An optional bool. Defaults to "False".Use the adjoint of A in the matrix multiply. *@li adjoint_a: An optional bool. Defaults to "False".Use the adjoint of A in the matrix multiply.
*If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A).
*@li If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A).
*@li adjoint_b: An optional bool. Defaults to "False".Use the adjoint of B in the matrix multiply. *@li adjoint_b: An optional bool. Defaults to "False".Use the adjoint of B in the matrix multiply.
*If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n
*@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with the TensorFlow operator SparseTensorDenseMatMul. * Compatible with the TensorFlow operator SparseTensorDenseMatMul.
@@ -400,13 +400,9 @@ REG_OP(SparseTensorDenseMatMul)
* @li indices: A 0D, 1D, or 2D Tensor of type int32 or int64. * @li indices: A 0D, 1D, or 2D Tensor of type int32 or int64.
* @li output_shape: A 1D Tensor of the same type as "sparse_indices". The shape of the dense output tensor. * @li output_shape: A 1D Tensor of the same type as "sparse_indices". The shape of the dense output tensor.
* @li values: A 1D Tensor. Values corresponding to each row of "sparse_indices", * @li values: A 1D Tensor. Values corresponding to each row of "sparse_indices",
or a scalar value to be used for all sparse indices.
* @li or a scalar value to be used for all sparse indices.
* @li default_value: A Tensor of the same type as "sparse_values" . \n * @li default_value: A Tensor of the same type as "sparse_values" . \n


*@par Attributes:
*validate_indices: If true, indices are checked to make sure they are sorted in
lexicographic order and that there are no repeats. \n

*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "values" . \n *y: A Tensor. Has the same type as "values" . \n


@@ -431,6 +427,7 @@ REG_OP(SparseToDense)
*Concatenation is with respect to the dense versions of these sparse tensors . \n *Concatenation is with respect to the dense versions of these sparse tensors . \n


*@par Inputs: *@par Inputs:
*3 or 5 inputs, including:
* @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D. * @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D.
*Indices of each input `SparseTensor`.It's a dynamic input. *Indices of each input `SparseTensor`.It's a dynamic input.
* @li values:A list with the same length as `indices` of `Tensor` objects with the same type. * @li values:A list with the same length as `indices` of `Tensor` objects with the same type.
@@ -703,6 +700,7 @@ REG_OP(SparseReduceMaxSparse)
*@brief Computes the sum of elements across dimensions of a SparseTensor . \n *@brief Computes the sum of elements across dimensions of a SparseTensor . \n


*@par Inputs: *@par Inputs:
*4 or 5 inputs, including:
* @li x_indices: A 2D Tensor of type int64. * @li x_indices: A 2D Tensor of type int64.
*"N x R" matrix with the indices of non-empty values in a *"N x R" matrix with the indices of non-empty values in a
*SparseTensor, possibly not in canonical ordering. *SparseTensor, possibly not in canonical ordering.
@@ -713,11 +711,13 @@ REG_OP(SparseReduceMaxSparse)
*A length-"K" vector containing the reduction axes . \n *A length-"K" vector containing the reduction axes . \n


*@par Attributes: *@par Attributes:
*keep_dims: An optional bool. Defaults to "False".
* keep_dims: An optional bool. Defaults to "False".
*If true, retains reduced dimensions with length 1 . \n *If true, retains reduced dimensions with length 1 . \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "x_values". \n
* @li y_indices: A Tensor of type int64.
* @li y_values: A Tensor. Has the same type as "input_values".
* @li y_shape: A Tensor of type int64 . \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with the TensorFlow operator SparseReduceSum. * Compatible with the TensorFlow operator SparseReduceSum.
@@ -818,6 +818,7 @@ REG_OP(SparseSplit)
*@brief Generates sparse cross from a list of sparse and dense tensors . \n *@brief Generates sparse cross from a list of sparse and dense tensors . \n


*@par Inputs: *@par Inputs:
*8 or 10 inputs, including:
* @li indices: A list of 2D Tensor objects of type int64. * @li indices: A list of 2D Tensor objects of type int64.
* Indices of each input SparseTensor.It's a dynamic input. * Indices of each input SparseTensor.It's a dynamic input.
* @li values: A list of 1D Tensor objects of type int64 or string. * @li values: A list of 1D Tensor objects of type int64 or string.
@@ -898,8 +899,9 @@ REG_OP(AddManySparseToTensorsMap)
*@brief Reads SparseTensors from a "SparseTensorsMap" and concatenate them . \n

*@par Inputs:
+*2 or 4 inputs, including:
* handles: A 1D Tensor of type int64.
-*The "N" serialized SparseTensor objects . \n
+* The "N" serialized SparseTensor objects . \n


*@par Attributes:
* @li dtype: A tf.DType. The "dtype" of the SparseTensor objects stored in the "SparseTensorsMap".
@@ -909,9 +911,9 @@ REG_OP(AddManySparseToTensorsMap)
*The shared name for the "SparseTensorsMap" read by this op . \n

*@par Outputs:
-* @li indices: A Tensor of type int64.2-D. The `indices` of the minibatch `SparseTensor`.
-* @li values: A Tensor of type "dtype". 1-D. The `values` of the minibatch `SparseTensor`.
-* @li shape: A Tensor of type int64 . 1-D. The `shape` of the minibatch `SparseTensor`. \n
+* @li indices: A Tensor of type int64.
+* @li values: A Tensor of type "dtype".
+* @li shape: A Tensor of type int64 . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator TakeManySparseFromTensorsMap.
@@ -987,7 +989,8 @@ REG_OP(SerializeManySparse)
*@brief Deserializes SparseTensor objects . \n

*@par Inputs:
-*serialized_sparse: A Tensor. The serialized SparseTensor objects.
+*Two inputs, including:
+* serialized_sparse: A Tensor. The serialized SparseTensor objects.
*The last dimension must have 3 columns . \n

*@par Attributes:


+10 -10 third_party/fwkacllib/inc/ops/spectral_ops.h

@@ -31,10 +31,10 @@ namespace ge {
inner-most dimension of `x`. \n

*@par Inputs:
-*x: A Tensor. Must be the following types: complex64, complex128. \n
+*@li x: A Tensor. Must be the following types: complex64, complex128. \n

*@par Outputs:
-*y: A complex tensor of the same rank as `x`. \n
+*@li y: A complex tensor of the same rank as `x`. \n

*@par Third-party framework compatibility
* Compatible with TensorFlow IFFT operator.
@@ -52,7 +52,7 @@ REG_OP(IFFT)
*@li fft_length: An int32 tensor of shape [1]. The FFT length . \n

*@par Outputs:
-*y: A complex64 tensor of the same rank as `input`. The inner-most
+*@li y: A complex64 tensor of the same rank as `input`. The inner-most
dimension of `input` is replaced with the `fft_length / 2 + 1` unique
frequency components of its 1D Fourier transform . \n
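The `fft_length / 2 + 1` figure above follows from Hermitian symmetry: for a real input of length N, DFT bin N-k is the conjugate of bin k, so only the first N/2 + 1 bins carry unique information. A naive, illustrative C++ check (not the runtime's FFT):

#include <cmath>
#include <complex>
#include <cstddef>
#include <vector>

// Naive DFT of a real signal, returning only the n/2 + 1 unique bins that the
// RFFT output documented above contains.
std::vector<std::complex<double>> NaiveRfft(const std::vector<double> &x) {
  const double kPi = 3.141592653589793;
  const size_t n = x.size();
  std::vector<std::complex<double>> out(n / 2 + 1);
  for (size_t k = 0; k < out.size(); ++k) {
    std::complex<double> acc(0.0, 0.0);
    for (size_t t = 0; t < n; ++t) {
      const double angle = -2.0 * kPi * static_cast<double>(k * t) / static_cast<double>(n);
      acc += x[t] * std::complex<double>(std::cos(angle), std::sin(angle));
    }
    out[k] = acc;  // a bin k > n/2 would equal conj(out[n - k])
  }
  return out;
}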


@@ -73,7 +73,7 @@ REG_OP(RFFT)
*@li fft_length: An int32 tensor of shape [1]. The FFT length. \n

*@par Outputs:
-* y: A float32 tensor of the same rank as `input`. The inner-most
+*@li y: A float32 tensor of the same rank as `input`. The inner-most
dimension of `input` is replaced with the `fft_length` samples of its inverse
1D Fourier transform. \n


@@ -91,10 +91,10 @@ REG_OP(IRFFT)
*@brief 2D fast Fourier transform. \n

*@par Inputs:
-*x: A complex64 tensor.
+*@li x: A complex64 tensor.

*@par Outputs:
-*y: A complex64 tensor of the same shape as `input`. The inner-most 2
+*@li y: A complex64 tensor of the same shape as `input`. The inner-most 2
dimensions of `input` are replaced with their 2D Fourier transform. \n

*@par Third-party framework compatibility
@@ -110,10 +110,10 @@ REG_OP(FFT2D)
innermost dimension of the input. \n

*@par Inputs:
-*x: A Tensor. Must be the following types: complex64, complex128. \n
+*@li x: A Tensor. Must be the following types: complex64, complex128. \n

*@par Outputs:
-*y: A complex tensor with the same shape as input. The innermost dimension
+*@li y: A complex tensor with the same shape as input. The innermost dimension
of the input is replaced by its 1-dimensional Fourier transform. \n

*@par Third-party framework compatibility
@@ -129,10 +129,10 @@ REG_OP(FFT)
innermost dimension of the input. \n

*@par Inputs:
-*x: A Tensor. Must be the following types: complex64, complex128. \n
+*@li x: A Tensor. Must be the following types: complex64, complex128. \n

*@par Outputs:
-*y: A complex tensor with the same shape as input. The innermost dimension
+*@li y: A complex tensor with the same shape as input. The innermost dimension
of the input is replaced by its inverse two-dimensional Fourier transform. \n

*@par Third-party framework compatibility


+6 -3 third_party/fwkacllib/inc/ops/split_combination_ops.h

@@ -161,11 +161,14 @@ REG_OP(SplitVD)
/**
*@brief Concatenates a list of N tensors along the first dimension.
*@par Inputs:
-* One input, including:
-* values: A list of Tensors. Must be one of the following types: int8, int16, int32,
+* Two inputs, including:
+* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32,
* int64, uint8, uint16, uint32, uint64, float16, float32.
* Tensors to be concatenated. All must have size 1 in the first dimension and same shape.
-* It's a dynamic input. \n
+* It's a dynamic input.
+* @li shape: A Tensor of the same type as "x".
+* The final shape of the result. Should be equal to the shapes of any input
+* but with the number of input values in the first dimension . \n

*@par Attributes:
* @li shape: A required list of ints.


+1 -1 third_party/fwkacllib/inc/ops/state_ops.h

@@ -104,7 +104,7 @@ REG_OP(DestroyTemporaryVariable)
*@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n

*@par Inputs:
-*x: A Tensor of type float16, float32, double, bool, int8, uint8, uint16, int16, int32, uint32, uint64, int64.
+*x: A tensor . \n

*@par Outputs:
*y: A tensor, indicating whether "x" has been initialized . \n


+13 -18 third_party/fwkacllib/inc/ops/stateful_random_ops.h

@@ -32,10 +32,7 @@ namespace ge {
*@par Inputs:
*This op may use some OS-provided source of non-determinism (e.g. an RNG),
*so each execution will give different results. Inputs included:
-*shape: The shape of the output tensor . \n
-
-*@par Attributes:
-*dtype: required, type. \n
+*@li shape: The shape of the output tensor . \n

*@par Outputs:
*y:A Returns Non-deterministic integer values with specified shape . \n
@@ -57,10 +54,13 @@ REG_OP(NonDeterministicInts)
*counter is an unspecified implementation detail . \n

*@par Inputs:
-*@li x: The handle of the resource variable that stores the state of the RNG.
+*@li resource: The handle of the resource variable that stores the state of the RNG.
*@li algorithm: The RNG algorithm.
*@li delta: The amount of advancement . \n

+*@par Outputs:
+*y:A Returns the created operation . \n

*@par Third-party framework compatibility
* Compatible with tensorflow RngSkip operator.
*/
@@ -81,16 +81,11 @@ power of two. The bias is small for values of `maxval - minval` significantly
smaller than the range of the output (either `2^32` or `2^64`) . \n

*@par Inputs:
-*@li x: The handle of the resource variable that stores the state of the RNG.
+*@li resource: The handle of the resource variable that stores the state of the RNG.
*@li algorithm: The RNG algorithm.
*@li shape: The shape of the output tensor.
-*@li counts: A 0/1-D Tensor or Python value. The counts of the binomial
-distribution. Must be broadcastable with the leftmost dimension defined by `shape`.
-*@li probs: A 0/1-D Tensor or Python value. The probability of success for the
-binomial distribution. Must be broadcastable with the leftmost dimension defined by `shape`.\n
-
-*@par Attributes:
-*dtype: required, type. \n
+*@li minval: Minimum value (inclusive, scalar).
+*@li maxval: Maximum value (exclusive, scalar) . \n

*@par Outputs:
*y:A Returns Random values with specified shape . \n
@@ -114,7 +109,7 @@ REG_OP(StatefulRandomBinomial)
*The generated values will have mean 0 and standard deviation 1 . \n

*@par Inputs:
-*@li x: The handle of the resource variable that stores the state of the RNG.
+*@li resource: The handle of the resource variable that stores the state of the RNG.
*@li algorithm: The RNG algorithm.
*@li shape: The shape of the output tensor . \n


@@ -139,7 +134,7 @@ REG_OP(StatefulStandardNormalV2)
*deviations from the mean are dropped and re-picked . \n

*@par Inputs:
-*@li x: The handle of the resource variable that stores the state of the RNG.
+*@li resource: The handle of the resource variable that stores the state of the RNG.
*@li algorithm: The RNG algorithm.
*@li shape: The shape of the output tensor . \n


@@ -163,7 +158,7 @@ The generated values follow a uniform distribution in the range `[0, 1)`. The
lower bound 0 is included in the range, while the upper bound 1 is excluded.

*@par Inputs:
-*@li x: The handle of the resource variable that stores the state of the RNG.
+*@li resource: The handle of the resource variable that stores the state of the RNG.
*@li algorithm: The RNG algorithm.
*@li shape: The shape of the output tensor . \n


@@ -186,7 +181,7 @@ REG_OP(StatefulUniform)
The generated values are uniform integers covering the whole range of `dtype` . \n

*@par Inputs:
-*@li x: The handle of the resource variable that stores the state of the RNG.
+*@li resource: The handle of the resource variable that stores the state of the RNG.
*@li algorithm: The RNG algorithm.
*@li shape: The shape of the output tensor . \n


@@ -214,7 +209,7 @@ power of two. The bias is small for values of `maxval - minval` significantly
smaller than the range of the output (either `2^32` or `2^64`) . \n

*@par Inputs:
-*@li x: The handle of the resource variable that stores the state of the RNG.
+*@li resource: The handle of the resource variable that stores the state of the RNG.
*@li algorithm: The RNG algorithm.
*@li shape: The shape of the output tensor.
*@li minval: Minimum value (inclusive, scalar).
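The bias caveat these hunks keep quoting is ordinary modulo bias: folding a full-width random draw with % range over-represents some residues unless range divides 2^32 or 2^64 exactly. A generic C++ sketch of the usual rejection fix (illustrative; nothing here is the runtime's RNG):

#include <cstdint>
#include <random>

// Uniform draw from [minval, maxval) without modulo bias: reject the few low
// draws that would otherwise make some residues appear one extra time.
uint64_t UnbiasedUniform(std::mt19937_64 &rng, uint64_t minval, uint64_t maxval) {
  const uint64_t range = maxval - minval;     // caller guarantees maxval > minval
  const uint64_t skew = (0 - range) % range;  // == 2^64 mod range (unsigned wrap)
  uint64_t draw;
  do {
    draw = rng();
  } while (draw < skew);  // rejection is rare unless range is close to 2^64
  return minval + draw % range;
}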


+25 -25 third_party/fwkacllib/inc/ops/string_ops.h

@@ -295,7 +295,7 @@ REG_OP(StringSplit)


*@par Inputs:
include:
-*input:A Tensor of type string. The text to be processed. \n
+*@li input:A Tensor of type string. The text to be processed. \n

*@par Attributes:
*@li pattern:A string. The regular expression to match the input.
@@ -303,8 +303,8 @@ include:
*@li replace_global:An optional bool. Defaults to True. If True, the replacement is global,
otherwise the replacement is done only on the first match.

-*@par Outputs:
-*output::A Tensor of type string.
+*@par output:
+*@li output::A Tensor of type string.
*/
REG_OP(StaticRegexReplace)
.INPUT(input, TensorType({DT_STRING}))
@@ -322,13 +322,13 @@ REG_OP(StaticRegexReplace)


*@par Inputs:
include:
-*input:A Tensor of type string. The text to be processed. \n
+*@li input:A Tensor of type string. The text to be processed. \n

*@par Attributes:
-*pattern:A string. The regular expression to match the input.
+*@li pattern:A string. The regular expression to match the input.

-*@par Outputs:
-*output::A bool tensor with the same shape as `input`.
+*@par output:
+*@li output::A bool tensor with the same shape as `input`.
*/
REG_OP(StaticRegexFullMatch)
.INPUT(input, TensorType({DT_STRING}))
@@ -347,10 +347,10 @@ include:
*@li num_segments:A Tensor. Must be one of the following types: int32, int64. A scalar.

*@par Attributes:
-*separator:An optional string. Defaults to "". The separator to use when joining.
+*@li separator:An optional string. Defaults to "". The separator to use when joining.

-*@par Outputs:
-*output::A Tensor of type string..
+*@par output:
+*@li output::A Tensor of type string..
*/
REG_OP(UnsortedSegmentJoin)
.INPUT(input, TensorType({DT_STRING}))
@@ -366,13 +366,13 @@ REG_OP(UnsortedSegmentJoin)


*@par Inputs:
include:
-*input:A Tensor of type string. The text to be processed.
+*@li input:A Tensor of type string. The text to be processed.

*@par Attributes:
-*encoding:An optional string. Defaults to "".
+*@li encoding:An optional string. Defaults to "".

-*@par Outputs:
-*output::A Tensor of type string..
+*@par output:
+*@li output::A Tensor of type string..
*/
REG_OP(StringLower)
.INPUT(input, TensorType({DT_STRING}))
@@ -386,13 +386,13 @@ REG_OP(StringLower)


*@par Inputs:
include:
-*input:A Tensor of type string. The text to be processed.
+*@li input:A Tensor of type string. The text to be processed.

*@par Attributes:
-*encoding:An optional string. Defaults to "".
+*@li encoding:An optional string. Defaults to "".

-*@par Outputs:
-*output::A Tensor of type string..
+*@par output:
+*@li output::A Tensor of type string..
*/
REG_OP(StringUpper)
.INPUT(input, TensorType({DT_STRING}))
@@ -901,10 +901,10 @@ REG_OP(DecodeBase64)
*@brief StringNormalization performs string operations for basic cleaning . \n

*@par Inputs:
-*input: only accepts [C] or [1, C] UTF-8 strings tensor . \n
+*@li input: only accepts [C] or [1, C] UTF-8 strings tensor . \n

*@par Outputs:
-*output: UTF-8 strings tensor after cleaning . \n
+*@li output: UTF-8 strings tensor after cleaning . \n

*@par Attributes:
*@li stopwords : list of strings (default is empty).
@@ -919,13 +919,13 @@ case-sensitive. Default is false.
*string enum that cases output to be lowercased/uppercases/unchanged. Valid
values are "LOWER", "UPPER", "NONE". Default is "NONE".

-*@li locale : string (default is "C").
+*@li local : string (default is "en_US").
*Environment dependent string that denotes the locale according to which output
-strings needs to be upper/lowercased.Default C or platform specific equivalent
-as decided by the implementation. \n
+strings needs to be upper/lowercased.Default en_US or platform specific equivalent
+as decided by the implementation . \n

*@attention Constraints:
-*input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C].
+*@li input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C].
*/
REG_OP(StringNormalizer)
.INPUT(input, TensorType({DT_STRING}))
@@ -933,7 +933,7 @@ REG_OP(StringNormalizer)
.ATTR(stopwords, ListString, {})
.ATTR(is_case_sensitive, Bool, false)
.ATTR(case_change_action, String, "NONE")
-.ATTR(locale, String, "C")
+.ATTR(local, String, "en_US")
.OP_END_FACTORY_REG(StringNormalizer)
} // namespace ge
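To make the attribute set above concrete, here is a rough C++ sketch of the two documented cleaning steps, stopword removal and case_change_action (illustrative only; real locale handling and the op's exact rules are out of scope):

#include <algorithm>
#include <cctype>
#include <string>
#include <vector>

// Drop stopwords (optionally case-insensitively), then apply the case action.
std::vector<std::string> NormalizeStrings(std::vector<std::string> input,
                                          const std::vector<std::string> &stopwords,
                                          bool is_case_sensitive,
                                          const std::string &case_change_action) {
  auto lower = [](std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return s;
  };
  std::vector<std::string> out;
  for (auto &s : input) {
    bool dropped = false;
    for (const auto &w : stopwords) {
      dropped = is_case_sensitive ? (s == w) : (lower(s) == lower(w));
      if (dropped) break;
    }
    if (dropped) continue;
    if (case_change_action == "LOWER") {
      s = lower(s);
    } else if (case_change_action == "UPPER") {
      std::transform(s.begin(), s.end(), s.begin(),
                     [](unsigned char c) { return std::toupper(c); });
    }
    out.push_back(s);  // "NONE" leaves the string unchanged
  }
  return out;
}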




+8 -9 third_party/fwkacllib/inc/ops/transformation_ops.h

@@ -29,15 +29,15 @@ namespace ge {


*@par Inputs:
*The input handle must have the resource type. Inputs include:
-*x:A list of Tensor objects. One or more tensors from which
+*@li x:A list of Tensor objects. One or more tensors from which
the enqueued tensors should be taken . \n

*@par Outputs:
-*y:A list of Tensor objects. One or more tensors from which
+*@li y:A list of Tensor objects. One or more tensors from which
the enqueued tensors should be taken . \n

*@par Attributes:
-*type: An optional ge::DataType. It refers to the target data type of outputs . \n
+*@li type: An optional ge::DataType. It refers to the target data type of outputs . \n

*@par Third-party framework compatibility
*Compatible with tensorflow QueueIsClosed operator.
@@ -723,12 +723,11 @@ REG_OP(CompressFcOp)
*@brief Performs Col2im for each batch entry. \n

*@par Inputs:
-*@li x: The Col Tensor. 4-D, shape: `(n, c, kernel_h*kernel_w, ho*wo)`.
-where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1.
-*@li output_size: The img shape Tensor. 1-D, shape:`(2)`, value: (output_h, output_w). \n
+*@li input_x: The Col Tensor. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`.
+where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 \n

*@par Outputs:
-*y: The img Tensor. 4-D, shape: `(n, c, output_h, output_w)`. \n
+*@li output_y: The img Tensor. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n

*@par Attributes:
*@li kernel_shape: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution.
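Both versions of the input description quote the same output-size rule; written out for one spatial dimension (hypothetical helper, numbers only for illustration):

#include <cstdint>

// do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1) / stride_d + 1,
// i.e. how many dilated-kernel placements fit along one padded axis.
int64_t ColDim(int64_t output_d, int64_t padding_d, int64_t dilation_d,
               int64_t kernel_d, int64_t stride_d) {
  return (output_d + 2 * padding_d - dilation_d * (kernel_d - 1) - 1) / stride_d + 1;
}
// Example: output_h = 28, padding 1, dilation 1, kernel 3, stride 1 -> ho = 28.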
@@ -910,7 +909,7 @@ output shape would be [max(ngram_indexes) + 1]. If input shape is [N, C], this o
*@li either pool_strings or pool_int64s attributes must be present but not both.
*/


-REG_OP(TfIdfVectorizer)
+REG_OP(TfidVectorizer)
.INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.OUTPUT(output, TensorType({DT_FLOAT}))
.REQUIRED_ATTR(max_gram_length, Int)
@@ -922,7 +921,7 @@ REG_OP(TfIdfVectorizer)
.ATTR(pool_int64s, ListInt, {})
.ATTR(pool_strings, ListString, {})
.ATTR(weights, ListFloat, {})
-.OP_END_FACTORY_REG(TfIdfVectorizer)
+.OP_END_FACTORY_REG(TfidVectorizer)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_

+2 -2 third_party/fwkacllib/inc/runtime/base.h

@@ -20,7 +20,7 @@
#include <stdint.h>
#include "toolchain/prof_callback.h"

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif
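This guard edit repeats across every runtime header below; after the change, the extern "C" block is emitted for C++ consumers only when COMPILE_OMG_PACKAGE is not defined. Schematically:

// Pattern shared by the runtime headers in this commit (schematic only).
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {   // C linkage for ordinary C++ builds
#endif

/* ... RTS_API declarations ... */

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}              // matching close of the extern "C" block
#endif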


@@ -357,7 +357,7 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_
*/
RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+7 -8 third_party/fwkacllib/inc/runtime/config.h

@@ -19,7 +19,7 @@


#include "base.h" #include "base.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -43,7 +43,6 @@ typedef enum tagRtChipType {
CHIP_LHISI,
CHIP_DC,
CHIP_CLOUD_V2,
-CHIP_NO_DEVICE,
CHIP_END,
} rtChipType_t;


@@ -54,11 +53,11 @@ typedef enum tagRtAicpuScheType {
} rtAicpuScheType;

typedef enum tagRtDeviceCapabilityType {
-RT_SCHEDULE_SOFTWARE = 0, // Software Schedule
+RT_SCHEDULE_SOFTWARE = 0, // SoftWare Schedule
RT_SCHEDULE_SOFTWARE_OPT,
RT_SCHEDULE_HARDWARE, // HWTS Schedule
RT_AICPU_BLOCKING_OP_NOT_SUPPORT,
RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation
} rtDeviceCapabilityType;

typedef enum tagRtVersion {
@@ -236,7 +235,7 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout);
*/
RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+2 -2 third_party/fwkacllib/inc/runtime/context.h

@@ -19,7 +19,7 @@


#include "base.h" #include "base.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -157,7 +157,7 @@ RTS_API rtError_t rtGetGroupCount(uint32_t *count);
*/
RTS_API rtError_t rtSetCtxINFMode(bool mode);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+11 -11 third_party/fwkacllib/inc/runtime/dev.h

@@ -19,7 +19,7 @@


#include "base.h" #include "base.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -80,15 +80,15 @@ typedef enum tagMemoryInfo {
} rtMemoryInfo_t;

typedef enum tagRtDeviceModuleType {
-RT_MODULE_TYPE_SYSTEM = 0, /**< system info*/
-RT_MODULE_TYPE_AICPU, /** < aicpu info*/
-RT_MODULE_TYPE_CCPU, /**< ccpu_info*/
-RT_MODULE_TYPE_DCPU, /**< dcpu info*/
-RT_MODULE_TYPE_AICORE, /**< AI CORE info*/
-RT_MODULE_TYPE_TSCPU, /**< tscpu info*/
-RT_MODULE_TYPE_PCIE, /**< PCIE info*/
-RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/
-} rtDeviceModuleType_t;
+RT_MODULE_TYPE_SYSTEM = 0,
+RT_MODULE_TYPE_AICPU,
+RT_MODULE_TYPE_CCPU,
+RT_MODULE_TYPE_DCPU,
+RT_MODULE_TYPE_AICORE,
+RT_MODULE_TYPE_TSCPU,
+RT_MODULE_TYPE_PCIE,
+RT_MODULE_TYPE_VECTOR_CORE
+} tagRtDeviceModuleType_t;


/**
* @ingroup dvrt_dev
@@ -380,7 +380,7 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device);
*/
RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+2 -2 third_party/fwkacllib/inc/runtime/dvfsprofile.h

@@ -19,7 +19,7 @@


#include "base.h" #include "base.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile();
*/
RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+12 -2 third_party/fwkacllib/inc/runtime/event.h

@@ -19,7 +19,7 @@


#include "base.h" #include "base.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -41,6 +41,16 @@ typedef enum rtEventWaitStatus {
#define RT_EVENT_DDSYNC 0x04U
#define RT_EVENT_TIME_LINE 0x08U

+#define RT_EVENT_DDSYNC_NS 0x01U
+#define RT_EVENT_STREAM_MARK 0x02U
+#define RT_EVENT_DDSYNC 0x04U
+#define RT_EVENT_TIME_LINE 0x08U
+
+#define RT_EVENT_DDSYNC_NS 0x01U
+#define RT_EVENT_STREAM_MARK 0x02U
+#define RT_EVENT_DDSYNC 0x04U
+#define RT_EVENT_TIME_LINE 0x08U
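Each RT_EVENT_* macro occupies its own bit, so callers combine them with bitwise OR. A hedged usage sketch; rtEventCreateWithFlag is assumed from the runtime's naming pattern and does not appear in this diff:

#include "runtime/event.h"

// Hypothetical: request both DDSYNC and timeline behavior on one event.
rtError_t CreateTimelineEvent(rtEvent_t *evt) {
  return rtEventCreateWithFlag(evt, RT_EVENT_DDSYNC | RT_EVENT_TIME_LINE);
}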

/**
* @ingroup dvrt_event
* @brief create event instance
@@ -272,7 +282,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs
*/
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+2 -2 third_party/fwkacllib/inc/runtime/kernel.h

@@ -20,7 +20,7 @@
#include "base.h" #include "base.h"
#include "stream.h" #include "stream.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -647,7 +647,7 @@ RTS_API rtError_t rtStartMDCProfiler(void **addr, uint32_t length);
*/
RTS_API rtError_t rtStopMDCProfiler(void *addr);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+2 -2 third_party/fwkacllib/inc/runtime/mem.h

@@ -24,7 +24,7 @@
#include "config.h" #include "config.h"
#include "stream.h" #include "stream.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -547,7 +547,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
*/
RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+37 -36 third_party/fwkacllib/inc/runtime/rt_ffts.h

@@ -1,5 +1,5 @@
/*
-* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+* Copyright (c) Huawei Technologies Co. , Ltd. 2021. All rights reserved.
* Description: ffts interface
*/


@@ -8,7 +8,7 @@


#include "base.h" #include "base.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -19,8 +19,8 @@ extern "C" {
#define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U

typedef enum tagFftsType {
RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define
RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define
} rtFftsType_t;

typedef enum tagFftsSubTaskType {
@@ -37,7 +37,7 @@ typedef enum tagFftsSubTaskType {
} rtFftsSubTaskType_t;

typedef struct tagManualThreadDmuInfo {
uint64_t dataAddr; // device mem
uint16_t numOuter;
uint16_t numInner;
uint32_t strideOuter;
@@ -50,43 +50,44 @@ typedef struct tagManualThreadDependency {
} rtManualThreadDependency_t;

typedef struct tagManualThreadAicAivInfo {
uint64_t taskParamAddr; // device mem
uint16_t taskParamOffset;
// when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16
// when satMode=0 and FP16 computation with none INF inputs overflows/underflows
// results will be saturated to +/- MAX of FP16
uint8_t satMode;
-uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved
+uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved
uint8_t iCachePrefetchCnt; // units is 2K
uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0
uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0
uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts
// num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index
uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim
uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM];
const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM];

-rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1]
+rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim - 1]
rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
} rtManualThreadAicAivInfo_t;
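The satMode comment above distinguishes two FP16 overflow policies; since FP16's largest finite value is 65504, saturation clamps there while the other mode produces +/-INF. An illustrative float-domain helper (not runtime code):

#include <cmath>
#include <limits>

// satMode = 0: clamp an overflowed FP16 result to +/-65504 (FP16 max finite).
// satMode = 1: let it become +/-INF instead.
float Fp16Overflow(float value, int satMode) {
  const float kFp16Max = 65504.0f;
  if (std::fabs(value) <= kFp16Max) {
    return value;  // no overflow: either mode returns it unchanged
  }
  if (satMode == 0) {
    return value > 0.0f ? kFp16Max : -kFp16Max;
  }
  return value > 0.0f ? std::numeric_limits<float>::infinity()
                      : -std::numeric_limits<float>::infinity();
}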


typedef struct tagAutoThreadPrefetch {
uint64_t dataAddr; // device mem
uint32_t dataAddrOffset;
uint32_t nonTailDataLen;
uint32_t tailDataLen;
} rtAutoThreadPrefetch_t;

typedef struct tagAutoThreadAicAivInfo {
uint64_t taskParamAddr; // device mem
uint16_t taskParamOffset;
// when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16
-// when satMode=0 and FP16 computation with none INF inputs overflows/underflows, results will be saturated to +/-MAX of FP16
+// when satMode=0 and FP16 computation with none INF inputs overflows/underflows
+// results will be saturated to +/- MAX of FP16
uint8_t satMode;
-uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved
+uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved
uint8_t iCachePrefetchCnt; // units is 2K
uint8_t prefetchEnableBitmap; // 8 bit bitmap
uint8_t prefetchOnceBitmap; // 8 bit bitmap


uint16_t tailBlkDim;
uint16_t nonTailBlkDim;
@@ -94,13 +95,13 @@ typedef struct tagAutoThreadAicAivInfo {
const char *nonTailTaskFuncStub;
const char *tailTaskFuncStub;

// for prefetch, valid num is prefetchEnableBitmap bit count
// if prefetchEnableBitmap = '00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid
rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
} rtAutoThreadAicAivInfo_t;
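Per the comment above, the number of valid srcPrefetch entries is the population count of prefetchEnableBitmap; '00010011' has three set bits, hence three prefetches. In C++:

#include <bitset>
#include <cstdint>

// Count how many srcPrefetch slots a given bitmap enables.
int PrefetchCount(uint8_t prefetchEnableBitmap) {
  return static_cast<int>(std::bitset<8>(prefetchEnableBitmap).count());
}
// PrefetchCount(0b00010011) == 3, matching the header comment.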


typedef struct tagAutoThreadCacheInfo {
uint64_t dataAddr; // device mem
uint32_t dataAddrOffset;
uint32_t nonTailDataLen;
uint32_t tailDataLen;
@@ -108,7 +109,7 @@ typedef struct tagAutoThreadCacheInfo {
} rtAutoThreadCacheInfo_t;

typedef struct tagManualThreadCacheInfo {
rtManualThreadDmuInfo_t *dmuList; // 0-64k
uint16_t dmuNum;
uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM];
uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM];
@@ -151,11 +152,11 @@ typedef struct tagFftsSubTaskInfo {
} rtFftsSubTaskInfo_t;

typedef struct tagFftsDescInfo {
uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder
uint8_t di; // discard invalidate
uint8_t dw; // discard write back
uint8_t df; // discard flush
-uint8_t dataSplitUnit; // split source or ticket cache by 2^dataSplitUnit MB
+uint8_t dataSplitUnit; // split source or ticket cache by 2~dataSplitUnit MB
uint8_t prefetchOstNum;
uint8_t cacheMaintainOstNum;
uint8_t aicPrefetchUpper;
@@ -165,20 +166,20 @@ typedef struct tagFftsDescInfo {
} rtFftsDescInfo_t;

typedef struct tagFftsTaskInfo {
rtFftsType_t fftsType;
uint16_t subTaskNum;
uint16_t tickCacheNum;
rtFftsDescInfo_t fftsDesc;
// sub task desc, real num is subTaskNum
rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM];

-// ticket cache, real number is tickCacheNum.
+// ticket cache, real number is ticketCacheNum
rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM];
} rtFftsTaskInfo_t;

RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
-#endif // __CCE_RUNTIME_FFTS_H
+#endif //__CCE_RUNTIME_FFTS_H

+2 -2 third_party/fwkacllib/inc/runtime/rt_model.h

@@ -19,7 +19,7 @@


#include "base.h" #include "base.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -490,7 +490,7 @@ RTS_API rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *ad
*/
RTS_API rtError_t rtDebugUnRegister(rtModel_t model);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+3 -5 third_party/fwkacllib/inc/runtime/rt_stars.h

@@ -8,7 +8,7 @@


#include "base.h" #include "base.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -23,7 +23,6 @@ extern "C" {
*/
RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream);

/**
* @ingroup rt_stars
* @brief create cdq instance.
@@ -77,11 +76,10 @@ RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *da
* @param [in] stream launch task on the stream
* @return RT_ERROR_NONE for ok, others failed
*/
-RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *ptrAddr,
+RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *prtAddr,
rtStream_t stream);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif // __CCE_RUNTIME_STARS_H

+2 -2 third_party/fwkacllib/inc/runtime/stream.h

@@ -20,7 +20,7 @@
#include "base.h" #include "base.h"
#include "event.h" #include "event.h"


#if defined(__cplusplus)
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" { extern "C" {
#endif #endif


@@ -211,7 +211,7 @@ RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, con
*/
RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream);

-#if defined(__cplusplus)
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif




+0 -32 third_party/fwkacllib/inc/toolchain/prof_acl_api.h

@@ -84,7 +84,6 @@
#endif

#include <cstdint>
-#include <stddef.h>

namespace Msprofiler {
namespace Api {
@@ -106,37 +105,6 @@ extern "C" {


MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);

-typedef uint32_t Status;
-typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1;
-///
-/// @ingroup AscendCL
-/// @brief subscribe profiling data of graph
-/// @param [in] graphId: the graph id subscribed
-/// @param [in] profSubscribeConfig: pointer to config of model subscribe
-/// @return Status result of function
-///
-Status aclgrphProfGraphSubscribe(const uint32_t graphId,
-const aclprofSubscribeConfig1 *profSubscribeConfig);
-
-///
-/// @ingroup AscendCL
-/// @brief unsubscribe profiling data of graph
-/// @param [in] graphId: the graph id subscribed
-/// @return Status result of function
-///
-Status aclgrphProfGraphUnSubscribe(const uint32_t graphId);
-
-/**
-* @ingroup AscendCL
-* @brief get graph id from subscription data
-*
-* @param opInfo [IN] pointer to subscription data
-* @param opInfoLen [IN] memory size of subscription data
-*
-* @retval graph id of subscription data
-* @retval 0 for failed
-*/
-size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index);
#ifdef __cplusplus
}
#endif


+0 -12 third_party/fwkacllib/inc/toolchain/prof_callback.h

@@ -55,17 +55,6 @@ struct ReporterData {
};

/**
-* @name HashData
-* @brief struct of data to hash
-*/
-struct HashData {
-int deviceId; // the index of device
-size_t dataLen; // the length of data
-unsigned char *data; // the data content
-uint64_t hashId; // the id of hashed data
-};
-
-/**
* @name MsprofReporterModuleId
* @brief module id of data to report
*/
*/ */
@@ -86,7 +75,6 @@ enum MsprofReporterCallbackType {
MSPROF_REPORTER_INIT, // init reporter
MSPROF_REPORTER_UNINIT, // uninit reporter
MSPROF_REPORTER_DATA_MAX_LEN, // data max length for calling report callback
-MSPROF_REPORTER_HASH // hash data to id
};

/**

