From: @shenwei41
Reviewed-by: @lilongfei15, @xsmq
Signed-off-by: @xsmq
Tag: tags/v1.2.0
@@ -174,6 +174,12 @@ typedef enum {
    ACL_ERROR = 3,
} aclLogLevel;

typedef enum {
    ACL_MEMTYPE_DEVICE = 0,
    ACL_MEMTYPE_HOST = 1,
} aclMemType;

/**
 * @ingroup AscendCL
 * @brief Converts data of type aclFloat16 to data of type float
@@ -596,6 +602,18 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBu

/**
 * @ingroup AscendCL
 * @brief Set the tensor memory type specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param memType [IN] ACL_MEMTYPE_DEVICE means device memory, ACL_MEMTYPE_HOST means host memory
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemType memType);
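A minimal usage sketch (the shape, format, and error handling are illustrative assumptions, not part of this patch):

#include "acl/acl.h"

// Sketch: mark a tensor as host-resident with the new aclMemType enum.
aclError PlaceTensorOnHost() {
    int64_t shape[] = {1, 3, 224, 224};  // hypothetical shape
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT16, 4, shape, ACL_FORMAT_NCHW);
    if (desc == nullptr) {
        return ACL_ERROR_INVALID_PARAM;  // creation failed
    }
    aclError ret = aclSetTensorPlaceMent(desc, ACL_MEMTYPE_HOST);  // new API
    aclDestroyTensorDesc(desc);
    return ret;
}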
/**
 * @ingroup AscendCL
 * @brief an interface for users to output APP logs
 *
 * @param logLevel [IN] the level of current log
@@ -1203,6 +1203,18 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl
ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
                                                const void *attrValue, size_t valueSize);
/**
 * @ingroup AscendCL
 * @brief get the real tensor name from modelDesc
 *
 * @param modelDesc [IN] pointer to modelDesc
 * @param name [IN] tensor name
 *
 * @retval the pointer to the real tensor name on success
 * @retval NULL on failure
 */
ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelDesc, const char *name);
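A usage sketch (the model path, tensor name, and omitted error checks are illustrative assumptions):

// Sketch: resolve the graph-internal name of a tensor after loading a model.
uint32_t modelId = 0;
(void)aclmdlLoadFromFile("model.om", &modelId);  // hypothetical model file
aclmdlDesc *modelDesc = aclmdlCreateDesc();
(void)aclmdlGetDesc(modelDesc, modelId);
const char *realName = aclmdlGetTensorRealName(modelDesc, "output0");
if (realName == nullptr) {
    // no tensor with that name in this model
}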
#ifdef __cplusplus
}
#endif

@@ -25,6 +25,8 @@
extern "C" {
#endif

#define ACL_EVENT_TIME_LINE 0x00000008u

typedef enum aclrtRunMode {
    ACL_DEVICE,
    ACL_HOST,
@@ -427,6 +429,18 @@ ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event);

/**
 * @ingroup AscendCL
 * @brief create event instance with flag
 *
 * @param event [OUT] created event
 * @param flag [IN] event flag
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtCreateEventWithFlag(aclrtEvent *event, uint32_t flag);
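A usage sketch (pairing with aclrtDestroyEvent follows the existing event API; not shown in this hunk):

// Sketch: create an event carrying only timeline information, using the
// ACL_EVENT_TIME_LINE flag defined at the top of this file.
aclrtEvent evt = nullptr;
aclError ret = aclrtCreateEventWithFlag(&evt, ACL_EVENT_TIME_LINE);
if (ret == ACL_SUCCESS) {
    // record on a stream, then query or synchronize as with aclrtCreateEvent
    (void)aclrtDestroyEvent(evt);
}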
/**
 * @ingroup AscendCL
 * @brief destroy event instance
 *
 * @par Function
@@ -27,7 +27,7 @@
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
 * @brief Initialize HCCL.
@@ -66,15 +66,14 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root
 * @param sendBuf A pointer identifying the input data address of the operator.
 * @param recvBuf A pointer identifying the output data address of the operator.
 * @param count An integer(u64) identifying the number of the output data.
 * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32.
 * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
 * @param comm A pointer identifying the communication resource the operator is based on.
 * @param stream A pointer identifying the stream information.
 * @return HcclResult
 */
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
                                HcclReduceOp op, HcclComm comm, aclrtStream stream);
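A call sketch (the device buffers, communicator, and stream are assumed to be set up elsewhere; the trailing synchronize reflects the call being asynchronous):

// Sketch: fp32 sum all-reduce over `count` elements held in device buffers.
HcclResult DoAllReduce(HcclComm comm, aclrtStream stream,
                       void *devIn, void *devOut, uint64_t count) {
    HcclResult ret = HcclAllReduce(devIn, devOut, count, HCCL_DATA_TYPE_FP32,
                                   HCCL_REDUCE_SUM, comm, stream);
    if (ret != HCCL_SUCCESS) {
        return ret;
    }
    // Wait for the collective to finish before reading devOut.
    return (aclrtSynchronizeStream(stream) == ACL_SUCCESS) ? HCCL_SUCCESS
                                                           : HCCL_E_RUNTIME;
}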
/**
 * @brief Broadcast operator.
@@ -85,10 +84,10 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc
 * @param root An integer(u32) identifying the root rank in the operator.
 * @param comm A pointer identifying the communication resource the operator is based on.
 * @param stream A pointer identifying the stream information.
 * @return HcclResult
 */
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
                                aclrtStream stream);
/**
 * @brief ReduceScatter operator.
@@ -100,10 +99,10 @@ extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType
 * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
 * @param comm A pointer identifying the communication resource the operator is based on.
 * @param stream A pointer identifying the stream information.
 * @return HcclResult
 */
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
                                    HcclReduceOp op, HcclComm comm, aclrtStream stream);
/**
 * @brief AllGather operator.
@@ -114,10 +113,10 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC
 * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
 * @param comm A pointer identifying the communication resource the operator is based on.
 * @param stream A pointer identifying the stream information.
 * @return HcclResult
 */
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType,
                                HcclComm comm, aclrtStream stream);
/**
 * @brief Destroy HCCL comm
@@ -130,5 +129,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_H_
@@ -16,10 +16,10 @@
/**
 * @file hccl_types.h
 * @brief HCCL data type definition
 *
 */
#ifndef HCCL_TYPES_H_
#define HCCL_TYPES_H_
@@ -27,33 +27,33 @@
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/**
 * @brief HCCL functions return value definition
 */
typedef enum {
    HCCL_SUCCESS = 0,               /**< success */
    HCCL_E_PARA = 1,                /**< parameter error */
    HCCL_E_PTR = 2,                 /**< empty pointer */
    HCCL_E_MEMORY = 3,              /**< memory error */
    HCCL_E_INTERNAL = 4,            /**< internal error */
    HCCL_E_NOT_SUPPORT = 5,         /**< not support feature */
    HCCL_E_NOT_FOUND = 6,           /**< not found specific resource */
    HCCL_E_UNAVAIL = 7,             /**< resource unavailable */
    HCCL_E_SYSCALL = 8,             /**< call system interface error */
    HCCL_E_TIMEOUT = 9,             /**< timeout */
    HCCL_E_OPEN_FILE_FAILURE = 10,  /**< open file fail */
    HCCL_E_TCP_CONNECT = 11,        /**< tcp connect fail */
    HCCL_E_ROCE_CONNECT = 12,       /**< roce connect fail */
    HCCL_E_TCP_TRANSFER = 13,       /**< tcp transfer fail */
    HCCL_E_ROCE_TRANSFER = 14,      /**< roce transfer fail */
    HCCL_E_RUNTIME = 15,            /**< call runtime api fail */
    HCCL_E_DRV = 16,                /**< call driver api fail */
    HCCL_E_PROFILING = 17,          /**< call profiling api fail */
    HCCL_E_CCE = 18,                /**< call cce api fail */
    HCCL_E_NETWORK = 19,            /**< call network api fail */
    HCCL_E_RESERVED                 /**< reserved */
} HcclResult;
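Since every HCCL entry point returns this enum, callers typically wrap calls in a status check; a sketch of such a convenience macro (not part of this header) is:

#include <cstdio>

// Sketch: propagate any non-success HcclResult with a location message.
#define HCCL_CHECK(expr)                                                 \
    do {                                                                 \
        HcclResult hcclRet_ = (expr);                                    \
        if (hcclRet_ != HCCL_SUCCESS) {                                  \
            fprintf(stderr, "HCCL error %d at %s:%d\n",                  \
                    static_cast<int>(hcclRet_), __FILE__, __LINE__);     \
            return hcclRet_;                                             \
        }                                                                \
    } while (0)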
/**
@@ -65,37 +65,37 @@ typedef void *HcclComm;
 * @brief HCCL reduction operation
 */
typedef enum {
    HCCL_REDUCE_SUM = 0,    /**< sum */
    HCCL_REDUCE_PROD = 1,   /**< prod */
    HCCL_REDUCE_MAX = 2,    /**< max */
    HCCL_REDUCE_MIN = 3,    /**< min */
    HCCL_REDUCE_RESERVED    /**< reserved */
} HcclReduceOp;
/**
 * @brief HCCL data type
 */
typedef enum {
    HCCL_DATA_TYPE_INT8 = 0,    /**< int8 */
    HCCL_DATA_TYPE_INT16 = 1,   /**< int16 */
    HCCL_DATA_TYPE_INT32 = 2,   /**< int32 */
    HCCL_DATA_TYPE_FP16 = 3,    /**< fp16 */
    HCCL_DATA_TYPE_FP32 = 4,    /**< fp32 */
    HCCL_DATA_TYPE_INT64 = 5,   /**< int64 */
    HCCL_DATA_TYPE_UINT64 = 6,  /**< uint64 */
    HCCL_DATA_TYPE_RESERVED     /**< reserved */
} HcclDataType;

const uint32_t HCCL_ROOT_INFO_BYTES = 4108;  // 4108: root info length
/**
 * @brief HCCL root info
 */
typedef struct HcclRootInfoDef {
    char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;
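A bootstrap sketch of what this opaque blob is for (HcclGetRootInfo and HcclCommInitRootInfo are declared in hccl.h; rank, nRanks, and the inter-process transport of rootInfo are assumptions of this example):

HcclRootInfo rootInfo;
if (rank == 0) {
    HcclGetRootInfo(&rootInfo);  // fills the opaque 4108-byte blob on the root
    // ... send rootInfo to the other nRanks - 1 processes (MPI, file, ...) ...
}
HcclComm comm = nullptr;
HcclCommInitRootInfo(nRanks, &rootInfo, rank, &comm);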
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_TYPES_H_
@@ -23,80 +23,80 @@
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0;                          // success
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000;         // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001;      // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002;          // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003;        // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004;         // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005;          // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006;   // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007;  // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008;        // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009;             // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010;            // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011;      // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012;      // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013;         // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014;      // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015;      // callback not registered to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016;   // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017;        // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018;   // invalid malloc type
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000;   // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001;     // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002;           // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003;      // aicore overflow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004;             // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005;   // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006;         // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007;     // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;    // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;    // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;     // no model resource
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;        // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;              // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;      // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;     // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;   // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;       // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;    // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;           // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;        // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;         // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;        // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;               // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;        // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;      // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;         // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;       // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;    // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;   // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;       // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;             // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;    // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;  // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;   // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;    // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;         // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;      // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;   // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;         // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;       // program register num used up
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;       // device setup error

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;    // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900;  // aicpu internal error
#ifdef __cplusplus
}
#endif
#endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
@@ -3781,6 +3781,32 @@ REG_OP(ArgMaxGradD)
    .REQUIRED_ATTR(dimension, Int)
    .OP_END_FACTORY_REG(ArgMaxGradD)

/**
*@brief Returns the cosine similarity between x1 and x2, computed along dim. \n

*@par Inputs:
*Two inputs, including:
* @li input_x1: A tensor. Must be one of the following types: float32. \n
* @li input_x2: A tensor. Must be one of the following types: float32. \n

*@par Attributes:
*@li dim: An optional int. The dimension along which the similarity is computed. Defaults to 1. \n
*@li eps: An optional float. A small value to avoid division by zero. Defaults to 1e-8. \n

*@par Outputs:
*output_y: A Tensor with the same type as input_x1. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator CosineSimilarity. \n
*/
REG_OP(CosineSimilarity)
    .INPUT(input_x1, TensorType({DT_FLOAT}))    /* "First operand." */
    .INPUT(input_x2, TensorType({DT_FLOAT}))    /* "Second operand." */
    .OUTPUT(output_y, TensorType({DT_FLOAT}))   /* "Result, has same element type as the two inputs" */
    .ATTR(dim, Int, 1)
    .ATTR(eps, Float, 1e-8)
    .OP_END_FACTORY_REG(CosineSimilarity)
}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_
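For reference, a host-side sketch of the computation for one slice along dim (this mirrors the usual definition dot(x1, x2) / max(||x1||·||x2||, eps); it is illustrative, not the kernel):

#include <algorithm>
#include <cmath>
#include <vector>

// Sketch: cosine similarity of two equal-length float vectors.
float CosineSimilarity1D(const std::vector<float> &x1,
                         const std::vector<float> &x2, float eps = 1e-8f) {
    float dot = 0.0f, n1 = 0.0f, n2 = 0.0f;
    for (size_t i = 0; i < x1.size(); ++i) {
        dot += x1[i] * x2[i];
        n1 += x1[i] * x1[i];
        n2 += x2[i] * x2[i];
    }
    return dot / std::max(std::sqrt(n1) * std::sqrt(n2), eps);
}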
@@ -952,6 +952,36 @@ REG_OP(Angle)
    .ATTR(Tout, Type, DT_FLOAT)
    .OP_END_FACTORY_REG(Angle)

/**
*@brief Computes the gradient of SoftMarginLoss. \n

*@par Inputs:
*Three inputs, including:
* @li predict: A tensor. Must be one of the following types:
* float16, float32. \n
* @li label: A tensor with the same shape as predict. Must be one of the following types:
* float16, float32. \n
* @li dout: A tensor with the same shape as predict. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
* @li reduction: Specifies the reduction to apply to the output:
* 'none' | 'mean' | 'sum'. Default: 'mean'. \n

*@par Outputs:
* gradient: A Tensor with the same type as predict. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator SoftMarginLoss Backward. \n
*/
REG_OP(SoftMarginLossGrad)
    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(SoftMarginLossGrad)
}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_
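An element-wise sketch of the math, assuming the forward loss is log(1 + exp(-label·predict)) as in SoftMarginLoss below (the 1/N factor for reduction == 'mean' is applied on top; illustrative host code):

#include <cmath>

// Sketch: d/dpredict log(1 + exp(-label * predict))
//       = -label / (1 + exp(label * predict)), scaled by the incoming dout.
float SoftMarginLossGrad1(float predict, float label, float dout) {
    return dout * (-label / (1.0f + std::exp(label * predict)));
}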
@@ -1206,6 +1206,34 @@ REG_OP(Centralization)
    .OP_END_FACTORY_REG(Centralization)

/**
*@brief Calculate the loss. Creates a criterion that optimizes a two-class classification
logistic loss between input_x and input_y (containing 1 or -1). \n

*@par Inputs:
*Two inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_y: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
*@li reduction: An optional string. Defaults to "mean". \n

*@par Outputs:
*output_z: when reduction == "none", a Tensor with the same type and shape as input_x; \n
* when reduction == "sum" or "mean", a Tensor with the same type as input_x and shape (1,). \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator SoftMarginLoss. \n
*/
REG_OP(SoftMarginLoss)
    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(reduction, String, "mean")
    .OUTPUT(output_z, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OP_END_FACTORY_REG(SoftMarginLoss)
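The per-element forward formula, sketched as host code for clarity (the chosen reduction is then applied over all elements):

#include <cmath>

// Sketch: loss_i = log(1 + exp(-y_i * x_i)) for labels y_i in {-1, +1}.
float SoftMarginLoss1(float x, float y) {
    return std::log(1.0f + std::exp(-y * x));
}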
/**
 * @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2.
 * @par Inputs:
@@ -793,6 +793,34 @@ REG_OP(HardShrink)
    .OP_END_FACTORY_REG(HardShrink)
/**
*@brief Calculate the hard shrink grad function. \n
*
* Computes the gradient for the HardShrink: the gradient passes through where
* x > lambda or x < -lambda, and is 0 otherwise. \n
*
*@par Inputs:
*Two inputs, including:
* @li gradients: A tensor. Must be one of the following types:
* float16, float32. \n
* @li features: A tensor. Must be one of the following types:
* float16, float32. \n
*
*@par Attributes:
*@li lambda: An optional float. Defaults to 0.5. \n
*
*@par Outputs:
*backprops: A Tensor with the same type and shape as features. \n
*
*@par Third-party framework compatibility
*Compatible with the Pytorch operator Hardshrink_backward. \n
*/
REG_OP(HardShrinkGrad)
    .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(lambda, Float, 0.5)
    .OP_END_FACTORY_REG(HardShrinkGrad)
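Element-wise, the rule above reduces to a pass-through mask; a host-side sketch:

// Sketch: the incoming gradient survives only where |feature| > lambda.
float HardShrinkGrad1(float gradient, float feature, float lambda = 0.5f) {
    return (feature > lambda || feature < -lambda) ? gradient : 0.0f;
}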
/**
 * @brief Calculate the hard sigmoid function. \n
 * @par Inputs:
@@ -884,6 +912,36 @@ REG_OP(LogSigmoid)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))   /* "input:x" */
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))  /* "output:y" */
    .OP_END_FACTORY_REG(LogSigmoid)
/**
*@brief Calculate the backward outputs of the function "hard_sigmoid". \n

*@par Inputs:
*Two inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
* @li alpha: An optional float. Defaults to 0.16666666. \n
* @li beta: An optional float. Defaults to 0.5. \n

*@par Outputs:
*One output, including:
* @li y: A tensor with the same type and shape as input_x. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Hardsigmoid backward. \n
*/
REG_OP(HardSigmoidGrad)
    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(alpha, Float, 0.16666666)
    .ATTR(beta, Float, 0.5)
    .OP_END_FACTORY_REG(HardSigmoidGrad)
}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
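Assuming hard_sigmoid(x) = clamp(alpha·x + beta, 0, 1), the backward pass can be sketched as:

// Sketch: derivative is alpha on the linear segment, 0 where the forward
// output saturated at 0 or 1.
float HardSigmoidGrad1(float grad, float x,
                       float alpha = 0.16666666f, float beta = 0.5f) {
    float t = alpha * x + beta;
    return (t > 0.0f && t < 1.0f) ? grad * alpha : 0.0f;
}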
@@ -737,14 +737,51 @@ where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1
 * Compatible with Pytorch col2im/im2col_backward operator.
 */
REG_OP(Col2im)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(output_size, TensorType({DT_INT32, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(kernel_size, ListInt)
    .REQUIRED_ATTR(dilation, ListInt)
    .REQUIRED_ATTR(padding, ListInt)
    .REQUIRED_ATTR(stride, ListInt)
    .OP_END_FACTORY_REG(Col2im)
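The sliding-window count quoted in the hunk header above can be written out directly (integer division, per spatial dimension; the helper name is hypothetical):

#include <cstdint>

// Sketch: number of kernel positions along one dimension, matching
// do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1) // stride_d + 1.
int64_t SlidingPositions(int64_t outputDim, int64_t padding, int64_t dilation,
                         int64_t kernel, int64_t stride) {
    return (outputDim + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1;
}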
/**
*@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine
matrices theta. \n

*@par Inputs:
*Input theta must be float16 or float32; output_size must be of type int32. Inputs
include:
*@li theta: input batch of affine matrices with shape (N, 2, 3) for 2D or (N, 3, 4)
for 3D.
*@li output_size: the target output image size, N×C×H×W for 2D or N×C×D×H×W for
3D. Example: torch.Size((32, 3, 24, 24)). \n

*@par Attributes:
*align_corners: if True, consider -1 and 1 to refer to the centers of the corner
pixels rather than the image corners. Refer to grid_sample() for a more complete
description. A grid generated by affine_grid() should be passed to grid_sample()
with the same setting for this option. Default: False. \n

*@par Outputs:
*@li y: the sampling grid, a tensor of shape (N, H, W, 2) for 2D or (N, D, H, W, 3)
for 3D, with the same type as theta. \n

*@attention Constraints:
*Input theta must be float16 or float32; output_size must be of type int32. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch affine_grid operator.
*/
REG_OP(AffineGrid)
    .INPUT(theta, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(output_size, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(align_corners, Bool, false)
    .OP_END_FACTORY_REG(AffineGrid)
}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_
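A host-side sketch of the 2D case, with coordinate conventions assumed to follow PyTorch's affine_grid (which the doc above names as the compatibility target):

#include <vector>

// Sketch: for each output pixel, normalized coords (x, y) in [-1, 1] are pushed
// through one 2x3 matrix theta: (x', y') = theta * [x, y, 1]^T. With
// align_corners=false, samples sit at pixel centers.
std::vector<float> AffineGrid2D(const float theta[6], int H, int W,
                                bool alignCorners = false) {
    std::vector<float> grid(static_cast<size_t>(H) * W * 2);
    for (int h = 0; h < H; ++h) {
        for (int w = 0; w < W; ++w) {
            float y = alignCorners ? (H == 1 ? 0.0f : 2.0f * h / (H - 1) - 1.0f)
                                   : (2.0f * h + 1.0f) / H - 1.0f;
            float x = alignCorners ? (W == 1 ? 0.0f : 2.0f * w / (W - 1) - 1.0f)
                                   : (2.0f * w + 1.0f) / W - 1.0f;
            size_t i = (static_cast<size_t>(h) * W + w) * 2;
            grid[i]     = theta[0] * x + theta[1] * y + theta[2];
            grid[i + 1] = theta[3] * x + theta[4] * y + theta[5];
        }
    }
    return grid;
}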
@@ -30,6 +30,11 @@ extern "C" {
#define RT_EVENT_DEFAULT (0x00)
#define RT_EVENT_WITH_FLAG (0x01)

#define RT_EVENT_DDSYNC_NS 0x01U
#define RT_EVENT_STREAM_MARK 0x02U
#define RT_EVENT_DDSYNC 0x04U
#define RT_EVENT_TIME_LINE 0x08U

/**
 * @ingroup dvrt_event
 * @brief create event instance
@@ -141,7 +141,7 @@ enum {
    IDEDD,      /**< IDE daemon device */
    IDEDH,      /**< IDE daemon host */
    HCCL,       /**< HCCL */
    FMK,        /**< Adapter */
    HIAIENGINE, /**< Matrix */
    DVPP,       /**< DVPP */
    RUNTIME,    /**< Runtime */
@@ -162,11 +162,11 @@ enum {
    MDCDEFAULT, /**< MDC undefined */
    MDCSC,      /**< MDC spatial cognition */
    MDCPNC,
    MLL,        /**< abandoned */
    DEVMM,      /**< Dlog memory management */
    KERNEL,     /**< Kernel */
    LIBMEDIA,   /**< Libmedia */
    CCECPU,     /**< AI CPU schedule */
    ASCENDDK,   /**< AscendDK */
    ROS,        /**< ROS */
    HCCP,
@@ -179,7 +179,7 @@ enum {
    TSDUMP, /**< TSDUMP module */
    AICPU,  /**< AICPU module */
    LP,     /**< LP module */
    TDT,    /**< tsdaemon or aicpu schedule */
    FE,
    MD,
    MB,