@@ -174,6 +174,12 @@ typedef enum {
     ACL_ERROR = 3,
 } aclLogLevel;
 
+typedef enum {
+    ACL_MEMTYPE_DEVICE = 0,
+    ACL_MEMTYPE_HOST = 1,
+} aclMemType;
+
 /**
  * @ingroup AscendCL
  * @brief Converts data of type aclFloat16 to data of type float
@@ -596,6 +602,18 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBu
 
 /**
  * @ingroup AscendCL
+ * @brief Set the memory type of the tensor specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param memType [IN] ACL_MEMTYPE_DEVICE means device memory, ACL_MEMTYPE_HOST means host memory
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemType memType);
+
+/**
+ * @ingroup AscendCL
  * @brief an interface for users to output APP logs
  *
  * @param logLevel [IN] the level of current log
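
As a caller-side illustration of the new placement API, here is a minimal sketch that pins a tensor description to host memory. aclCreateTensorDesc, ACL_FORMAT_ND and aclDestroyTensorDesc are existing acl_base.h APIs; the dtype, shape and the ACL_ERROR_BAD_ALLOC fallback are illustrative choices, not part of this change.

// Minimal sketch: pin an input's tensor description to host memory.
// Assumes #include "acl/acl_base.h".
aclError PinInputToHost(void) {
    int64_t dims[] = {1, 224};
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT16, 2, dims, ACL_FORMAT_ND);
    if (desc == NULL) {
        return ACL_ERROR_BAD_ALLOC;  // illustrative failure handling
    }
    aclError ret = aclSetTensorPlaceMent(desc, ACL_MEMTYPE_HOST);
    // ... pass desc to op/model execution as usual ...
    aclDestroyTensorDesc(desc);
    return ret;
}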
@@ -1203,6 +1203,18 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl
 ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
                                                 const void *attrValue, size_t valueSize);
 
+/**
+ * @ingroup AscendCL
+ * @brief get the real tensor name from modelDesc
+ *
+ * @param modelDesc [IN] pointer to modelDesc
+ * @param name [IN] tensor name
+ *
+ * @retval the pointer to the real tensor name
+ * @retval Failure return NULL
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelDesc, const char *name);
+
 #ifdef __cplusplus
 }
 #endif
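
A usage sketch for the new query, assuming the standard aclmdlCreateDesc/aclmdlGetDesc flow; modelId is assumed to come from aclmdlLoadFromFile elsewhere, and "input_0" is a placeholder tensor name rather than one any given model is guaranteed to expose.

// Minimal sketch: resolve a tensor's graph-internal name after loading a model.
// Assumes #include <stdio.h> and #include "acl/acl.h".
void PrintRealTensorName(uint32_t modelId) {
    aclmdlDesc *desc = aclmdlCreateDesc();
    if (desc == NULL) {
        return;
    }
    if (aclmdlGetDesc(desc, modelId) == ACL_SUCCESS) {
        const char *realName = aclmdlGetTensorRealName(desc, "input_0");
        printf("real tensor name: %s\n", (realName != NULL) ? realName : "(not found)");
    }
    (void)aclmdlDestroyDesc(desc);
}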
@@ -25,6 +25,8 @@
 extern "C" {
 #endif
 
+#define ACL_EVENT_TIME_LINE 0x00000008u
+
 typedef enum aclrtRunMode {
     ACL_DEVICE,
     ACL_HOST,
@@ -427,6 +429,18 @@ ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event);
 
 /**
  * @ingroup AscendCL
+ * @brief create an event instance with flag
+ *
+ * @param event [OUT] created event
+ * @param flag [IN] event flag
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtCreateEventWithFlag(aclrtEvent *event, uint32_t flag);
+
+/**
+ * @ingroup AscendCL
  * @brief destroy event instance
 *
 * @par Function
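
A sketch of the flagged variant in use, pairing it with the ACL_EVENT_TIME_LINE flag introduced above; aclrtRecordEvent, aclrtSynchronizeEvent and aclrtDestroyEvent are existing runtime APIs, and the stream is assumed to have been created by the caller.

// Minimal sketch: create a timeline-capable event and mark a point on a stream.
// Assumes #include "acl/acl_rt.h".
aclError MarkStreamPoint(aclrtStream stream) {
    aclrtEvent evt = NULL;
    aclError ret = aclrtCreateEventWithFlag(&evt, ACL_EVENT_TIME_LINE);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    ret = aclrtRecordEvent(evt, stream);   // mark a point on the stream
    if (ret == ACL_SUCCESS) {
        ret = aclrtSynchronizeEvent(evt);  // block until the mark is reached
    }
    (void)aclrtDestroyEvent(evt);
    return ret;
}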
@@ -27,7 +27,7 @@
 #ifdef __cplusplus
 extern "C" {
-#endif // __cplusplus
+#endif  // __cplusplus
 
 /**
  * @brief Initialize HCCL.
@@ -66,15 +66,14 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root
 * @param sendBuf A pointer identifying the input data address of the operator.
 * @param recvBuf A pointer identifying the output data address of the operator.
 * @param count An integer(u64) identifying the number of the output data.
-* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16,
-* float32.
+* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32.
 * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
 * @param comm A pointer identifying the communication resource based on.
 * @param stream A pointer identifying the stream information.
-* @return HcclResult
+* @return HcclResult
 */
-extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
-    HcclComm comm, aclrtStream stream);
+extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
+    HcclReduceOp op, HcclComm comm, aclrtStream stream);
 
 /**
 * @brief Broadcast operator.
@@ -85,10 +84,10 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc
 * @param root An integer(u32) identifying the root rank in the operator.
 * @param comm A pointer identifying the communication resource based on
 * @param stream A pointer identifying the stream information.
-* @return HcclResult
+* @return HcclResult
 */
-extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
-aclrtStream stream);
+extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
+    aclrtStream stream);
 
 /**
 * @brief ReduceScatter operator.
* @brief ReduceScatter operator. | * @brief ReduceScatter operator. | ||||
@@ -100,10 +99,10 @@ extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType | |||||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | ||||
* @param comm A pointer identifying the communication resource based on. | * @param comm A pointer identifying the communication resource based on. | ||||
* @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
* @return HcclResult | |||||
* @return HcclResult | |||||
*/ | */ | ||||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
/** | /** | ||||
* @brief AllGather operator. | * @brief AllGather operator. | ||||
@@ -114,10 +113,10 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC | |||||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | ||||
* @param comm A pointer identifying the communication resource based on. | * @param comm A pointer identifying the communication resource based on. | ||||
* @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
* @return HcclResult | |||||
* @return HcclResult | |||||
*/ | */ | ||||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||||
aclrtStream stream); | |||||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, | |||||
HcclComm comm, aclrtStream stream); | |||||
/** | /** | ||||
* @brief Destroy HCCL comm | * @brief Destroy HCCL comm | ||||
@@ -130,5 +129,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm);
 
 #ifdef __cplusplus
 }
-#endif // __cplusplus
-#endif // HCCL_H_
+#endif  // __cplusplus
+#endif  // HCCL_H_
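
To tie the re-wrapped declarations together, a sketch of a float32 sum all-reduce against an already initialized communicator. Creating comm (for example via HcclCommInitRootInfo) and allocating the device buffers are out of scope here; the final synchronize reflects the collective being queued asynchronously on the stream.

// Minimal sketch: sum-all-reduce `count` float32 elements, then drain the stream.
// Assumes #include "hccl/hccl.h" and #include "acl/acl_rt.h".
HcclResult SumAllReduce(void *sendBuf, void *recvBuf, uint64_t count,
                        HcclComm comm, aclrtStream stream) {
    HcclResult ret = HcclAllReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32,
                                   HCCL_REDUCE_SUM, comm, stream);
    if (ret != HCCL_SUCCESS) {
        return ret;  // see HcclResult in hccl_types.h for the failure codes
    }
    // aclrtSynchronizeStream is the existing runtime API for stream completion.
    return (aclrtSynchronizeStream(stream) == ACL_SUCCESS) ? HCCL_SUCCESS : HCCL_E_RUNTIME;
}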
@@ -16,10 +16,10 @@
 
 /**
  * @file hccl_types.h
- * @brief HCCL data type definition
- *
+ * @brief HCCL data type definition
+ *
 */
 
 #ifndef HCCL_TYPES_H_
 #define HCCL_TYPES_H_
@@ -27,33 +27,33 @@
 #ifdef __cplusplus
 extern "C" {
-#endif // __cplusplus
+#endif  // __cplusplus
 
 /**
 * @brief HCCL functions return value definition
 */
 typedef enum {
-  HCCL_SUCCESS = 0, /**< success */
-  HCCL_E_PARA = 1, /**< parameter error */
-  HCCL_E_PTR = 2, /**< empty pointer */
-  HCCL_E_MEMORY = 3, /**< memory error */
-  HCCL_E_INTERNAL = 4, /**< internal error */
-  HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
-  HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
-  HCCL_E_UNAVAIL = 7, /**< resource unavailable */
-  HCCL_E_SYSCALL = 8, /**< call system interface error */
-  HCCL_E_TIMEOUT = 9, /**< timeout */
-  HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
-  HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
-  HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
-  HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
-  HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
-  HCCL_E_RUNTIME = 15, /**< call runtime api fail */
-  HCCL_E_DRV = 16, /**< call driver api fail */
-  HCCL_E_PROFILING = 17, /**< call profiling api fail */
-  HCCL_E_CCE = 18, /**< call cce api fail */
-  HCCL_E_NETWORK = 19, /**< call network api fail */
-  HCCL_E_RESERVED /**< reserved */
+  HCCL_SUCCESS = 0,  /**< success */
+  HCCL_E_PARA = 1,  /**< parameter error */
+  HCCL_E_PTR = 2,  /**< empty pointer */
+  HCCL_E_MEMORY = 3,  /**< memory error */
+  HCCL_E_INTERNAL = 4,  /**< internal error */
+  HCCL_E_NOT_SUPPORT = 5,  /**< not support feature */
+  HCCL_E_NOT_FOUND = 6,  /**< not found specific resource */
+  HCCL_E_UNAVAIL = 7,  /**< resource unavailable */
+  HCCL_E_SYSCALL = 8,  /**< call system interface error */
+  HCCL_E_TIMEOUT = 9,  /**< timeout */
+  HCCL_E_OPEN_FILE_FAILURE = 10,  /**< open file fail */
+  HCCL_E_TCP_CONNECT = 11,  /**< tcp connect fail */
+  HCCL_E_ROCE_CONNECT = 12,  /**< roce connect fail */
+  HCCL_E_TCP_TRANSFER = 13,  /**< tcp transfer fail */
+  HCCL_E_ROCE_TRANSFER = 14,  /**< roce transfer fail */
+  HCCL_E_RUNTIME = 15,  /**< call runtime api fail */
+  HCCL_E_DRV = 16,  /**< call driver api fail */
+  HCCL_E_PROFILING = 17,  /**< call profiling api fail */
+  HCCL_E_CCE = 18,  /**< call cce api fail */
+  HCCL_E_NETWORK = 19,  /**< call network api fail */
+  HCCL_E_RESERVED  /**< reserved */
 } HcclResult;
 
 /**
@@ -65,37 +65,37 @@ typedef void *HcclComm;
 * @brief HCCL Reduction operation
 */
 typedef enum {
-  HCCL_REDUCE_SUM = 0, /**< sum */
-  HCCL_REDUCE_PROD = 1, /**< prod */
-  HCCL_REDUCE_MAX = 2, /**< max */
-  HCCL_REDUCE_MIN = 3, /**< min */
-  HCCL_REDUCE_RESERVED /**< reserved */
+  HCCL_REDUCE_SUM = 0,  /**< sum */
+  HCCL_REDUCE_PROD = 1,  /**< prod */
+  HCCL_REDUCE_MAX = 2,  /**< max */
+  HCCL_REDUCE_MIN = 3,  /**< min */
+  HCCL_REDUCE_RESERVED  /**< reserved */
 } HcclReduceOp;
 
 /**
 * @brief HCCL data type
 */
 typedef enum {
-  HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
-  HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
-  HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
-  HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
-  HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
-  HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
-  HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
-  HCCL_DATA_TYPE_RESERVED /**< reserved */
+  HCCL_DATA_TYPE_INT8 = 0,  /**< int8 */
+  HCCL_DATA_TYPE_INT16 = 1,  /**< int16 */
+  HCCL_DATA_TYPE_INT32 = 2,  /**< int32 */
+  HCCL_DATA_TYPE_FP16 = 3,  /**< fp16 */
+  HCCL_DATA_TYPE_FP32 = 4,  /**< fp32 */
+  HCCL_DATA_TYPE_INT64 = 5,  /**< int64 */
+  HCCL_DATA_TYPE_UINT64 = 6,  /**< uint64 */
+  HCCL_DATA_TYPE_RESERVED  /**< reserved */
 } HcclDataType;
 
-const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
+const uint32_t HCCL_ROOT_INFO_BYTES = 4108;  // 4108: root info length
 
 /**
 * @brief HCCL root info
 */
 typedef struct HcclRootInfoDef {
-  char internal[HCCL_ROOT_INFO_BYTES];
+  char internal[HCCL_ROOT_INFO_BYTES];
 } HcclRootInfo;
 
 #ifdef __cplusplus
 }
-#endif // __cplusplus
-#endif // HCCL_TYPES_H_
+#endif  // __cplusplus
+#endif  // HCCL_TYPES_H_
@@ -23,80 +23,80 @@
 extern "C" {
 #endif
 
-static const int32_t ACL_RT_SUCCESS = 0; // success
+static const int32_t ACL_RT_SUCCESS = 0;  // success
 
-static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
-static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
-static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
-static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
-static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
-static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
-static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
-static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
-static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
-static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
-static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
-static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
-static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
-static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
-static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
-static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
-static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
-static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
-static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
+static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000;  // param invalid
+static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001;  // invalid device id
+static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002;  // current context null
+static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003;  // stream not in current context
+static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004;  // model not in current context
+static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005;  // stream not in model
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006;  // event timestamp invalid
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007;  // event timestamp reversal
+static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008;  // memory address unaligned
+static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009;  // open file failed
+static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010;  // write file failed
+static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011;  // error subscribe stream
+static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012;  // error subscribe thread
+static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013;  // group not set
+static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014;  // group not create
+static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015;  // callback not register to stream
+static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016;  // invalid memory type
+static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017;  // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018;  // invalid malloc type
 
-static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
-static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
-static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
-static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow
-static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
-static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
-static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
-static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
-static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
-static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
-static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000;  // feature not support
+static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001;  // memory allocation error
+static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002;  // memory free error
+static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003;  // aicore overflow
+static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004;  // no device
+static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005;  // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006;  // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007;  // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;  // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;  // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;  // no model resource
 
-static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
-static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
-static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
-static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
-static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
-static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
-static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
-static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
-static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
-static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
-static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
-static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
-static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
-static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
-static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
-static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
-static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
-static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
-static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
-static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
-static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
-static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
-static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
-static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
-static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
-static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
-static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
-static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
-static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
-static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
-static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
-static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;  // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;  // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;  // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;  // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;  // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;  // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;  // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;  // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;  // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;  // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;  // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;  // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;  // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;  // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;  // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;  // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;  // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;  // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;  // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;  // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;  // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;  // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;  // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;  // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;  // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;  // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;  // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;  // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;  // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;  // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030;  // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;  // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;  // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;  // device setup error
 
-static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
-static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;  // drv internal error
+static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900;  // aicpu internal error
 
 #ifdef __cplusplus
 }
 #endif
-#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
+#endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
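
The constants above fall into numeric bands (107xxx parameter and handle errors, 207xxx feature and resource errors, 507xxx and up internal errors), which makes coarse triage possible; a small sketch, noting that the banding is inferred from the values rather than documented anywhere:

// Illustrative helper: classify a runtime error code by its numeric band.
// Assumes the constants from rt_error_codes.h above are in scope.
static const char *RtErrorBand(int32_t err) {
    if (err == ACL_RT_SUCCESS) {
        return "success";
    }
    if (err < 200000) {
        return "invalid parameter or handle";  // 107xxx
    }
    if (err < 500000) {
        return "feature or resource problem";  // 207xxx
    }
    return "internal runtime/driver error";    // 507xxx and up
}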
@@ -3781,6 +3781,32 @@ REG_OP(ArgMaxGradD)
     .REQUIRED_ATTR(dimension, Int)
     .OP_END_FACTORY_REG(ArgMaxGradD)
 
+/**
+*@brief Returns cosine similarity between x1 and x2, computed along dim. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li input_x1: A tensor. Must be one of the following types:
+* float32. \n
+* @li input_x2: A tensor. Must be one of the following types:
+* float32. \n
+
+*@par Attributes:
+*@li dim: An optional int. Dimension along which cosine similarity is computed. Defaults to 1.
+*@li eps: An optional float. Small value to avoid division by zero. Defaults to 1e-8. \n
+
+*@par Outputs:
+*output_y: A Tensor with the same type as input_x1. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator CosineSimilarity. \n
+*/
+REG_OP(CosineSimilarity)
+    .INPUT(input_x1, TensorType({DT_FLOAT}))  /* "First operand." */
+    .INPUT(input_x2, TensorType({DT_FLOAT}))  /* "Second operand." */
+    .OUTPUT(output_y, TensorType({DT_FLOAT})) /* "Result, has same element type as two inputs" */
+    .ATTR(dim, Int, 1)
+    .ATTR(eps, Float, 1e-8)
+    .OP_END_FACTORY_REG(CosineSimilarity)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_
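
Since the header only registers the operator's IR signature, here is a sketch of how the generated wrapper might be driven when building a graph; ge::op::Data, the generated set_input_*/set_attr_* setters and ge::Graph are the standard GE graph-construction surface, and all node names here are illustrative.

// Minimal sketch: wire CosineSimilarity into a GE graph.
// Assumes #include "graph/graph.h" and the generated op headers (all_ops.h).
ge::Graph BuildCosineSimilarityGraph() {
    auto x1 = ge::op::Data("x1");
    auto x2 = ge::op::Data("x2");
    auto cos = ge::op::CosineSimilarity("cosine_similarity")
                   .set_input_input_x1(x1)
                   .set_input_input_x2(x2)
                   .set_attr_dim(1)       // matches .ATTR(dim, Int, 1)
                   .set_attr_eps(1e-8f);  // matches .ATTR(eps, Float, 1e-8)
    ge::Graph graph("cosine_similarity_graph");
    graph.SetInputs(std::vector<ge::Operator>{x1, x2})
        .SetOutputs(std::vector<ge::Operator>{cos});
    return graph;
}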
@@ -952,6 +952,36 @@ REG_OP(Angle)
     .ATTR(Tout, Type, DT_FLOAT)
     .OP_END_FACTORY_REG(Angle)
 
+/**
+*@brief Computes the gradient of SoftMarginLoss. \n
+
+*@par Inputs:
+*Three inputs, including:
+* @li predict: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li label: A tensor with the same shape as predict. Must be one of the following types:
+* float16, float32. \n
+* @li dout: A tensor with the same shape as predict. Must be one of the following types:
+* float16, float32. \n
+
+*@par Attributes:
+* @li reduction: Specifies the reduction to apply to the output:
+* 'none' | 'mean' | 'sum'. Default: 'mean'. \n
+
+*@par Outputs:
+*gradient: A Tensor with the same type as predict. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SoftMarginLoss Backward. \n
+*/
+REG_OP(SoftMarginLossGrad)
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SoftMarginLossGrad)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_
@@ -1206,6 +1206,34 @@ REG_OP(Centralization)
     .OP_END_FACTORY_REG(Centralization)
 
 /**
+*@brief Calculate the loss. Creates a criterion that optimizes a two-class classification
+* logistic loss between input_x and input_y (containing 1 or -1). \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li input_x: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li input_y: A tensor. Must be one of the following types:
+* float16, float32. \n
+
+*@par Attributes:
+*@li reduction: An optional string. Defaults to "mean". \n
+
+*@par Outputs:
+*output_z: when reduction == "none", a Tensor with the same type and shape as input_x;
+* when reduction == "sum" or "mean", a Tensor with the same type as input_x and shape (1,). \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SoftMarginLoss. \n
+*/
+REG_OP(SoftMarginLoss)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(reduction, String, "mean")
+    .OUTPUT(output_z, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(SoftMarginLoss)
+
+/**
 * @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2.
 * @par Inputs:
@@ -793,6 +793,34 @@ REG_OP(HardShrink)
     .OP_END_FACTORY_REG(HardShrink)
 
 /**
+*@brief Calculate the hard shrink grad function. \n
+*
+* Computes the gradient for the HardShrink: if x > lambda or x < -lambda, x, otherwise 0. \n
+*
+*@par Inputs:
+*Two inputs, including:
+* @li gradients: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li features: A tensor. Must be one of the following types:
+* float16, float32. \n
+*
+*@par Outputs:
+*backprops: A Tensor with the same type and shape as features. \n
+*
+*@par Attributes:
+*@li lambda: An optional float. Defaults to 0.5. \n
+*
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Hardshrink_backward. \n
+*/
+REG_OP(HardShrinkGrad)
+    .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambda, Float, 0.5)
+    .OP_END_FACTORY_REG(HardShrinkGrad)
+
+/**
 * @brief Calculate the hard sigmoid function. \n
 * @par Inputs:
@@ -884,6 +912,36 @@ REG_OP(LogSigmoid)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))  /* "input:x" */
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */
     .OP_END_FACTORY_REG(LogSigmoid)
 
+/**
+*@brief Calculate the backward outputs of the function "hard_sigmoid". \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li input_x: A tensor. Must be one of the following types:
+* float16, float32. \n
+
+*@par Outputs:
+*One output, including:
+* @li y: A tensor with the same type and shape as input_x. \n
+
+* @par Attributes:
+* @li alpha: An optional float. Defaults to 0.16666666. \n
+* @li beta: An optional float. Defaults to 0.5. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Hardsigmoid backward. \n
+*/
+REG_OP(HardSigmoidGrad)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 0.16666666)
+    .ATTR(beta, Float, 0.5)
+    .OP_END_FACTORY_REG(HardSigmoidGrad)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
@@ -737,14 +737,51 @@ where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//st
 * Compatible with Pytorch col2im/im2col_backward operator.
 */
 REG_OP(Col2im)
-    .INPUT(x, TensorType({DT_FLOAT}))
-    .INPUT(output_size, TensorType({DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(output_size, TensorType({DT_INT32, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
     .REQUIRED_ATTR(kernel_size, ListInt)
     .REQUIRED_ATTR(dilation, ListInt)
     .REQUIRED_ATTR(padding, ListInt)
     .REQUIRED_ATTR(stride, ListInt)
     .OP_END_FACTORY_REG(Col2im)
 
+/**
+*@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine
+* matrices theta. \n
+
+*@par Inputs:
+*Input theta must be float16 or float, output_size must be int32 type. Inputs include:
+*@li theta: input batch of affine matrices with shape (N, 2, 3) for 2D or (N, 3, 4) for 3D.
+*@li output_size: the target output image size, (N, C, H, W) for 2D or (N, C, D, H, W)
+* for 3D. Example: torch.Size((32, 3, 24, 24)). \n
+
+*@par Attributes:
+*align_corners: if True, consider -1 and 1 to refer to the centers of the corner
+* pixels rather than the image corners. Refer to grid_sample() for a more complete
+* description. A grid generated by affine_grid() should be passed to grid_sample()
+* with the same setting for this option. Default: False. \n
+
+*@par Outputs:
+*y: the sampling grid, a tensor with the same type as theta, of shape (N, H, W, 2)
+* for 2D or (N, D, H, W, 3) for 3D. \n
+
+*@attention Constraints:
+*Input theta must be float16 or float, output_size must be int32 type. \n
+
+*@par Third-party framework compatibility
+*Compatible with Pytorch affine_grid operator.
+*/
+REG_OP(AffineGrid)
+    .INPUT(theta, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(output_size, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(AffineGrid)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_
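
Because output_size is an input tensor rather than an attribute, a graph built around AffineGrid would typically feed it from a Const node; a sketch under that assumption, reusing the (32, 3, 24, 24) target size from the comment above. The generated set_input_*/set_attr_* setters are assumed from the registration, and all node names are illustrative.

// Minimal sketch: AffineGrid fed by a constant (N, C, H, W) output size.
// Assumes #include "graph/graph.h" plus the generated op headers.
ge::op::AffineGrid BuildAffineGridNode(ge::op::Data &theta) {
    std::vector<int32_t> sizeData = {32, 3, 24, 24};
    ge::TensorDesc sizeDesc(ge::Shape({4}), ge::FORMAT_ND, ge::DT_INT32);
    ge::Tensor sizeTensor(sizeDesc, reinterpret_cast<uint8_t *>(sizeData.data()),
                          sizeData.size() * sizeof(int32_t));
    auto outputSize = ge::op::Const("output_size").set_attr_value(sizeTensor);
    return ge::op::AffineGrid("affine_grid")
        .set_input_theta(theta)
        .set_input_output_size(outputSize)
        .set_attr_align_corners(false);
}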
@@ -30,6 +30,11 @@ extern "C" {
 #define RT_EVENT_DEFAULT (0x00)
 #define RT_EVENT_WITH_FLAG (0x01)
 
+#define RT_EVENT_DDSYNC_NS 0x01U
+#define RT_EVENT_STREAM_MARK 0x02U
+#define RT_EVENT_DDSYNC 0x04U
+#define RT_EVENT_TIME_LINE 0x08U
+
 /**
  * @ingroup dvrt_event
  * @brief create event instance
@@ -141,7 +141,7 @@ enum {
     IDEDD,       /**< IDE daemon device */
     IDEDH,       /**< IDE daemon host */
     HCCL,        /**< HCCL */
-    FMK,         /**< Framework */
+    FMK,         /**< Adapter */
     HIAIENGINE,  /**< Matrix */
     DVPP,        /**< DVPP */
     RUNTIME,     /**< Runtime */
@@ -162,11 +162,11 @@ enum {
    MDCDEFAULT,  /**< MDC undefine */
     MDCSC,       /**< MDC spatial cognition */
     MDCPNC,
-    MLL,
+    MLL,         /**< abandoned */
     DEVMM,       /**< Dlog memory management */
     KERNEL,      /**< Kernel */
     LIBMEDIA,    /**< Libmedia */
-    CCECPU,      /**< ai cpu */
+    CCECPU,      /**< aicpu schedule */
     ASCENDDK,    /**< AscendDK */
     ROS,         /**< ROS */
     HCCP,
@@ -179,7 +179,7 @@ enum {
     TSDUMP,  /**< TSDUMP module */
     AICPU,   /**< AICPU module */
     LP,      /**< LP module */
-    TDT,
+    TDT,     /**< tsdaemon or aicpu schedule */
     FE,
     MD,
     MB,