@@ -64,49 +64,54 @@ static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is emp | |||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec overflow | |||
static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common overflow | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode | |||
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die | |||
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode | |||
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die | |||
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
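A hedged sketch of how a caller might branch on these runtime status codes, including the newly added trap-overflow values; the helper below is hypothetical and only uses the constants declared above.

#include <cstdio>

static void LogRtStatus(const int32_t status) {
  // Hypothetical helper for illustration; status would come from an ACL runtime call.
  switch (status) {
    case ACL_ERROR_RT_AICORE_TIMEOUT:              // 507014
    case ACL_ERROR_RT_VECTOR_CORE_TIMEOUT:         // 507034
      (void)printf("core timeout, status=%d\n", status);
      break;
    case ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW:   // 507042, added by this change
    case ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW:  // 507043, added by this change
      (void)printf("aicore trap overflow, status=%d\n", status);
      break;
    default:
      (void)printf("runtime status=%d\n", status);
      break;
  }
}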
@@ -71,13 +71,15 @@ const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExec | |||
const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; | |||
const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; | |||
const char_t *const OPTION_EXEC_GRAPH_EXEC_TIMEOUT = "ge.exec.graphExecTimeout"; | |||
const char_t *const OPTION_EXEC_MODEL_EXEC_TIMEOUT = "ge.exec.modelExecTimeout"; | |||
// Option key: memory init | |||
const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; | |||
const char_t *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; | |||
const char_t *const OPTION_EXEC_REUSE_ZERO_COPY_MEMORY = "ge.exec.reuseZeroCopyMemory"; | |||
const char_t *const OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE = "ge.exec.logicalDeviceClusterDeployMode"; | |||
const char_t *const OPTION_EXEC_LOGICAL_DEVICE_ID = "ge.exec.logicalDeviceId"; | |||
namespace configure_option { | |||
const char_t *const STREAM_NUM = "ge.streamNum"; | |||
const char_t *const HEAD_STREAM = "ge.headStream"; | |||
@@ -107,6 +109,7 @@ const char_t *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; | |||
const char_t *const HCOM_PARALLEL = "ge.hcomParallel"; | |||
const char_t *const AUTO_TUNE_MODE = "ge.autoTuneMode"; | |||
const char_t *const SOC_VERSION = "ge.socVersion"; | |||
const char_t *const VIRTUAL_TYPE = "ge.virtual_type"; | |||
const char_t *const CORE_TYPE = "ge.engineType"; | |||
const char_t *const AICORE_NUM = "ge.aicoreNum"; | |||
const char_t *const L1_FUSION = "ge.l1Fusion"; | |||
@@ -254,9 +257,16 @@ const std::string AUTO_TUNE_MODE = "ge.autoTuneMode"; | |||
// Configure soc version, example: "Ascend310" | |||
const std::string SOC_VERSION = "ge.socVersion"; | |||
// Configure whether to enable virtualization, | |||
// its value should be "0" or "1", default value is "0" | |||
const std::string VIRTUAL_TYPE = "ge.virtual_type"; | |||
// Configure core type "VectorEngine", default value is "AIcoreEngine" | |||
const std::string CORE_TYPE = "ge.engineType"; | |||
// Configure graph exclude one or more engines | |||
const std::string EXCLUDE_ENGINES = "ge.exec.exclude_engines"; | |||
// Configure AICORE NUM | |||
const std::string AICORE_NUM = "ge.aicoreNum"; | |||
@@ -420,6 +430,7 @@ static const char_t *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_ | |||
static const char_t *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); | |||
static const char_t *const CORE_TYPE = ge::CORE_TYPE.c_str(); | |||
static const char_t *const SOC_VERSION = ge::SOC_VERSION.c_str(); | |||
static const char_t *const VIRTUAL_TYPE = ge::VIRTUAL_TYPE.c_str(); | |||
static const char_t *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; | |||
static const char_t *const AICORE_NUM = ge::AICORE_NUM.c_str(); | |||
static const char_t *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); | |||
@@ -487,6 +498,7 @@ const std::set<std::string> ir_parser_suppported_options = { | |||
// for interface: aclgrphBuildInitialize | |||
const std::set<std::string> global_options = {CORE_TYPE, | |||
SOC_VERSION, | |||
VIRTUAL_TYPE, | |||
BUFFER_OPTIMIZE, | |||
ENABLE_COMPRESS_WEIGHT, | |||
COMPRESS_WEIGHT_CONF, | |||
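For context, a hedged sketch of how the new VIRTUAL_TYPE key could be supplied together with SOC_VERSION in the global option map consumed by aclgrphBuildInitialize (the std::map-of-strings overload is assumed); the option values shown are illustrative only.

#include <map>
#include <string>

// Illustrative caller-side option map; the keys mirror the constants above.
const std::map<std::string, std::string> build_global_options = {
    {"ge.socVersion", "Ascend310"},   // SOC_VERSION
    {"ge.virtual_type", "1"},         // VIRTUAL_TYPE: "0" (off, default) or "1" (on)
};
// The map would then be handed to aclgrphBuildInitialize (see the comment
// "for interface: aclgrphBuildInitialize" above); the call site is omitted here.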
@@ -17,6 +17,7 @@ | |||
#ifndef INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_ | |||
#define INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_ | |||
#include <cinttypes> | |||
#include <cstdint> | |||
#include "framework/common/ge_inner_error_codes.h" | |||
@@ -56,63 +57,64 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { | |||
return (enable == 1); | |||
} | |||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(GE_MODULE_NAME, "%" PRIu64 " %s: ErrorNo: %" PRIuLEAST8 "(%s) %s" fmt, GeLog::GetTid(), \ | |||
&__FUNCTION__[0], (ERROR_CODE), ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \ | |||
ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__); \ | |||
} while (false) | |||
#define GELOGW(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \ | |||
dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
#define GELOGW(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \ | |||
dlog_warn(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
#define GELOGI(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \ | |||
dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
#define GELOGI(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \ | |||
dlog_info(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
#define GELOGD(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \ | |||
dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
#define GELOGD(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \ | |||
dlog_debug(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
#define GEEVENT(fmt, ...) \ | |||
do { \ | |||
dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
#define GEEVENT(fmt, ...) \ | |||
do { \ | |||
dlog_event(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} while (false) | |||
#define GELOGT(VALUE, fmt, ...) \ | |||
do { \ | |||
TraceStatus stat = (VALUE); \ | |||
const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||
const int32_t idx = static_cast<int32_t>(stat); \ | |||
char_t *k = const_cast<char_t *>("status"); \ | |||
char_t *v = const_cast<char_t *>(TraceStatStr[idx]); \ | |||
KeyValue kv = {k, v}; \ | |||
DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
#define GELOGT(VALUE, fmt, ...) \ | |||
do { \ | |||
TraceStatus stat = (VALUE); \ | |||
const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||
const int32_t idx = static_cast<int32_t>(stat); \ | |||
char_t *k = const_cast<char_t *>("status"); \ | |||
char_t *v = const_cast<char_t *>(TraceStatStr[idx]); \ | |||
KeyValue kv = {k, v}; \ | |||
DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], \ | |||
##__VA_ARGS__); \ | |||
} while (false) | |||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error((MOD_NAME), "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error((MOD_NAME), "%" PRIu64 " %s: ErrorNo: %" PRIuLEAST8 "(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], \ | |||
(ERROR_CODE), ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \ | |||
ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__); \ | |||
} while (false) | |||
// print memory when it is greater than 1KB. | |||
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ | |||
do { \ | |||
if (static_cast<size_t>(SIZE) > 1024UL) { \ | |||
GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \ | |||
} \ | |||
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ | |||
do { \ | |||
if (static_cast<size_t>(SIZE) > 1024UL) { \ | |||
GELOGI("MallocMemory, func=%s, size=%" PRIu64 ", purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \ | |||
} \ | |||
} while (false) | |||
#ifdef __cplusplus | |||
} | |||
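A hedged usage sketch for the reworked logging macros, assuming it sits in a GE translation unit where GE_MODULE_NAME and the slog/error-manager headers are already available; ge::FAILED is used as a representative Status value.

#include <cstddef>
#include <string>

void LoadDemo(const std::string &name, const size_t weights_size) {
  GELOGI("start loading model %s", name.c_str());
  GE_PRINT_DYNAMIC_MEMORY(malloc, "model weights", weights_size);  // only logs when size > 1KB
  if (weights_size == 0U) {
    GELOGE(ge::FAILED, "model %s has empty weights", name.c_str());
  }
}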
@@ -180,13 +180,13 @@ | |||
#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ | |||
{ \ | |||
GELOGE((_status), "[Check][InnerData]%s", (errormsg)); \ | |||
REPORT_INNER_ERROR("E19999", "%s", (errormsg)); \ | |||
REPORT_INNER_ERROR("E10052", "%s", (errormsg)); \ | |||
} | |||
#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ | |||
{ \ | |||
GELOGW("%s", (errormsg)); \ | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {(errormsg)}); \ | |||
ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"reason"}, {(errormsg)}); \ | |||
} | |||
#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ | |||
@@ -194,7 +194,7 @@ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
GELOGE((_status), "%s", (errormsg)); \ | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {(errormsg)}); \ | |||
ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"reason"}, {(errormsg)}); \ | |||
return (_status); \ | |||
} \ | |||
} while (false) | |||
@@ -31,8 +31,8 @@ class GE_FUNC_VISIBILITY OpTypeContainer { | |||
} | |||
~OpTypeContainer() = default; | |||
void Register(const std::string &op_type) { | |||
static_cast<void>(op_type_list_.insert(op_type)); | |||
bool Register(const std::string &op_type) { | |||
return op_type_list_.insert(op_type).second; | |||
} | |||
bool IsExisting(const std::string &op_type) { | |||
@@ -52,7 +52,7 @@ class GE_FUNC_VISIBILITY OpTypeContainer { | |||
#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \ | |||
const char_t *var_name = str_name; \ | |||
const bool g_##var_name##_reg = (static_cast<void>(OpTypeContainer::Instance()->Register(str_name)), true); | |||
const bool g_##var_name##_reg = OpTypeContainer::Instance()->Register(str_name); | |||
#define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name)) | |||
#endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_ |
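For illustration, a rough expansion of a registration line under the new macro (the DATA/"Data" pair is just an example); the registration flag now reflects whether the insert into the container actually succeeded rather than being hard-coded to true.

// REGISTER_OPTYPE_DEFINE(DATA, "Data") now expands roughly to:
//   const char_t *DATA = "Data";
//   const bool g_DATA_reg = OpTypeContainer::Instance()->Register("Data");
// where Register() returns op_type_list_.insert(op_type).second, i.e. false
// if the same op type string was already registered.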
@@ -162,6 +162,7 @@ REGISTER_OPTYPE_DECLARE(_IF, "_If"); | |||
REGISTER_OPTYPE_DECLARE(STATELESSIF, "StatelessIf"); | |||
REGISTER_OPTYPE_DECLARE(IF, "If"); | |||
REGISTER_OPTYPE_DECLARE(CASE, "Case"); | |||
REGISTER_OPTYPE_DECLARE(STATELESSCASE, "StatelessCase"); | |||
REGISTER_OPTYPE_DECLARE(_WHILE, "_While"); | |||
REGISTER_OPTYPE_DECLARE(WHILE, "While"); | |||
REGISTER_OPTYPE_DECLARE(STATELESSWHILE, "StatelessWhile"); | |||
@@ -626,8 +627,8 @@ struct ModelFileHeader { | |||
uint32_t version = MODEL_VERSION; // version 1.0 | |||
uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature | |||
uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length. | |||
uint8_t is_encrypt = | |||
static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); // whether encrypted 0:not encrypt, 1:encrypt | |||
// whether encrypted 0:not encrypt, 1:encrypt | |||
uint8_t is_encrypt = static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); | |||
uint8_t is_checksum = static_cast<uint8_t>(ModelCheckType::CHECK); // whether to check the checksum | |||
uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model | |||
uint8_t genmode = 0U; // 0:offline generate 1:online generate | |||
@@ -31,6 +31,7 @@ enum class PriorityEnum { | |||
COST_1 = 1, | |||
COST_2 = 2, | |||
COST_3 = 3, | |||
COST_4 = 4, | |||
COST_9 = 9, | |||
COST_10 = 10, | |||
}; | |||
@@ -178,7 +178,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
Status GetAippType(const uint32_t model_id, const uint32_t index, InputAippType &type, size_t &aipp_index); | |||
Status CommandHandle(const Command &command); | |||
Status CommandHandle(const Command &command) const; | |||
Status SetDump(const DumpConfig &dump_config); | |||
@@ -133,6 +133,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_IF; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSIF; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IF; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CASE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSCASE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_WHILE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *WHILE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSWHILE; | |||
@@ -1 +1 @@ | |||
Subproject commit 002617852e22767bd864db3c01595630e23f5496 | |||
Subproject commit 58412ad7aed08cd1c01cc070d80706e4253c9075 |
@@ -18,31 +18,38 @@ | |||
#define AICPU_OP_TYPE_LIST_H_ | |||
extern "C" { | |||
enum OpKernelType { TF_KERNEL, CPU_KERNEL }; | |||
enum OpKernelType { | |||
TF_KERNEL, | |||
CPU_KERNEL | |||
}; | |||
enum ReturnCode { OP_TYPE_NOT_SUPPORT, FORMAT_NOT_SUPPORT, DTYPE_NOT_SUPPORT }; | |||
enum ReturnCode { | |||
OP_TYPE_NOT_SUPPORT, | |||
FORMAT_NOT_SUPPORT, | |||
DTYPE_NOT_SUPPORT | |||
}; | |||
#pragma pack(push, 1) | |||
// One byte alignment | |||
struct SysOpInfo { | |||
uint64_t opLen; | |||
uint64_t opType; | |||
OpKernelType kernelsType; | |||
uint64_t opLen; | |||
uint64_t opType; | |||
OpKernelType kernelsType; | |||
}; | |||
struct SysOpCheckInfo { | |||
uint64_t opListNum; | |||
uint64_t offSetLen; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
uint64_t opListNum; | |||
uint64_t offSetLen; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
struct SysOpCheckResp { | |||
uint64_t opListNum; | |||
bool isWithoutJson; | |||
uint64_t returnCodeList; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
uint64_t opListNum; | |||
bool isWithoutJson; | |||
uint64_t returnCodeList; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
#pragma pack(pop) | |||
} | |||
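A hedged sketch of the effect of the one-byte packing above, assuming a typical 4-byte underlying type for the enums (not guaranteed by the standard):

#include <cstdio>

int main() {
  // With #pragma pack(1) there is no padding, so each size is the plain sum of
  // the member sizes; expected 20 / 32 / 33 bytes on a typical platform.
  (void)printf("SysOpInfo=%zu SysOpCheckInfo=%zu SysOpCheckResp=%zu\n",
               sizeof(SysOpInfo), sizeof(SysOpCheckInfo), sizeof(SysOpCheckResp));
  return 0;
}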
@@ -24,81 +24,83 @@ namespace aicpu { | |||
using char_t = char; | |||
#pragma pack(push, 1) | |||
struct AicpuParamHead { | |||
uint32_t length; // Total length: include custom message | |||
uint32_t ioAddrNum; // Input and output address number | |||
uint32_t extInfoLength; // extInfo struct Length | |||
uint64_t extInfoAddr; // extInfo address | |||
struct AicpuParamHead | |||
{ | |||
uint32_t length; // Total length: include custom message | |||
uint32_t ioAddrNum; // Input and output address number | |||
uint32_t extInfoLength; // extInfo struct Length | |||
uint64_t extInfoAddr; // extInfo address | |||
}; | |||
enum class AicpuConfigMsgType { | |||
AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */ | |||
}; | |||
enum class AicpuErrMsgType { | |||
ERR_MSG_TYPE_NULL = 0, | |||
ERR_MSG_TYPE_AICORE = 1, | |||
ERR_MSG_TYPE_AICPU = 2, | |||
ERR_MSG_TYPE_NULL = 0, | |||
ERR_MSG_TYPE_AICORE = 1, | |||
ERR_MSG_TYPE_AICPU = 2, | |||
}; | |||
enum class AicpuExtInfoMsgType { | |||
EXT_MODEL_ID_MSG_TYPE = 0, | |||
EXT_MODEL_ID_MSG_TYPE = 0, | |||
}; | |||
struct AicpuConfigMsg { | |||
uint8_t msgType; | |||
uint8_t reserved1; | |||
uint16_t bufLen; | |||
uint32_t offset; | |||
uint64_t bufAddr; | |||
uint32_t tsId; | |||
uint32_t reserved2; | |||
uint8_t msgType; | |||
uint8_t reserved1; | |||
uint16_t bufLen; | |||
uint32_t offset; | |||
uint64_t bufAddr; | |||
uint32_t tsId; | |||
uint32_t reserved2; | |||
}; | |||
struct AicpuModelIdInfo { | |||
uint32_t modelId; | |||
uint32_t extendModelId; | |||
uint32_t extendInfo[13]; | |||
uint32_t modelId; | |||
uint32_t extendModelId; | |||
uint32_t extendInfo[13]; | |||
}; | |||
// 64 bytes | |||
struct AicpuExtendInfo { | |||
uint8_t msgType; | |||
uint8_t version; | |||
uint8_t reserved[2]; | |||
union { | |||
AicpuModelIdInfo modelIdMap; | |||
}; | |||
uint8_t msgType; | |||
uint8_t version; | |||
uint8_t reserved[2]; | |||
union { | |||
AicpuModelIdInfo modelIdMap; | |||
}; | |||
}; | |||
struct AicoreErrMsgInfo { | |||
uint8_t errType; | |||
uint8_t version; | |||
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ | |||
uint32_t errorCode; | |||
uint32_t modelId; | |||
uint32_t taskId; | |||
uint32_t streamId; | |||
uint64_t transactionId; | |||
uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */ | |||
uint8_t errType; | |||
uint8_t version; | |||
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ | |||
uint32_t errorCode; | |||
uint32_t modelId; | |||
uint32_t taskId; | |||
uint32_t streamId; | |||
uint64_t transactionId; | |||
uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */ | |||
}; | |||
struct AicpuErrMsgInfo { | |||
uint8_t errType; | |||
uint8_t version; | |||
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ | |||
uint32_t errorCode; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint64_t transactionId; | |||
char_t opName[64]; /* op name str */ | |||
char_t errDesc[128]; /* err msg desc info */ | |||
uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */ | |||
uint8_t errType; | |||
uint8_t version; | |||
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ | |||
uint32_t errorCode; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint64_t transactionId; | |||
char_t opName[64]; /* op name str */ | |||
char_t errDesc[128]; /* err msg desc info */ | |||
uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */ | |||
}; | |||
#pragma pack(pop) | |||
} // namespace aicpu | |||
#endif // AICPU_TASK_STRUCT_H | |||
@@ -1,13 +1,13 @@ | |||
/** | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
* | |||
* Description:interface. | |||
* Create: 2021-12-21 | |||
*/ | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
* | |||
* Description:interface. | |||
* Create: 2021-12-21 | |||
*/ | |||
#ifndef AICPU_TYPE_DEF_H | |||
#define AICPU_TYPE_DEF_H | |||
@@ -25,24 +25,28 @@ typedef float float32_t; | |||
typedef double float64_t; | |||
#endif | |||
inline uint64_t PtrToValue(const void *ptr) { | |||
return static_cast<const uint64_t>(reinterpret_cast<const uintptr_t>(ptr)); | |||
inline uint64_t PtrToValue(const void *ptr) | |||
{ | |||
return static_cast<const uint64_t>(reinterpret_cast<const uintptr_t>(ptr)); | |||
} | |||
inline void *ValueToPtr(const uint64_t value) { | |||
return reinterpret_cast<void *>(static_cast<const uintptr_t>(value)); | |||
inline void *ValueToPtr(const uint64_t value) | |||
{ | |||
return reinterpret_cast<void *>(static_cast<const uintptr_t>(value)); | |||
} | |||
template <typename TI, typename TO> | |||
inline TO *PtrToPtr(TI *ptr) { | |||
return reinterpret_cast<TO *>(ptr); | |||
template<typename TI, typename TO> | |||
inline TO *PtrToPtr(TI *ptr) | |||
{ | |||
return reinterpret_cast<TO *>(ptr); | |||
} | |||
template <typename T> | |||
inline T *PtrAdd(T *const ptr, const size_t maxIdx, const size_t idx) { | |||
if ((ptr != nullptr) && (idx < maxIdx)) { | |||
return reinterpret_cast<T *>(ptr + idx); | |||
} | |||
return nullptr; | |||
template<typename T> | |||
inline T *PtrAdd(T * const ptr, const size_t maxIdx, const size_t idx) | |||
{ | |||
if ((ptr != nullptr) && (idx < maxIdx)) { | |||
return reinterpret_cast<T *>(ptr + idx); | |||
} | |||
return nullptr; | |||
} | |||
#endif // AICPU_TYPE_DEF_H |
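A hedged usage sketch for the pointer helpers above: a pointer is round-tripped through its uint64_t representation and then accessed with the bounds-checked PtrAdd.

inline void PtrHelpersDemo() {
  float32_t buf[8] = {};
  const uint64_t addr = PtrToValue(static_cast<const void *>(buf));  // pointer -> integer handle
  float32_t *const p = PtrToPtr<void, float32_t>(ValueToPtr(addr));  // handle -> typed pointer
  float32_t *const third = PtrAdd<float32_t>(p, 8U, 2U);             // &buf[2]; nullptr when idx >= maxIdx
  if (third != nullptr) {
    *third = 1.0F;
  }
}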
@@ -2507,6 +2507,31 @@ REG_OP(GetNextFromQueue) | |||
.OP_END_FACTORY_REG(GetNextFromQueue) | |||
/** | |||
*@brief Get the batch of data in data processing . \n | |||
*@par Attributes: | |||
*@li output_types: A nested structure of DType objects corresponding to each | |||
component of an element of this dataset. | |||
*@li output_shapes: A nested structure of TensorShape objects corresponding | |||
to each component of an element of this dataset. | |||
*@li channel_name: A string. Default "" . \n | |||
*@par Outputs: | |||
*y:A nested structure of Tensor objects . \n | |||
*@par Third-party framework compatibility | |||
*Compatible with tensorflow GetNext operator | |||
*/ | |||
REG_OP(PeekData) | |||
.DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, | |||
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL})) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {}) | |||
.ATTR(channel_name, String, "") | |||
.OP_END_FACTORY_REG(PeekData) | |||
/** | |||
* @brief OptionalGetValue | |||
* @par Inputs: | |||
* optional: A tensor of type variant | |||
@@ -42,8 +42,8 @@ namespace ge { | |||
*Compatible with the TensorFlow operator AddN. | |||
*/ | |||
REG_OP(AddN) | |||
.DYNAMIC_INPUT(x, TensorType::NumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.DYNAMIC_INPUT(x, TensorType({NumberType(), DT_VARIANT})) | |||
.OUTPUT(y, TensorType({NumberType(), DT_VARIANT})) | |||
.REQUIRED_ATTR(N, Int) | |||
.OP_END_FACTORY_REG(AddN) | |||
@@ -349,6 +349,19 @@ REG_OP(StatefulPartitionedCall) | |||
.ATTR(executor_type, String, "") | |||
.OP_END_FACTORY_REG(StatefulPartitionedCall) | |||
/** | |||
* @par Inputs: | |||
* @li input: The input tensors \n | |||
* | |||
* @par Outputs: | |||
* @li output: The output tensors. \n | |||
*/ | |||
REG_OP(ToBool) | |||
.INPUT(input, TensorType({DT_INT64, DT_INT32, DT_INT16, DT_INT8, \ | |||
DT_UINT8, DT_FLOAT, DT_DOUBLE, DT_STRING, DT_BOOL})) | |||
.OUTPUT(output, DT_BOOL) | |||
.OP_END_FACTORY_REG(ToBool) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ |
@@ -1931,9 +1931,6 @@ REG_OP(DenseImageWarpGrad) | |||
*@par Third-party framework compatibility | |||
*Compatible with pytorch GridSampler2D operator. | |||
*@par Restrictions: | |||
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GridSampler2D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
@@ -1966,9 +1963,6 @@ REG_OP(GridSampler2D) | |||
*@par Third-party framework compatibility | |||
*Compatible with pytorch GridSampler2DGrad operator. | |||
*@par Restrictions: | |||
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GridSampler2DGrad) | |||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
@@ -2063,9 +2057,6 @@ REG_OP(IMGWarpOffsets) | |||
*@par Third-party framework compatibility | |||
*Compatible with pytorch GridSampler3D operator. | |||
*@par Restrictions: | |||
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GridSampler3D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
@@ -2096,9 +2087,6 @@ REG_OP(GridSampler3D) | |||
*@par Third-party framework compatibility | |||
*Compatible with pytorch GridSampler3DGrad operator. | |||
*@par Restrictions: | |||
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GridSampler3DGrad) | |||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
@@ -491,6 +491,40 @@ REG_OP(TridiagonalSolve) | |||
.ATTR(partial_pivoting, Bool, true) | |||
.OP_END_FACTORY_REG(TridiagonalSolve) | |||
/** | |||
* @brief Solution of banded triangular matrix . \n | |||
* @par Inputs: | |||
* The input bands has to be symmetric and positive definite. | |||
* @li bands: A Tensor. Must be one of the following types: double, float32, | |||
float16, complex64, complex128. Shape is [..., K, M], where K is the | |||
number of bands (the stored diagonals) and M is the length of the | |||
diagonals. | |||
* @li rhs: A Tensor of shape [..., M] or [..., M, N]. Has the same type as bands. \n | |||
* @par Outputs: | |||
* @li output:A Tensor. Has the same type as bands . \n | |||
* @par Attributes: | |||
* @li lower: An optional bool. Defaults to True. True indicates the lower | |||
triangular matrix; False indicates the upper triangular matrix. | |||
* @li adjoint: An optional bool. Defaults to False. Boolean indicating whether to | |||
solve with matrix or its (block-wise) adjoint. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with tensorflow BandedTriangularSolve operator. | |||
*/ | |||
REG_OP(BandedTriangularSolve) | |||
.INPUT(bands, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||
.ATTR(lower, Bool, true) | |||
.ATTR(adjoint, Bool, false) | |||
.OP_END_FACTORY_REG(BandedTriangularSolve) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ |
@@ -303,6 +303,21 @@ REG_OP(MutableHashTable) | |||
.REQUIRED_ATTR(key_dtype, Type) | |||
.REQUIRED_ATTR(value_dtype, Type) | |||
.OP_END_FACTORY_REG(MutableHashTable) | |||
/** | |||
* @brief Remove keys in the given table . \n | |||
* @par Inputs: | |||
* @li table_handle: A Tensor of type resource. Handle to the table. \n | |||
* @li keys: A Tensor. Any shape. Keys to remove. \n | |||
* @par Third-party framework compatibility. | |||
* Compatible with tensorflow LookupTableRemove operator. | |||
*/ | |||
REG_OP(LookupTableRemove) | |||
.INPUT(table_handle, TensorType({DT_RESOURCE})) | |||
.INPUT(keys, TensorType({RealNumberType, DT_BOOL, DT_STRING})) | |||
.OP_END_FACTORY_REG(LookupTableRemove) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ |
@@ -377,7 +377,7 @@ to each component of an element of this dataset. | |||
REG_OP(GetNext) | |||
.DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, | |||
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL})) | |||
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL})) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {}) | |||
.ATTR(output_num, Int, 1) | |||
@@ -1156,6 +1156,185 @@ REG_OP(CdistGrad) | |||
.ATTR(p, Float, 2.0) | |||
.OP_END_FACTORY_REG(CdistGrad) | |||
/** | |||
* @brief Computes the RaggedBincount. \n | |||
* @par Inputs: | |||
* Four inputs, including: | |||
* @li splits: A tensor with shape: BxPxM. Must be one of the following types: | |||
* int64. | |||
* @li values: A tensor with shape: BxPxM. Must be one of the following types: | |||
* int32, int64. | |||
* @li size: A tensor with shape: BxRxM. Must be one of the following types: | |||
* int32, int64. | |||
* @li weights: A tensor with shape: BxRxM. | |||
* Must be one of the following types: int32, int64, float, double. \n | |||
* @par Attributes: | |||
* @li binary_output: An optional bool \n | |||
* @par Outputs: | |||
* output: Must be one of the following types: int32, int64, float, double. \n | |||
*/ | |||
REG_OP(RaggedBincount) | |||
.INPUT(splits, TensorType({DT_INT64})) | |||
.INPUT(values, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(size, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(output, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(binary_output, Bool, false) | |||
.OP_END_FACTORY_REG(RaggedBincount) | |||
/** | |||
* @brief Count the number of occurrences of each value in the input dense integer array, | |||
* and output it according to the sparse matrix. \n | |||
* @par Inputs: | |||
* @li values: A 1D or 2D tensor of type int32 or int64. | |||
* @li weights: A tensor of type int32 or int64 or float or double. \n | |||
* @par Attributes: | |||
* @li minlength: An optional int >=-1. Defaults to -1. | |||
* @li maxlength: An optional int >=-1. Defaults to -1. | |||
* @li binary_output: A required bool. \n | |||
* @par Outputs: | |||
* output_indices: A tensor of type int64. | |||
* output_values: A tensor of the same type as "weights". | |||
* output_dense_shape: A tensor of type int64. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator DenseCountSparseOutput. \n | |||
*/ | |||
REG_OP(DenseCountSparseOutput) | |||
.INPUT(values, TensorType({DT_INT32,DT_INT64})) | |||
.INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE})) | |||
.OUTPUT(output_indices, TensorType({DT_INT64})) | |||
.OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE})) | |||
.OUTPUT(output_dense_shape, TensorType({DT_INT64})) | |||
.ATTR(minlength, Int, -1) | |||
.ATTR(maxlength, Int, -1) | |||
.REQUIRED_ATTR(binary_output, Bool) | |||
.OP_END_FACTORY_REG(DenseCountSparseOutput) | |||
/** | |||
* @brief Count the number of occurrences of each value in the input ragged integer array, | |||
* and output it according to the sparse matrix. \n | |||
* @par Inputs: | |||
* @li splits: A 1D tensor of type int64. | |||
* @li values: A 1D or 2D tensor of type int32 or int64. | |||
* @li weights: A tensor of type int32 or int64 or float or double. \n | |||
* @par Attributes: | |||
* @li minlength: An optional int >=-1. Defaults to -1. | |||
* @li maxlength: An optional int >=-1. Defaults to -1. | |||
* @li binary_output: A required bool. \n | |||
* @par Outputs: | |||
* output_indices: A tensor of type int64. | |||
* output_values: A tensor of the same type as "weights". | |||
* output_dense_shape: A tensor of type int64. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator RaggedCountSparseOutput. \n | |||
*/ | |||
REG_OP(RaggedCountSparseOutput) | |||
.INPUT(splits, TensorType({DT_INT64})) | |||
.INPUT(values, TensorType({DT_INT32,DT_INT64})) | |||
.INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE})) | |||
.OUTPUT(output_indices, TensorType({DT_INT64})) | |||
.OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE})) | |||
.OUTPUT(output_dense_shape, TensorType({DT_INT64})) | |||
.ATTR(minlength, Int, -1) | |||
.ATTR(maxlength, Int, -1) | |||
.REQUIRED_ATTR(binary_output, Bool) | |||
.OP_END_FACTORY_REG(RaggedCountSparseOutput) | |||
/** | |||
* @brief SignBitsUnpack. | |||
* @par Inputs: | |||
* one input, including: | |||
* @li x: A 1D Tensor of uint8. | |||
* @par Attributes: | |||
* @li size: dim of output tensor, defaults to 1. | |||
* @li dtype: dtype of output tensor: DT_FLOAT(0) or DT_FLOAT16(1). | |||
* @par Outputs: | |||
* @li y: A 2D Tensor of type float32 (float16) with shape (size, (x.shape * 8) / size), | |||
*/ | |||
REG_OP(SignBitsUnpack) | |||
.INPUT(x, TensorType({DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(size, Int) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.OP_END_FACTORY_REG(SignBitsUnpack) | |||
/** | |||
* @brief Function scaled masked softmax . \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A mutable Tensor. The type supports float16/float32. | |||
* @li mask: An optional Tensor. Must meet all of the following rules: | |||
* shape of mask should be broadcastable with x. | |||
* dtype of mask should be bool. | |||
* mask is binary | |||
* @par Attributes: | |||
* scale: An attribute used to scale the input tensor. The type is float. Defaults | |||
* to "1.0". \n | |||
* fixed_triu_mask: A flag used to enable or disable a fixed upper-triangular mask. The type is bool. Defaults | |||
* to "false". \n | |||
* @par Outputs: | |||
* y: A mutable Tensor. Has the same type as "x". \n | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ScaledMaskedSoftmax) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(mask, TensorType({DT_BOOL, DT_UINT1})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16})) | |||
.ATTR(scale, Float, 1.0) | |||
.ATTR(fixed_triu_mask, Bool, false) | |||
.OP_END_FACTORY_REG(ScaledMaskedSoftmax) | |||
/** | |||
* @brief Function scaled masked softmax grad . \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li y_grad: A mutable Tensor. The type supports float16/float32. | |||
* @li y: A mutable Tensor. The type supports float16/float32. | |||
* @li mask: An optional Tensor. Must meet all of the following rules: | |||
* shape of mask should be broadcastable with x. | |||
* dtype of mask should be bool. | |||
* mask is binary | |||
* @par Attributes: | |||
* scale: An attribute used to scale the input tensor. The type is float. Defaults | |||
* to "1.0". \n | |||
* fixed_triu_mask: A flag used to enable or disable a fixed upper-triangular mask. The type is bool. Defaults | |||
* to "false". \n | |||
* @par Outputs: | |||
* x_grad: A mutable Tensor. Has the same type as "x". \n | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ScaledMaskedSoftmaxGrad) | |||
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(mask, TensorType({DT_BOOL, DT_UINT1})) | |||
.OUTPUT(x_grad, TensorType({DT_FLOAT16})) | |||
.ATTR(scale, Float, 1.0) | |||
.ATTR(fixed_triu_mask, Bool, false) | |||
.OP_END_FACTORY_REG(ScaledMaskedSoftmaxGrad) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ |
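A hedged CPU reference sketch of the forward semantics described for ScaledMaskedSoftmax above, for a single row, assuming mask == true marks positions to be excluded (the usual masked-softmax convention; the registration itself does not pin this down).

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

std::vector<float> ScaledMaskedSoftmaxRef(const std::vector<float> &x,
                                          const std::vector<bool> &mask, const float scale) {
  std::vector<float> y(x.size(), 0.0F);
  float max_val = -std::numeric_limits<float>::infinity();
  for (size_t i = 0U; i < x.size(); ++i) {
    if (!mask[i]) { max_val = std::max(max_val, scale * x[i]); }  // scale, then reduce over kept positions
  }
  float sum = 0.0F;
  for (size_t i = 0U; i < x.size(); ++i) {
    y[i] = mask[i] ? 0.0F : std::exp(scale * x[i] - max_val);     // masked positions contribute nothing
    sum += y[i];
  }
  for (float &v : y) { v = (sum > 0.0F) ? (v / sum) : 0.0F; }
  return y;
}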
@@ -1595,6 +1595,50 @@ REG_OP(Pinverse) | |||
.ATTR(rcond, Float, 1e-15) | |||
.OP_END_FACTORY_REG(Pinverse) | |||
/** | |||
* @brief From the input tensor and updates tensor, select the maximum value according to indices to output. \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li input: Must be one of the following types: | |||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. | |||
* @li indices: Must be one of the following types: | |||
* int32, int64. | |||
* @li updates: Must have the same type as input. \n | |||
* @par Outputs: | |||
* output: A Tensor with the same type as input. \n | |||
*/ | |||
REG_OP(TensorScatterMax) | |||
.INPUT(input, TensorType::BasicType()) | |||
.INPUT(indices, TensorType::IndexNumberType()) | |||
.INPUT(updates, TensorType::BasicType()) | |||
.OUTPUT(output, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(TensorScatterMax) | |||
/** | |||
* @brief From the input tensor and updates tensor, select the minimum value according to indices to output. \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li input: Must be one of the following types: | |||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. | |||
* @li indices: Must be one of the following types: | |||
* int32, int64. | |||
* @li updates: Must have the same type as input. \n | |||
* @par Outputs: | |||
* output: A Tensor with the same type as input. \n | |||
*/ | |||
REG_OP(TensorScatterMin) | |||
.INPUT(input, TensorType::BasicType()) | |||
.INPUT(indices, TensorType::IndexNumberType()) | |||
.INPUT(updates, TensorType::BasicType()) | |||
.OUTPUT(output, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(TensorScatterMin) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ |
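A hedged 1-D reference sketch of the TensorScatterMax semantics documented above (TensorScatterMin is the same with std::min): the output starts as a copy of the input, and each indexed element is replaced by the maximum of its current value and the corresponding update.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<float> TensorScatterMaxRef(const std::vector<float> &input,
                                       const std::vector<int64_t> &indices,
                                       const std::vector<float> &updates) {
  std::vector<float> output = input;                      // start from the input values
  for (size_t i = 0U; i < indices.size(); ++i) {
    const auto idx = static_cast<size_t>(indices[i]);
    if (idx < output.size()) {
      output[idx] = std::max(output[idx], updates[i]);    // keep the larger of current value and update
    }
  }
  return output;
}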
@@ -886,6 +886,7 @@ REG_OP(Conv2D) | |||
* to the input image for int8 type. Ensure that the output is within the | |||
* effective range. Defaults to 0. | |||
*@li data_format: Reserved. | |||
* @li alg: compress algorithm, default weight_unzip. | |||
* | |||
*@par Outputs: | |||
* y: A 4D Tensor of output feature map. Has the same type as "x". With the | |||
@@ -909,6 +910,7 @@ REG_OP(Conv2DCompress) | |||
.ATTR(groups, Int, 1) | |||
.ATTR(data_format, String, "NHWC") | |||
.ATTR(offset_x, Int, 0) | |||
.ATTR(alg, String, "weight_unzip") | |||
.OP_END_FACTORY_REG(Conv2DCompress) | |||
/** | |||
@@ -1688,5 +1690,24 @@ REG_OP(FixPipe) | |||
.ATTR(eltwise_mode, String, "") | |||
.OP_END_FACTORY_REG(FixPipe) | |||
/** | |||
* @brief Solves a batch of isotonic regression problems. \n | |||
* @par Inputs: | |||
* @li input: A Tensor. \n | |||
* @par Attributes: | |||
* @li output_dtype: The data type of output. \n | |||
* @par Outputs: | |||
* @li output: A Tensor. A Tensor of type float16, float32, double. | |||
* @li segments: A Tensor. A Tensor of type int32 \n | |||
*/ | |||
REG_OP(IsotonicRegression) | |||
.INPUT(input, TensorType::RealNumberType()) | |||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(segments, TensorType({DT_INT32})) | |||
.ATTR(output_dtype, Type, DT_FLOAT) | |||
.OP_END_FACTORY_REG(IsotonicRegression) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ |
@@ -1550,7 +1550,8 @@ REG_OP(DecodeWheelsTarget) | |||
*@li max_size_per_class: A required attribute of type int, specifying the nms output num per class. | |||
*@li max_total_size: A required attribute of type int, specifying the nms output num per batch. | |||
*@li change_coordinate_frame: An optional attribute of type bool, whether to normalize coordinates after clipping. | |||
*@li transpose_box: An optional attribute of type bool, whether a transpose is inserted before this op. Must be "false" . \n | |||
*@li transpose_box: An optional attribute of type bool, whether a transpose is inserted before this op. Must be "false". | |||
*@li image_size: An optional attribute of type ListInt, the size of the image. \n | |||
*@par Outputs: | |||
*@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4), | |||
@@ -1580,6 +1581,7 @@ REG_OP(BatchMultiClassNonMaxSuppression) | |||
.REQUIRED_ATTR(max_total_size, Int) | |||
.ATTR(change_coordinate_frame, Bool, false) | |||
.ATTR(transpose_box, Bool, false) | |||
.ATTR(image_size, ListInt, {}) | |||
.OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression) | |||
/** | |||
@@ -2316,6 +2318,40 @@ REG_OP(CIoU) | |||
.ATTR(mode, String, "iou") | |||
.ATTR(atan_sub_flag, Bool, false) | |||
.OP_END_FACTORY_REG(CIoU) | |||
/** | |||
* @brief First calculates the minimum enclosing area of the two boxes and their IoU; | |||
* the DIoU is then obtained by combining the center distance with the IoU. \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with | |||
* shape (4, N). "N" indicates the number of bounding boxes, and the value | |||
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. | |||
* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 | |||
* with shape (4, M). "M" indicates the number of ground truth boxes, and | |||
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n | |||
* @par Attributes: | |||
* @li trans: An optional bool, true for 'xywh', false for 'xyxy'. | |||
* @li is_cross: An optional bool, control whether the output shape is [N, M] or [1, N]. | |||
* @li mode: An optional string, computation mode, a character string with the value range of [iou, iof]. \n | |||
* @par Outputs: | |||
* overlap: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], | |||
* specifying the IoU or IoF ratio . \n | |||
* @attention Constraints: | |||
* "is_cross" only support false. | |||
*/ | |||
REG_OP(DIoU) | |||
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(trans, Bool, false) | |||
.ATTR(is_cross, Bool, true) | |||
.ATTR(mode, String, "iou") | |||
.OP_END_FACTORY_REG(DIoU) | |||
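// Editor's note (illustration only, not part of the header): a scalar sketch of
// the DIoU formula documented above for one "xyxy" box pair: IoU minus the squared
// center distance normalized by the squared diagonal of the minimum enclosing
// (closure) box. It is not the kernel implementation and ignores the (4, N)
// layout and the "trans"/"is_cross" attributes.
#include <algorithm>

inline float DIoURef(const float b[4], const float g[4]) {  // [x1, y1, x2, y2]
  const float iw = std::max(0.0f, std::min(b[2], g[2]) - std::max(b[0], g[0]));
  const float ih = std::max(0.0f, std::min(b[3], g[3]) - std::max(b[1], g[1]));
  const float inter = iw * ih;
  const float uni = (b[2] - b[0]) * (b[3] - b[1]) +
                    (g[2] - g[0]) * (g[3] - g[1]) - inter;
  const float iou = inter / uni;
  // Squared diagonal of the minimum enclosing box.
  const float cw = std::max(b[2], g[2]) - std::min(b[0], g[0]);
  const float ch = std::max(b[3], g[3]) - std::min(b[1], g[1]);
  const float c2 = cw * cw + ch * ch;
  // Squared distance between the two box centers.
  const float dx = ((b[0] + b[2]) - (g[0] + g[2])) * 0.5f;
  const float dy = ((b[1] + b[3]) - (g[1] + g[3])) * 0.5f;
  return iou - (dx * dx + dy * dy) / c2;
}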
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | |||
@@ -426,7 +426,10 @@ REG_OP(ConfusionSoftmaxGrad) | |||
*@li keepdims: A bool Scalar. If true, retains reduced dimensions with length 1 . \n | |||
*@par Outputs: | |||
*y: A Tensor dtype of float16, float32. | |||
*y: A Tensor dtype of float16, float32. \n | |||
*@attention Constraints: | |||
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version. | |||
*/ | |||
REG_OP(SoftmaxGradExt) | |||
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
@@ -1026,74 +1029,48 @@ REG_OP(RNNTLoss) | |||
.OP_END_FACTORY_REG(RNNTLoss) | |||
/** | |||
*@brief Performs group normalization . \n | |||
* @brief Performs group normalization . \n | |||
*@par Inputs: | |||
* Five inputs, including: (NHWC, NCHW supported) | |||
*@li x: A 4D Tensor of type float16 or float32, with format NHWC or | |||
NCHW for 4D. | |||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format | |||
NHWC or NCHW. Specifies the scaling factor. | |||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with | |||
format NHWC or NCHW. Specifies the offset. | |||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format | |||
NHWC or NCHW. Reserved. Must be "None" if the operation is used for training. | |||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with | |||
format NHWC or NCHW. Specifies the variance used for inference. Reserved . \n | |||
* @par Inputs: | |||
* Three inputs | |||
* @li x: A ND Tensor of type float16 or float32, with format NCHW for 4D. | |||
* @li gamma: A Tensor of type float16 or float32. Must be 1D. Specifies the scaling factor. | |||
* @li beta: A Tensor of type float16 or float32. Must be 1D. Specifies the offset. \n | |||
*@par Attributes: | |||
*@li epsilon: An optional float32, specifying the small value added to | |||
* @par Attributes: | |||
 * @li num_groups: A required int32, specifying the number of groups. | |||
* @li eps: An optional float32, specifying the small value added to | |||
variance to avoid dividing by zero. Defaults to "0.0001". | |||
*@li data_format: An optional string, specifying the format of "x". | |||
* @li data_format: An optional string, specifying the format of "x". | |||
Defaults to "NHWC". | |||
*@li is_training: An optional bool, specifying if the operation is used for | |||
* @li is_training: An optional bool, specifying if the operation is used for | |||
training or inference. Defaults to "True" . \n | |||
*@par Outputs: | |||
* Five outputs, including: (NHWC, NCHW supported) | |||
*@li y: A 4D Tensor of type float16 or float32 for the normalized "x", | |||
with format NHWC or NCHW for 4D. | |||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with | |||
format NHWC or NCHW. Specifies the mean of "x". | |||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is | |||
with format NHWC or NCHW. Specifies the variance of "x". | |||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if | |||
input "x" is with format NHWC or NCHW. Specifies the mean of "x" for | |||
gradient computation. Pass "None" to skip this output. | |||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if | |||
input "x" is with format NHWC or NCHW. Specifies the variance of "x" for | |||
gradient computation. Pass "None" to skip this output . \n | |||
* @par Outputs: | |||
* Three outputs | |||
* @li y: A ND Tensor of type float16 or float32 for the normalized "x", | |||
with format NCHW for 4D. | |||
* @li mean: A Tensor of type float16 or float32. Must be 1D. Specifies the mean of "x". | |||
* @li variance: A Tensor of type float16 or float32. Must be 1D. Specifies the variance of "x". \n | |||
*@attention Constraints: | |||
*@li If the operation is used for inference and outputs "reserve_space_1" | |||
and "reserve_space_2" are available, then "reserve_space_1" has the same | |||
value as "mean" and "reserve_space_2" has the same value as "variance". | |||
*@li For Ascend 310, the result accuracy fails due to the square root | |||
instruction . \n | |||
* @attention Constraints: | |||
 * @li For Ascend 310, only NCHW input that can be transformed to 5HD is supported. \n | |||
*@par Third-party framework compatibility | |||
*@li Compatible with the PyTorch operator GroupNorm. | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the PyTorch operator GroupNorm. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GroupNorm) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(scale, TensorType({DT_FLOAT,})) | |||
.INPUT(offset, TensorType({DT_FLOAT,})) | |||
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) | |||
.INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||
.ATTR(epsilon, Float, 0.0001) | |||
.OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(num_groups, Int) | |||
.ATTR(data_format, String, "NHWC") | |||
.ATTR(eps, Float, 0.0001) | |||
.ATTR(is_training, Bool, true) | |||
.ATTR(num_groups, Int, 2) | |||
.OP_END_FACTORY_REG(GroupNorm) | |||
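// Editor's note (illustration only, not part of the header): a plain NCHW
// reference of the math behind the new three-input/three-output signature:
// per-(batch, group) mean and variance, then
//   y = gamma * (x - mean) / sqrt(variance + eps) + beta,
// with gamma/beta indexed per channel. The (N * num_groups) layout assumed for
// the "mean"/"variance" outputs is an editor's assumption.
#include <cmath>
#include <cstddef>

inline void GroupNormRef(const float *x, const float *gamma, const float *beta,
                         float *y, float *mean, float *variance,
                         std::size_t n_, std::size_t c_, std::size_t h_, std::size_t w_,
                         std::size_t num_groups, float eps) {
  const std::size_t cg = c_ / num_groups, hw = h_ * w_, cnt = cg * hw;
  for (std::size_t n = 0; n < n_; ++n) {
    for (std::size_t g = 0; g < num_groups; ++g) {
      const float *xg = x + (n * c_ + g * cg) * hw;
      double m = 0.0, v = 0.0;
      for (std::size_t i = 0; i < cnt; ++i) { m += xg[i]; }
      m /= static_cast<double>(cnt);
      for (std::size_t i = 0; i < cnt; ++i) { v += (xg[i] - m) * (xg[i] - m); }
      v /= static_cast<double>(cnt);
      mean[n * num_groups + g] = static_cast<float>(m);
      variance[n * num_groups + g] = static_cast<float>(v);
      const float inv = 1.0f / std::sqrt(static_cast<float>(v) + eps);
      for (std::size_t c = 0; c < cg; ++c) {
        const std::size_t ch = g * cg + c;
        for (std::size_t i = 0; i < hw; ++i) {
          const std::size_t idx = (n * c_ + ch) * hw + i;
          y[idx] = gamma[ch] * (x[idx] - static_cast<float>(m)) * inv + beta[ch];
        }
      }
    }
  }
}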
/** | |||
@@ -307,7 +307,7 @@ REG_OP(Relu6D) | |||
* @par Inputs: | |||
* @li gradients: A Tensor of type RealNumberType. The backpropagated | |||
gradients to the corresponding Relu6 operation. | |||
gradients to the corresponding Relu6 operation. | |||
 * @li features: A Tensor with the same type as gradients. The features passed | |||
as input to the corresponding Relu6 operation, or its output; | |||
using either one produces the same result. \n | |||
@@ -325,22 +325,22 @@ REG_OP(Relu6Grad) | |||
.OUTPUT(backprops, TensorType::RealNumberType()) | |||
.OP_END_FACTORY_REG(Relu6Grad) | |||
/** | |||
*@brief Calculate the elu_grad_v2 function. | |||
*@brief Calculate the elu_grad_v2 function. | |||
*Applies the element-wise function: | |||
 * Computes the backward of ELU: the gradient is 1 if x > 0; otherwise it is elu(x) + alpha . | |||
*@par Inputs: | |||
*Two inputs, including: | |||
* @li grads: A tensor. Must be one of the following types: | |||
* float16, float32. | |||
* float16, float32. | |||
* @li activations: A tensor. Must be one of the following types: | |||
* float16, float32. | |||
* float16, float32. | |||
* | |||
*@par Outputs: | |||
*y: A Tensor with the same type and shape as "grads". | |||
* | |||
* | |||
*@par Attributes: | |||
*alpha: scalar parameter, default value = 1.0 | |||
*/ | |||
*/ | |||
REG_OP(EluGradV2) | |||
.INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
@@ -972,18 +972,18 @@ REG_OP(SoftplusV2Grad) | |||
/** | |||
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) | |||
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. | |||
* | |||
* | |||
* @par Inputs: | |||
* one input including: | |||
 * x: An input Tensor. Must be one of the following types: float32, float16 | |||
* | |||
* | |||
* @par Attributes: | |||
* alpha: An optional float. Defaults to 1.0. \n | |||
* @par Outputs: | |||
* one output including: | |||
* y:A Tensor of the same type as x | |||
* | |||
* | |||
*/ | |||
REG_OP(ThresholdedRelu) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -1059,7 +1059,7 @@ REG_OP(HardShrink) | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Hardsigmoid. \n | |||
*/ | |||
*/ | |||
REG_OP(HardSigmoid) | |||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
@@ -1219,13 +1219,13 @@ REG_OP(Shrink) | |||
* Three inputs, including: | |||
* @li x: A Tensor. | |||
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. | |||
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n | |||
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n | |||
* @li threshold: A Tensor which should have the shape (1,), the value to threshold at. | |||
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. | |||
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n | |||
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n | |||
* @li value: A Tensor which should have the shape (1,), the value to replace with. default value is 0. | |||
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. | |||
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n | |||
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n | |||
* @par Outputs: | |||
* y: A Tensor which has the same shape and type as the input x. \n | |||
@@ -61,16 +61,16 @@ REG_OP(Dequantize) | |||
.OP_END_FACTORY_REG(Dequantize) | |||
/** | |||
*@brief Quantizes the input . \n | |||
*@par Inputs: | |||
*@li x: shape and dtype of input_x. \n | |||
*@li scales: shape and dtype of input_scales. \n | |||
*@li zero_points: shape and dtype of input_zero_points \n | |||
*@par Attributes: | |||
*@li dtype: required, type. | |||
*@li axis: the processed dim. \n | |||
*@par Outputs: | |||
*y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n | |||
* @brief Quantizes the input . \n | |||
* @par Inputs: | |||
* @li x: shape and dtype of input_x. \n | |||
* @li scales: shape and dtype of input_scales. \n | |||
* @li zero_points: shape and dtype of input_zero_points \n | |||
* @par Attributes: | |||
* @li dtype: required, type. | |||
* @li axis: the processed dim. \n | |||
* @par Outputs: | |||
 * y: the output tensor. Has the same shape as the input; its dtype is the quantized type given by "dtype" . \n | |||
*/ | |||
REG_OP(Quantize) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
@@ -82,23 +82,31 @@ REG_OP(Quantize) | |||
.OP_END_FACTORY_REG(Quantize) | |||
/** | |||
*@brief Quantizes the input . \n | |||
* @brief Quantizes the input . \n | |||
*@par Inputs: | |||
*x: An tensor of type float16 or float32, specifying the input . \n | |||
* @par Inputs: | |||
 * x: A tensor of type float16 or float32, specifying the input . \n | |||
*@par Attributes: | |||
*@li scale: A required float32, specifying the scaling ratio. | |||
*@li offset: A required float16, specifying the offset. | |||
*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". | |||
*@li round_mode: An optional string, specifying the float16 to int8 cast type. | |||
* @par Attributes: | |||
* @li scale: A required float32, specifying the scaling ratio. | |||
* @li offset: A required float16, specifying the offset. | |||
 * @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". | |||
* Defaults to "False". | |||
* @li round_mode: An optional string, specifying the float16 to int8 cast type. | |||
* The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . | |||
*@li dst_type: A optional int32, specifying the output data type. Defaults to "DT_INT8" . \n | |||
 * @li dst_type: An optional int32, specifying the output data type. Defaults to "DT_INT8" . \n | |||
* @par Outputs: | |||
* y: The quantized output tensor of type int8 or int4. \n | |||
*@par Outputs: | |||
*y: The quantized output tensor of type int8 or int4. \n | |||
* @attention Constraints: | |||
* round_mode value range is [Round, Floor, Ceil, Truncate]. | |||
 * @li Round: round to nearest, ties to even (C language rint). | |||
 * @li Floor: round toward minus infinity (C language floor). | |||
 * @li Ceil: round toward positive infinity (C language ceil). | |||
 * @li Truncate: round toward zero (C language trunc). \n | |||
*@par Third-party framework compatibility | |||
* @par Third-party framework compatibility | |||
* It is a custom operator. It has no corresponding operator in Caffe. | |||
*/ | |||
REG_OP(AscendQuant) | |||
@@ -112,21 +120,22 @@ REG_OP(AscendQuant) | |||
.OP_END_FACTORY_REG(AscendQuant) | |||
/** | |||
*@brief Dequantizes the input . \n | |||
* @brief Dequantizes the input . \n | |||
*@par Inputs: | |||
*@par Inputs: | |||
 * @li x: A tensor of type int32, specifying the input. | |||
 * @li deq_scale: A tensor of type uint64, specifying the scaling ratio . \n | |||
*@par Attributes: | |||
* @li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". | |||
* @par Attributes: | |||
 * @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". | |||
* Defaults to "False". | |||
 * @li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". | |||
 * @li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n | |||
*@par Outputs: | |||
*y: The dequantized output tensor of type float16 or float32. \n | |||
* @par Outputs: | |||
* y: The dequantized output tensor of type float16 or float32. \n | |||
*@par Third-party framework compatibility | |||
* @par Third-party framework compatibility | |||
* It is a custom operator. It has no corresponding operator in Caffe. | |||
*/ | |||
REG_OP(AscendDequant) | |||
@@ -139,21 +148,22 @@ REG_OP(AscendDequant) | |||
.OP_END_FACTORY_REG(AscendDequant) | |||
/** | |||
*@brief Anti quantizes the input . \n | |||
* @brief Anti quantizes the input . \n | |||
*@par Inputs: | |||
*x: An tensor of type int8, specifying the input . \n | |||
* @par Inputs: | |||
 * x: A tensor of type int8, specifying the input . \n | |||
*@par Attributes: | |||
*@li scale: A required float32 scale. | |||
*@li offset: A required float32 offset. | |||
*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT". | |||
*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False" . \n | |||
* @par Attributes: | |||
* @li scale: A required float32 scale. | |||
* @li offset: A required float32 offset. | |||
 * @li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT". | |||
 * @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". | |||
* Defaults to "False" . \n | |||
*@par Outputs: | |||
*y: The dequantized output tensor of type float16 or float32. \n | |||
* @par Outputs: | |||
* y: The dequantized output tensor of type float16 or float32. \n | |||
*@par Third-party framework compatibility | |||
* @par Third-party framework compatibility | |||
* It is a custom operator. It has no corresponding operator in Caffe. | |||
*/ | |||
REG_OP(AscendAntiQuant) | |||
@@ -166,20 +176,20 @@ REG_OP(AscendAntiQuant) | |||
.OP_END_FACTORY_REG(AscendAntiQuant) | |||
/** | |||
*@brief Dequantizes the input of int16 . \n | |||
* @brief Dequantizes the input of int16 . \n | |||
*@par Inputs: | |||
*@li x0: An tensor of type int32, specifying the input. | |||
*@li deq_scale: An tensor of type uint64, specifying the scaling ratio. | |||
*@li x1: An tensor of type int16, specifying the input . \n | |||
* @par Inputs: | |||
 * @li x0: A tensor of type int32, specifying the input. | |||
 * @li deq_scale: A tensor of type uint64, specifying the scaling ratio. | |||
 * @li x1: A tensor of type int16, specifying the input . \n | |||
*@par Attributes: | |||
*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||
* @par Attributes: | |||
 * relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||
*@par Outputs: | |||
*y: The dequantized output tensor of type int16. \n | |||
* @par Outputs: | |||
* y: The dequantized output tensor of type int16. \n | |||
*@par Third-party framework compatibility | |||
* @par Third-party framework compatibility | |||
* It is a custom operator. It has no corresponding operator in Caffe. | |||
*/ | |||
REG_OP(AscendDequantS16) | |||
@@ -191,19 +201,19 @@ REG_OP(AscendDequantS16) | |||
.OP_END_FACTORY_REG(AscendDequantS16) | |||
/** | |||
*@brief Requantizes the input . \n | |||
* @brief Requantizes the input . \n | |||
*@par Inputs: | |||
*@li x: An tensor of type int32, specifying the input. | |||
*@li req_scale: An tensor of type uint64, specifying the scaling ratio . \n | |||
* @par Inputs: | |||
 * @li x: A tensor of type int32, specifying the input. | |||
 * @li req_scale: A tensor of type uint64, specifying the scaling ratio . \n | |||
*@par Attributes: | |||
*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||
* @par Attributes: | |||
 * relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||
*@par Outputs: | |||
*y: The dequantized output tensor of type int8. \n | |||
* @par Outputs: | |||
 * y: The requantized output tensor of type int8. \n | |||
*@par Third-party framework compatibility | |||
* @par Third-party framework compatibility | |||
* It is a custom operator. It has no corresponding operator in Caffe. | |||
*/ | |||
REG_OP(AscendRequant) | |||
@@ -214,22 +224,23 @@ REG_OP(AscendRequant) | |||
.OP_END_FACTORY_REG(AscendRequant) | |||
/** | |||
*@brief Requantizes the input of int16 . \n | |||
* @brief Requantizes the input of int16 . \n | |||
*@par Inputs: | |||
*@li x0: An tensor of type int16, specifying the input. | |||
*@li req_scale: An tensor of type uint64, specifying the scaling ratio. | |||
*@li x1: An tensor of type int16 . \n | |||
* @par Inputs: | |||
 * @li x0: A tensor of type int16, specifying the input. | |||
 * @li req_scale: A tensor of type uint64, specifying the scaling ratio. | |||
 * @li x1: A tensor of type int16 . \n | |||
*@par Attributes: | |||
*@li dual_output: A optional bool, specifying whether to perform dual ouput, either "True" or "False". Defaults to "False". | |||
*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||
* @par Attributes: | |||
 * @li dual_output: An optional bool, specifying whether to perform dual output, either "True" or "False". | |||
 * Defaults to "False". | |||
 * @li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||
*@par Outputs: | |||
*@li y0: The dequantized output tensor of type int8. | |||
*@li y1: The dequantized output tensor of type int16. \n | |||
* @par Outputs: | |||
 * @li y0: The requantized output tensor of type int8. | |||
 * @li y1: The requantized output tensor of type int16. \n | |||
*@par Third-party framework compatibility | |||
* @par Third-party framework compatibility | |||
* It is a custom operator. It has no corresponding operator in Caffe. | |||
*/ | |||
REG_OP(AscendRequantS16) | |||
@@ -79,6 +79,452 @@ REG_OP(StatelessRandomUniformInt) | |||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64})) | |||
.OP_END_FACTORY_REG(StatelessRandomUniformInt) | |||
} // namespace ge | |||
/** | |||
* @brief Outputs random values from a normal distribution. \n | |||
* @par Inputs: | |||
* Inputs include: | |||
* @li shape: A Tensor. Must be one of the following types: int32, int64. | |||
The shape of the output tensor. Batches are indexed by the 0th dimension. | |||
* @li seed: 2 seeds (shape [2]). | |||
* @li means: A Tensor. Must be one of the following types: half, bfloat16, float32, float64. | |||
* @li stdevs: A Tensor. Must have the same type as means. | |||
* @li min: A Tensor. Must have the same type as means. The minimum cutoff. May be -infinity. | |||
 * @li max: A Tensor. Must have the same type as means. The maximum cutoff. May be +infinity. \n | |||
* @par Outputs: | |||
* y: A Tensor. Has the same type as means. \n | |||
* @attention Constraints: | |||
* The implementation for StatelessParameterizedTruncatedNormal on Ascend uses AICPU, with bad performance. \n | |||
* @par Third-party framework compatibility | |||
* @li compatible with tensorflow StatelessParameterizedTruncatedNormal operator. | |||
*/ | |||
REG_OP(StatelessParameterizedTruncatedNormal) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(means, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(stdevs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(min, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(max, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OP_END_FACTORY_REG(StatelessParameterizedTruncatedNormal) | |||
/** | |||
* @brief Generate a single randomly distorted bounding box for an image . \n | |||
* @par Inputs: | |||
* Input images must be a 4-D tensor. Inputs include: | |||
* @li image_size: 1-D, containing [height, width, channels]. | |||
* @li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding | |||
boxes associated with the image. | |||
* @li min_object_covered: The cropped area of the image must contain at least | |||
this fraction of any bounding box supplied. The value of this parameter should | |||
be non-negative. In the case of 0, the cropped area does not need to overlap | |||
any of the bounding boxes supplied . | |||
* @li seed: A shape [2] Tensor, the seed to the random number generator. \n | |||
* @par Attributes: | |||
* @li aspect_ratio_range: The cropped area of the image must have an aspect | |||
ratio = width / height within this range. | |||
* @li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The | |||
cropped area of the image must contain a fraction of the supplied image | |||
within this range. | |||
* @li max_attempts: Number of attempts at generating a cropped region of the | |||
image of the specified constraints. After max_attempts failures, return the | |||
entire image. | |||
* @li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes | |||
supplied. If true, assume an implicit bounding box covering the whole input. | |||
If false, raise an error . \n | |||
* @par Outputs: | |||
* @li begin: 1-D, containing [offset_height, offset_width, 0]. | |||
* @li size: 1-D, containing [target_height, target_width, -1]. | |||
* @li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n | |||
* @attention Constraints: | |||
* Input images can be of different types but output images are always float . \n | |||
* @par Third-party framework compatibility | |||
* Compatible with tensorflow StatelessSampleDistortedBoundingBox operator. | |||
*/ | |||
REG_OP(StatelessSampleDistortedBoundingBox) | |||
.INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||
DT_INT32, DT_INT64 })) | |||
.INPUT(bounding_boxes, TensorType({ DT_FLOAT })) | |||
.INPUT(min_object_covered, TensorType({ DT_FLOAT })) | |||
.INPUT(seed, TensorType({ DT_INT32, DT_INT64 })) | |||
.OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||
DT_INT32, DT_INT64 })) | |||
.OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||
DT_INT32, DT_INT64 })) | |||
.OUTPUT(bboxes, TensorType({ DT_FLOAT })) | |||
.ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) | |||
.ATTR(area_range, ListFloat, { 0.05f, 1.0f }) | |||
.ATTR(max_attempts, Int, 100) | |||
.ATTR(use_image_if_no_bounding_boxes, Bool, false) | |||
.OP_END_FACTORY_REG(StatelessSampleDistortedBoundingBox) | |||
/** | |||
* @brief Outputs random values from a truncated normal distribution. \n | |||
* @par Inputs: | |||
* Inputs include: | |||
* @li shape: A Tensor. Must be one of the following types: int32, int64. \n | |||
* @li key: Key of RNG algorithm. Shape[1]. \n | |||
* @li counter: Counter of RNG algorithm. Shape[2] for philox, shape[1] for threefry. \n | |||
* @li alg: RNG algorithm. 1:philox 2:threefry. \n | |||
* @par Attributes: | |||
 * @li dtype: An optional attr, specifying the output data type. Defaults to "DT_FLOAT". \n | |||
* @par Outputs: | |||
* y: A Tensor of types: float16, float32, double. A tensor of the specified shape | |||
filled with random truncated normal values. \n | |||
* @attention Constraints: | |||
* The implementation for StatelessTruncatedNormalV2 on Ascend uses AICPU, with bad performance. | |||
* @par Third-party framework compatibility | |||
* @li compatible with tensorflow StatelessTruncatedNormalV2 operator. | |||
*/ | |||
REG_OP(StatelessTruncatedNormalV2) | |||
.INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) | |||
.INPUT(key, TensorType({ DT_UINT64 })) | |||
.INPUT(counter, TensorType({ DT_UINT64 })) | |||
.INPUT(alg, TensorType({ DT_INT32 })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||
.ATTR(dtype, Type, DT_FLOAT) | |||
.OP_END_FACTORY_REG(StatelessTruncatedNormalV2) | |||
/** | |||
* @brief Outputs deterministic pseudorandom random numbers from a gamma distribution. \n | |||
* @par Inputs: | |||
* @li shape: The shape of the output tensor. | |||
* @li seed: 2 seeds (shape [2]). | |||
* @li alpha: The concentration of the gamma distribution. Shape must match the rightmost dimensions of shape. \n | |||
* @par Outputs: | |||
* y: A Tensor. Has the same type as alpha. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomGammaV2 operator. | |||
*/ | |||
REG_OP(StatelessRandomGammaV2) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) | |||
.OP_END_FACTORY_REG(StatelessRandomGammaV2) | |||
/** | |||
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n | |||
* @par Inputs: | |||
* @li shape: The shape of the output tensor. | |||
* @li seed: 2 seeds (shape [2]). \n | |||
* @par Attributes: | |||
 * dtype: Output data type . \n | |||
* @par Outputs: | |||
* y: Returns Random values with specified shape . \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomUniformFullInt operator. | |||
*/ | |||
REG_OP(StatelessRandomUniformFullInt) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.ATTR(dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(StatelessRandomUniformFullInt) | |||
/** | |||
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n | |||
* @par Inputs: | |||
* @li shape: The shape of the output tensor. | |||
* @li key: Key for the counter-based RNG algorithm. | |||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||
* @li alg: 0-D. The RNG algorithm. \n | |||
* @par Attributes: | |||
 * dtype: Output data type . \n | |||
* @par Outputs: | |||
* y: Returns Random values with specified shape . \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomUniformFullIntV2 operator. | |||
*/ | |||
REG_OP(StatelessRandomUniformFullIntV2) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(key, TensorType({DT_UINT64})) | |||
.INPUT(counter, TensorType({DT_UINT64})) | |||
.INPUT(alg, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.ATTR(dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(StatelessRandomUniformFullIntV2) | |||
/** | |||
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n | |||
* @par Inputs: | |||
* @li shape: The shape of the output tensor. | |||
* @li key: Key for the counter-based RNG algorithm. | |||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||
* @li alg: 0-D. The RNG algorithm. | |||
* @li minval: Minimum value (inclusive, scalar). | |||
* @li maxval: Maximum value (exclusive, scalar) . \n | |||
* @par Outputs: | |||
* y: Returns Random values with specified shape . \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomUniformIntV2 operator. | |||
*/ | |||
REG_OP(StatelessRandomUniformIntV2) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(key, TensorType({DT_UINT64})) | |||
.INPUT(counter, TensorType({DT_UINT64})) | |||
.INPUT(alg, TensorType({DT_INT32})) | |||
.INPUT(minval, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.INPUT(maxval, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.OP_END_FACTORY_REG(StatelessRandomUniformIntV2) | |||
/** | |||
* @brief Outputs deterministic pseudorandom random integers from a binomial distribution. \n | |||
* @par Inputs: | |||
* @li shape: The shape of the output tensor. | |||
* @li seed: 2 seeds (shape [2]). | |||
* @li counts: The counts of the binomial distribution. Must be broadcastable with probs, | |||
* and broadcastable with the rightmost dimensions of shape. | |||
* @li probs: The probability of success for the binomial distribution. | |||
* Must be broadcastable with counts and broadcastable with the rightmost dimensions of shape. \n | |||
* @par Attributes: | |||
* @li dtype: A optional int32, specifying the output data type. Defaults to "DT_INT32". \n | |||
* @par Outputs: | |||
* @li y: Returns Random values with specified shape. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomBinomial operator. | |||
*/ | |||
REG_OP(StatelessRandomBinomial) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(counts, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) | |||
.INPUT(probs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(StatelessRandomBinomial) | |||
/** | |||
* @brief Outputs deterministic pseudorandom random integers from a poisson distribution . \n | |||
* @par Inputs: | |||
* @li shape: The shape of the output tensor. | |||
* @li seed: 2 seeds (shape [2]). | |||
 * @li lam: mean value of the poisson distribution . \n | |||
 * @par Attributes: | |||
 * dtype: Output data type . \n | |||
 * @par Outputs: | |||
 * y: Returns Random values with specified shape . \n | |||
 * @par Third-party framework compatibility | |||
 * Compatible with TensorFlow StatelessRandomPoisson operator. | |||
*/ | |||
REG_OP(StatelessRandomPoisson) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(lam, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.OP_END_FACTORY_REG(StatelessRandomPoisson) | |||
/** | |||
 * @brief Picks the best counter-based RNG algorithm based on device. \n | |||
* @par Outputs: | |||
* @li alg: The RNG algorithm. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomGetAlg operator. | |||
*/ | |||
REG_OP(StatelessRandomGetAlg) | |||
.OUTPUT(alg, TensorType({DT_INT32})) | |||
.OP_END_FACTORY_REG(StatelessRandomGetAlg) | |||
/** | |||
* @brief This op picks the best counter-based RNG algorithm based on device, and | |||
* scrambles a shape-[2] seed into a key and a counter, both needed by the | |||
* counter-based algorithm. \n | |||
* @par Inputs: | |||
* @li seed: 2 seeds (shape [2]). \n | |||
* @par Outputs: | |||
* @li key: Key for the counter-based RNG algorithm. | |||
* @li counter: Initial counter for the counter-based RNG algorithm. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomGetKeyCounter operator. | |||
*/ | |||
REG_OP(StatelessRandomGetKeyCounter) | |||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(key, TensorType({DT_UINT64})) | |||
.OUTPUT(counter, TensorType({DT_UINT64})) | |||
.OP_END_FACTORY_REG(StatelessRandomGetKeyCounter) | |||
/** | |||
* @brief This op picks the best counter-based RNG algorithm based on device, and | |||
* scrambles a shape-[2] seed into a key and a counter, both needed by the | |||
* counter-based algorithm. \n | |||
* @par Inputs: | |||
* @li seed: 2 seeds (shape [2]). \n | |||
* @par Outputs: | |||
* @li key: Key for the counter-based RNG algorithm. | |||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||
* @li alg: The RNG algorithm. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomGetKeyCounterAlg operator. | |||
*/ | |||
REG_OP(StatelessRandomGetKeyCounterAlg) | |||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(key, TensorType({DT_UINT64})) | |||
.OUTPUT(counter, TensorType({DT_UINT64})) | |||
.OUTPUT(alg, TensorType({DT_INT32})) | |||
.OP_END_FACTORY_REG(StatelessRandomGetKeyCounterAlg) | |||
/** | |||
* @brief Outputs deterministic pseudorandom values from a normal distribution. \n | |||
* @par Inputs: | |||
* @li shape: The shape of the output tensor. | |||
* @li key: Key for the counter-based RNG algorithm. | |||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||
* @li alg: The RNG algorithm. \n | |||
* @par Attributes: | |||
* @li dtype: Output data type . \n | |||
* @par Outputs: | |||
* @li y: Returns Random values with specified shape . \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomNormalV2 operator. | |||
*/ | |||
REG_OP(StatelessRandomNormalV2) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(key, TensorType({DT_UINT64})) | |||
.INPUT(counter, TensorType({DT_UINT64})) | |||
.INPUT(alg, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(dtype, Type, DT_FLOAT) | |||
.OP_END_FACTORY_REG(StatelessRandomNormalV2) | |||
/** | |||
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n | |||
* @par Inputs: | |||
* @li shape: The shape of the output tensor. | |||
* @li key: Key for the counter-based RNG algorithm. | |||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||
* @li alg: 0-D. The RNG algorithm. \n | |||
* @par Attributes: | |||
 * dtype: Output data type . \n | |||
* @par Outputs: | |||
* y: Returns Random values with specified shape . \n | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow StatelessRandomUniformV2 operator. | |||
*/ | |||
REG_OP(StatelessRandomUniformV2) | |||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(key, TensorType({DT_UINT64})) | |||
.INPUT(counter, TensorType({DT_UINT64})) | |||
.INPUT(alg, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) | |||
.ATTR(dtype, Type, DT_FLOAT) | |||
.OP_END_FACTORY_REG(StatelessRandomUniformV2) | |||
/** | |||
* @brief Create a random number seed generator . \n | |||
* @par Inputs: | |||
* include: | |||
 * @li seed: 1-D Tensor, the seed used to generate random numbers. | |||
 * Must be one of the following types: int32, int64. | |||
 * @li seed2: 1-D Tensor, the seed used to generate random numbers. | |||
 * Must be one of the following types: int32, int64. | |||
 * @li reshuffle: 1-D Tensor of type bool. Seed selection: "True" for a random seed, | |||
 * "False" for a fixed seed. \n | |||
 * @par Outputs: | |||
 * @li handle: Handle to the random number seed generator. | |||
 * @li deleter: Handle to the deleter, used when deleting the random number | |||
 * seed generator. \n | |||
 * @see AnonymousSeedGenerator() | |||
 * @par Third-party framework compatibility | |||
 * Compatible with the TensorFlow AnonymousSeedGenerator operator. | |||
*/ | |||
REG_OP(AnonymousSeedGenerator) | |||
.INPUT(seed, TensorType({DT_INT32,DT_INT64})) | |||
.INPUT(seed2, TensorType({DT_INT32,DT_INT64})) | |||
.INPUT(reshuffle, TensorType({DT_BOOL})) | |||
    .OUTPUT(handle, TensorType({DT_RESOURCE})) | |||
.OUTPUT(deleter, TensorType({DT_VARIANT})) | |||
.OP_END_FACTORY_REG(AnonymousSeedGenerator) | |||
/** | |||
* @brief DeleteSeedGenerator . \n | |||
* @par Inputs: | |||
* @li handle: A Tensor of type resource. | |||
* @li deleter: A Tensor of type variant. | |||
* @par Third-party framework compatibility | |||
* Compatible with TensorFlow DeleteSeedGenerator operator. | |||
*/ | |||
REG_OP(DeleteSeedGenerator) | |||
.INPUT(handle, TensorType({DT_RESOURCE})) | |||
.INPUT(deleter, TensorType({DT_VARIANT})) | |||
.OP_END_FACTORY_REG(DeleteSeedGenerator) | |||
/** | |||
 * @brief Creates a placeholder seed-generator handle that is rewritten and | |||
 * passed on during the graph compilation phase. \n | |||
 * @par Outputs: | |||
 * handle: A Tensor of type resource, the placeholder handle . \n | |||
*/ | |||
REG_OP(DummySeedGenerator) | |||
.OUTPUT(handle, TensorType({ DT_RESOURCE })) | |||
.OP_END_FACTORY_REG(DummySeedGenerator) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ |
@@ -60,7 +60,10 @@ REG_OP(Bitcast) | |||
*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN . \n | |||
*@par Outputs: | |||
*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. | |||
*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. \n | |||
*@attention Constraints: | |||
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version. | |||
*/ | |||
REG_OP(DepthwiseWeight4DTo6D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | |||
@@ -77,7 +80,10 @@ REG_OP(DepthwiseWeight4DTo6D) | |||
*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN . \n | |||
*@par Outputs: | |||
*y: A 4D Tensor. Has the same type as "x", with format HWCN. | |||
*y: A 4D Tensor. Has the same type as "x", with format HWCN. \n | |||
*@attention Constraints: | |||
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version. | |||
*/ | |||
REG_OP(DepthwiseWeight6DTo4D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | |||
@@ -92,6 +92,12 @@ typedef enum tagRtLimitType { | |||
RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms | |||
} rtLimitType_t; | |||
typedef enum tagRtFloatOverflowMode { | |||
RT_OVERFLOW_MODE_SATURATION = 0, | |||
RT_OVERFLOW_MODE_INFNAN, | |||
RT_OVERFLOW_MODE_UNDEF, | |||
} rtFloatOverflowMode_t; | |||
typedef struct rtExceptionInfo { | |||
uint32_t taskid; | |||
uint32_t streamid; | |||
@@ -145,12 +145,6 @@ typedef enum tagRTTaskTimeoutType { | |||
RT_TIMEOUT_TYPE_OP_EXECUTE, | |||
} rtTaskTimeoutType_t; | |||
typedef enum tagRtFloatOverflowMode { | |||
RT_OVERFLOW_MODE_SATURATION = 0, | |||
RT_OVERFLOW_MODE_INFNAN, | |||
RT_OVERFLOW_MODE_UNDEF, | |||
} rtFloatOverflowMode_t; | |||
/** | |||
* @ingroup | |||
* @brief get AI core count | |||
@@ -433,6 +433,32 @@ RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback cal | |||
* @return RT_MEMORY_TS, RT_MEMORY_HBM, RT_MEMORY_TS | RT_MEMORY_POLICY_HUGE_PAGE_ONLY | |||
*/ | |||
RTS_API uint32_t rtGetTsMemType(rtMemRequestFeature_t featureType, uint32_t memSize); | |||
/** | |||
* @ingroup | |||
* @brief set saturation mode for current device. | |||
* @param [in] saturation mode. | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtSetDeviceSatMode(rtFloatOverflowMode_t floatOverflowMode); | |||
/** | |||
* @ingroup | |||
* @brief get saturation mode for current device. | |||
* @param [out] saturation mode. | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtGetDeviceSatMode(rtFloatOverflowMode_t *floatOverflowMode); | |||
/** | |||
* @ingroup | |||
* @brief get saturation mode for target stream. | |||
* @param [in] target stm | |||
* @param [out] saturation mode. | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtGetDeviceSatModeForStream(rtStream_t stm, rtFloatOverflowMode_t *floatOverflowMode); | |||
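/*
 * Editor's note (illustration only, not part of the header): a minimal usage
 * sketch of the two device-level calls declared above. Only the signatures,
 * the overflow-mode enum and RT_ERROR_NONE come from this header; the umbrella
 * include name is an assumption.
 */
// #include "runtime/rt.h"   /* assumed umbrella header */
static inline rtError_t ConfigureSaturationMode(void) {
  rtError_t ret = rtSetDeviceSatMode(RT_OVERFLOW_MODE_SATURATION);
  if (ret != RT_ERROR_NONE) {
    return ret;  /* device rejected the mode or no device is set */
  }
  rtFloatOverflowMode_t mode = RT_OVERFLOW_MODE_UNDEF;
  ret = rtGetDeviceSatMode(&mode);  /* read back: 0 = saturation, 1 = INF/NAN */
  return ret;
}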
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -229,6 +229,8 @@ typedef void (*rtCallback_t)(void *fnData); | |||
#define RT_KERNEL_DUMPFLAG (0x02U) | |||
#define RT_FUSION_KERNEL_DUMPFLAG (0x04U) | |||
#define RT_KERNEL_CUSTOM_AICPU (0x08U) | |||
#define RT_KERNEL_FFTSPLUS_DYNAMIC_SHAPE_DUMPFLAG (0x10U) | |||
#define RT_KERNEL_FFTSPLUS_STATIC_SHAPE_DUMPFLAG (0x20U) | |||
// STARS topic scheduler sqe : topic_type | |||
#define RT_KERNEL_DEVICE_FIRST (0x10U) | |||
@@ -391,6 +391,18 @@ RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, con | |||
RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind, | |||
rtStream_t stm); | |||
typedef struct rtMemcpyAddrInfo { | |||
uint32_t resv0; | |||
uint32_t resv1; | |||
uint32_t resv2; | |||
uint32_t len; | |||
uint64_t src; | |||
uint64_t dst; | |||
} rtMemcpyAddrInfo; | |||
RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint64_t count, | |||
rtMemcpyKind_t kind, rtStream_t stream); | |||
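/*
 * Editor's note (illustration only, not part of the header): a hedged sketch of an
 * asynchronous host-to-device copy using the rtMemcpyAsync declaration above,
 * followed by a stream sync. RT_MEMCPY_HOST_TO_DEVICE and rtStreamSynchronize are
 * assumptions about the wider runtime API; rtMemcpyAsyncPtr is used the same way
 * but takes a pre-filled rtMemcpyAddrInfo block in place of separate dst/src args.
 */
static inline rtError_t CopyHostToDeviceSync(void *devDst, const void *hostSrc,
                                             uint64_t bytes, rtStream_t stm) {
  rtError_t ret = rtMemcpyAsync(devDst, bytes, hostSrc, bytes,
                                RT_MEMCPY_HOST_TO_DEVICE, stm);
  if (ret != RT_ERROR_NONE) {
    return ret;
  }
  return rtStreamSynchronize(stm);  /* wait until the async copy has finished */
}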
/** | |||
* @ingroup dvrt_mem | |||
* @brief asynchronized reduce memcpy | |||
@@ -409,6 +421,23 @@ RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, ui | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief asynchronized reduce memcpy | |||
* @param [in] dst destination address pointer | |||
 * @param [in] destMax max length of destination address memory | |||
* @param [in] src source address pointer | |||
* @param [in] count the number of byte to copy | |||
* @param [in] kind memcpy type | |||
* @param [in] type data type | |||
* @param [in] stm asynchronized task stream | |||
* @param [in] overflowAddr addr of overflow flag | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtReduceAsyncV2(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind, | |||
rtDataType_t type, rtStream_t stm, void *overflowAddr); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief synchronized memcpy2D | |||
* @param [in] dst destination address pointer | |||
* @param [in] dstPitch pitch of destination memory | |||
@@ -17,10 +17,19 @@ extern "C" { | |||
#pragma pack(push) | |||
#pragma pack (1) | |||
typedef struct tagFftsPlusDumpInfo { | |||
const void *loadDumpInfo; | |||
const void *unloadDumpInfo; | |||
uint32_t loadDumpInfolen; | |||
uint32_t unloadDumpInfolen; | |||
} rtFftsPlusDumpInfo_t; | |||
typedef struct tagFftsPlusTaskInfo { | |||
const rtFftsPlusSqe_t *fftsPlusSqe; | |||
const void *descBuf; // include total context | |||
size_t descBufLen; // the length of descBuf | |||
rtFftsPlusDumpInfo_t fftsPlusDumpInfo; // used only in the dynamic shape | |||
} rtFftsPlusTaskInfo_t; | |||
#pragma pack(pop) | |||
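/*
 * Editor's note (illustration only, not part of the header): how the new
 * rtFftsPlusDumpInfo_t member slots into rtFftsPlusTaskInfo_t for a dynamic-shape
 * launch. Only the struct layout comes from this hunk; where the dump descriptors
 * come from and how the task is launched are not shown here.
 */
static inline void FillFftsPlusTaskInfo(rtFftsPlusTaskInfo_t *task,
                                        const rtFftsPlusSqe_t *sqe,
                                        const void *descBuf, size_t descBufLen,
                                        const void *loadDump, uint32_t loadLen,
                                        const void *unloadDump, uint32_t unloadLen) {
  task->fftsPlusSqe = sqe;
  task->descBuf = descBuf;          /* serialized contexts */
  task->descBufLen = descBufLen;
  /* dump descriptors are consumed only for dynamic-shape graphs (see field comment) */
  task->fftsPlusDumpInfo.loadDumpInfo = loadDump;
  task->fftsPlusDumpInfo.loadDumpInfolen = loadLen;
  task->fftsPlusDumpInfo.unloadDumpInfo = unloadDump;
  task->fftsPlusDumpInfo.unloadDumpInfolen = unloadLen;
}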
@@ -110,7 +110,8 @@ typedef struct tagFftsPlusAicAivCtx { | |||
// 0-3 bytes | |||
uint16_t contextType; | |||
uint8_t successorNum; | |||
uint8_t resv : 7; | |||
uint8_t resv : 6; | |||
uint8_t dumpSwitch : 1; | |||
uint8_t aten : 1; | |||
// 4-7 | |||
uint8_t prefetchConfig; | |||
@@ -172,7 +173,8 @@ typedef struct tagFftsPlusMixAicAivCtx { | |||
// 0-3 bytes | |||
uint16_t contextType; | |||
uint8_t successorNum; | |||
uint8_t reserved1 : 7; | |||
uint8_t reserved1 : 6; | |||
uint8_t dumpSwitch : 1; | |||
uint8_t aten : 1; | |||
// 4-7 | |||
uint8_t prefetchConfig; | |||
@@ -236,7 +238,8 @@ typedef struct tagFftsPlusSdmaCtx { | |||
// 0-3 bytes | |||
uint16_t contextType; | |||
uint8_t successorNum; | |||
uint8_t res1 : 7; | |||
uint8_t res1 : 6; | |||
uint8_t dumpSwitch : 1; | |||
uint8_t aten : 1; | |||
// 4-7 | |||
uint8_t res2; | |||
@@ -376,7 +379,8 @@ typedef struct tagFftsPlusAiCpuCtx { | |||
// 0-3 bytes | |||
uint16_t contextType; | |||
uint8_t successorNum; | |||
uint8_t res1 : 7; | |||
uint8_t res1 : 6; | |||
uint8_t dumpSwitch : 1; | |||
uint8_t aten : 1; | |||
// 4-7 | |||
uint8_t res2; | |||
@@ -70,10 +70,27 @@ typedef enum tagFftsPlusType { | |||
RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode | |||
} rtFftsPlusType_t; | |||
typedef struct tagStarsFftsPlusHeader { | |||
uint8_t type : 6; | |||
uint8_t l1Lock : 1; | |||
uint8_t l1Unlock : 1; | |||
uint8_t ie : 2; | |||
uint8_t preP : 2; | |||
uint8_t postP : 2; | |||
uint8_t wrCqe : 1; | |||
    /* tells the MCU whether this subgraph is overflow-enabled; the MCU sends this flag to AICPU when the AICPU ctx is executed */ | |||
uint8_t overflowEn : 1; | |||
uint16_t blockDim; | |||
uint16_t rtStreamId; | |||
uint16_t taskId; | |||
} rtStarsFftsPlusHeader_t; | |||
// ffts+ sqe | |||
typedef struct tagFftsPlusSqe { | |||
// 0-7 bytes | |||
rtStarsSqeHeader_t sqeHeader; | |||
rtStarsSqeHeader_t sqeHeader; // use rtStarsFftsPlusHeader_t instead | |||
// 8-11 bytes | |||
uint16_t fftsType : 3; | |||
uint16_t reserved1 : 9; | |||
@@ -27,6 +27,7 @@ extern "C" { | |||
#define RT_STREAM_HEAD (0x20U) | |||
#define RT_STREAM_PRIMARY_DEFAULT (0x40U) | |||
#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U) | |||
#define RT_STREAM_OVERFLOW (0x100U) | |||
/** | |||
* @ingroup stream_type | |||
@@ -212,6 +213,26 @@ RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stm, uint32_t flag, const | |||
*/ | |||
RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stm); | |||
/* | |||
* @ingroup dvrt_stream | |||
* @brief enable or disable stream overflow | |||
* @param [in] stm: stream handle | |||
 * @param [in] flags: 0: disable, others: enable | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtSetStreamOverflowSwitch(rtStream_t stm, uint32_t flags); | |||
/* | |||
* @ingroup dvrt_stream | |||
* @brief get whether overflow of the stream is enable or disable | |||
* @param [in] stm: stream handle | |||
 * @param [out] flags: 0: disable, others: enable | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetStreamOverflowSwitch(rtStream_t stm, uint32_t *flags); | |||
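/*
 * Editor's note (illustration only, not part of the header): a minimal sketch of
 * the new per-stream overflow switch. Only the two declarations above come from
 * this hunk; the error handling and the read-back are shown for symmetry.
 */
static inline rtError_t EnableStreamOverflowCheck(rtStream_t stm) {
  rtError_t ret = rtSetStreamOverflowSwitch(stm, 1U);  /* non-zero enables detection */
  if (ret != RT_ERROR_NONE) {
    return ret;
  }
  uint32_t enabled = 0U;
  return rtGetStreamOverflowSwitch(stm, &enabled);     /* 0 means still disabled */
}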
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -41,6 +41,10 @@ struct InitFlowGwInfo { | |||
char_t rsv[128]; | |||
}; | |||
typedef enum { | |||
TSD_CAPABILITY_PIDQOS = 0, | |||
TSD_CAPABILITY_BUT | |||
} TsdCapabilityType; | |||
/** | |||
* @ingroup Open | |||
* @brief Used for the Framework process to communicate with the TSDDaemon process, | |||
@@ -198,6 +202,36 @@ TDT_LIB_EXPORT uint32_t TsdSetMsprofReporterCallback(const MsprofReporterCallbac | |||
* @retval OtherValues Failure | |||
*/ | |||
TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue); | |||
/** | |||
* @ingroup TsdCapabilityGet | |||
 * @brief used to query a capability from tsd | |||
 * | |||
 * @par logicDeviceId | |||
 * logic device id | |||
 * | |||
 * @par type | |||
 * capability type | |||
 * | |||
 * @par ptr | |||
 * the result | |||
* @retval TDT_OK Success | |||
* @retval OtherValues Failure | |||
*/ | |||
TDT_LIB_EXPORT uint32_t TsdCapabilityGet(const uint32_t logicDeviceId, const int32_t type, const uint64_t ptr); | |||
/** | |||
* @ingroup GetHdcConctStatus | |||
* @brief used to get hdc connection status | |||
* | |||
* @par logicDeviceId | |||
* logic device id | |||
* | |||
* @par hdcSessStat | |||
* hdc session status, DRV_ERROR_SOCKET_CONNECT or DRV_ERROR_SOCKET_CLOSE | |||
* @retval TDT_OK Success | |||
* @retval OtherValues Failure | |||
*/ | |||
TDT_LIB_EXPORT uint32_t GetHdcConctStatus(const uint32_t logicDeviceId, int32_t *hdcSessStat); | |||
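/*
 * Editor's note (illustration only, not part of the header): a hedged sketch of the
 * two new TSD queries. TDT_OK is referenced in the comments above; treating "ptr"
 * as the address of a caller-provided result buffer, and the availability of
 * uintptr_t, are editor's assumptions.
 */
static inline void QueryTsdState(const uint32_t logicDeviceId) {
  uint64_t capability = 0U;
  (void)TsdCapabilityGet(logicDeviceId, (int32_t)TSD_CAPABILITY_PIDQOS,
                         (uint64_t)(uintptr_t)&capability);
  int32_t hdcSessStat = 0;
  if (GetHdcConctStatus(logicDeviceId, &hdcSessStat) == TDT_OK) {
    /* hdcSessStat is DRV_ERROR_SOCKET_CONNECT or DRV_ERROR_SOCKET_CLOSE */
  }
}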
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
@@ -19,11 +19,11 @@ | |||
#include "common/type_def.h" | |||
namespace tsd { | |||
#ifdef __cplusplus | |||
using TSD_StatusT = uint32_t; | |||
using TSD_StatusT = uint32_t; | |||
#else | |||
typedef uint32_t TSD_StatusT; | |||
typedef uint32_t TSD_StatusT; | |||
#endif | |||
// success code | |||
constexpr TSD_StatusT TSD_OK = 0U; | |||
} // namespace tsd | |||
// success code | |||
constexpr TSD_StatusT TSD_OK = 0U; | |||
} | |||
#endif // INC_TDT_STATUS_H |