@@ -64,49 +64,54 @@ static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is emp
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
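A minimal sketch of how a caller might turn a few of the new runtime status codes above into readable text for logging. The constant names and values are copied from this hunk; the helper function and the surrounding program are purely illustrative.

#include <cinttypes>
#include <cstdint>
#include <cstdio>

static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017;                   // common over flow
static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042;   // aic trap read overflow
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043;  // aic trap write overflow

// Map a runtime status code onto a short description for log output.
static const char *RtErrorToString(const int32_t code) {
  switch (code) {
    case ACL_ERROR_RT_OVER_FLOW:                  return "common overflow";
    case ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW:  return "aicore trap read overflow";
    case ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW: return "aicore trap write overflow";
    default:                                      return "unknown runtime error";
  }
}

int main() {
  const int32_t ret = ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW;  // pretend a call returned this
  std::printf("%" PRId32 " -> %s\n", ret, RtErrorToString(ret));
  return 0;
}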
@@ -71,13 +71,15 @@ const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExec
const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr";
const char_t *const OPTION_EXEC_GRAPH_EXEC_TIMEOUT = "ge.exec.graphExecTimeout";
const char_t *const OPTION_EXEC_MODEL_EXEC_TIMEOUT = "ge.exec.modelExecTimeout";
// Option key: memory init
const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
const char_t *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize";
const char_t *const OPTION_EXEC_REUSE_ZERO_COPY_MEMORY = "ge.exec.reuseZeroCopyMemory";
const char_t *const OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE = "ge.exec.logicalDeviceClusterDeployMode";
const char_t *const OPTION_EXEC_LOGICAL_DEVICE_ID = "ge.exec.logicalDeviceId";
namespace configure_option {
const char_t *const STREAM_NUM = "ge.streamNum";
const char_t *const HEAD_STREAM = "ge.headStream";
@@ -107,6 +109,7 @@ const char_t *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode";
const char_t *const HCOM_PARALLEL = "ge.hcomParallel";
const char_t *const AUTO_TUNE_MODE = "ge.autoTuneMode";
const char_t *const SOC_VERSION = "ge.socVersion";
const char_t *const VIRTUAL_TYPE = "ge.virtual_type";
const char_t *const CORE_TYPE = "ge.engineType";
const char_t *const AICORE_NUM = "ge.aicoreNum";
const char_t *const L1_FUSION = "ge.l1Fusion";
@@ -254,9 +257,16 @@ const std::string AUTO_TUNE_MODE = "ge.autoTuneMode";
// Configure soc version , example: "Ascend310"
const std::string SOC_VERSION = "ge.socVersion";
// configure whether to enable virtualization,
// its value should be "0" or "1", default value is "0"
const std::string VIRTUAL_TYPE = "ge.virtual_type";
// Configure core type "VectorEngine", default value is "AIcoreEngine"
const std::string CORE_TYPE = "ge.engineType";
// Configure graph exclude one or more engines
const std::string EXCLUDE_ENGINES = "ge.exec.exclude_engines";
// Configure AICORE NUM
const std::string AICORE_NUM = "ge.aicoreNum";
@@ -420,6 +430,7 @@ static const char_t *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_
static const char_t *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str();
static const char_t *const CORE_TYPE = ge::CORE_TYPE.c_str();
static const char_t *const SOC_VERSION = ge::SOC_VERSION.c_str();
static const char_t *const VIRTUAL_TYPE = ge::VIRTUAL_TYPE.c_str();
static const char_t *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM;
static const char_t *const AICORE_NUM = ge::AICORE_NUM.c_str();
static const char_t *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str();
@@ -487,6 +498,7 @@ const std::set<std::string> ir_parser_suppported_options = {
// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {CORE_TYPE,
                                              SOC_VERSION,
                                              VIRTUAL_TYPE,
                                              BUFFER_OPTIMIZE,
                                              ENABLE_COMPRESS_WEIGHT,
                                              COMPRESS_WEIGHT_CONF,
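The hunks above add ge.virtual_type alongside the existing global option keys. A small sketch of how such keys could be collected into the options map consumed by the build-initialize interface referenced above ("for interface: aclgrphBuildInitialize"); only the key strings and documented values come from the diff, the map shape and any call that consumes it are assumptions for illustration.

#include <map>
#include <string>

int main() {
  // Key strings taken from the option constants listed in the hunks above.
  std::map<std::string, std::string> global_options = {
      {"ge.socVersion", "Ascend310"},    // SOC_VERSION, example value from the comment above
      {"ge.virtual_type", "1"},          // VIRTUAL_TYPE, "0" or "1", default "0"
      {"ge.engineType", "VectorEngine"}  // CORE_TYPE, default "AIcoreEngine"
  };
  // The populated map would then be handed to the graph-build initialization entry point.
  (void)global_options;
  return 0;
}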
@@ -64,49 +64,54 @@ static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is emp
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
@@ -17,6 +17,7 @@
#ifndef INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_
#define INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_
#include <cinttypes>
#include <cstdint>
#include "framework/common/ge_inner_error_codes.h"
@@ -56,63 +57,64 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
  return (enable == 1);
}
#define GELOGE(ERROR_CODE, fmt, ...) \
  do { \
    dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
               ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
               ##__VA_ARGS__); \
#define GELOGE(ERROR_CODE, fmt, ...) \
  do { \
    dlog_error(GE_MODULE_NAME, "%" PRIu64 " %s: ErrorNo: %" PRIuLEAST8 "(%s) %s" fmt, GeLog::GetTid(), \
               &__FUNCTION__[0], (ERROR_CODE), ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \
               ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__); \
  } while (false)
#define GELOGW(fmt, ...) \
  do { \
    if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \
      dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
    } \
#define GELOGW(fmt, ...) \
  do { \
    if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \
      dlog_warn(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
    } \
  } while (false)
#define GELOGI(fmt, ...) \
  do { \
    if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \
      dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
    } \
#define GELOGI(fmt, ...) \
  do { \
    if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \
      dlog_info(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
    } \
  } while (false)
#define GELOGD(fmt, ...) \
  do { \
    if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \
      dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
    } \
#define GELOGD(fmt, ...) \
  do { \
    if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \
      dlog_debug(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
    } \
  } while (false)
#define GEEVENT(fmt, ...) \
  do { \
    dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
#define GEEVENT(fmt, ...) \
  do { \
    dlog_event(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
  } while (false)
#define GELOGT(VALUE, fmt, ...) \
  do { \
    TraceStatus stat = (VALUE); \
    const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
    const int32_t idx = static_cast<int32_t>(stat); \
    char_t *k = const_cast<char_t *>("status"); \
    char_t *v = const_cast<char_t *>(TraceStatStr[idx]); \
    KeyValue kv = {k, v}; \
    DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
#define GELOGT(VALUE, fmt, ...) \
  do { \
    TraceStatus stat = (VALUE); \
    const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
    const int32_t idx = static_cast<int32_t>(stat); \
    char_t *k = const_cast<char_t *>("status"); \
    char_t *v = const_cast<char_t *>(TraceStatStr[idx]); \
    KeyValue kv = {k, v}; \
    DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], \
               ##__VA_ARGS__); \
  } while (false)
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
  do { \
    dlog_error((MOD_NAME), "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
               ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
               ##__VA_ARGS__); \
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
  do { \
    dlog_error((MOD_NAME), "%" PRIu64 " %s: ErrorNo: %" PRIuLEAST8 "(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], \
               (ERROR_CODE), ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \
               ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__); \
  } while (false)
// print memory when it is greater than 1KB.
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \
  do { \
    if (static_cast<size_t>(SIZE) > 1024UL) { \
      GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \
    } \
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \
  do { \
    if (static_cast<size_t>(SIZE) > 1024UL) { \
      GELOGI("MallocMemory, func=%s, size=%" PRIu64 ", purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \
    } \
  } while (false)
#ifdef __cplusplus
}
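The switch from "%lu" to the <cinttypes> format macros above is why the new #include <cinttypes> appears at the top of this header: PRIu64 always expands to the right conversion specifier for a uint64_t, while "%lu" is wrong wherever unsigned long is 32-bit. A minimal standalone sketch of the same pattern, using plain printf with no GE dependencies:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t tid = 12345U;  // stands in for GeLog::GetTid() in the macros above
  // PRIu64 is portable across LP64/LLP64 targets, unlike a hard-coded "%lu".
  (void)std::printf("%" PRIu64 " worker thread started\n", tid);
  return 0;
}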
@@ -180,13 +180,13 @@
#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \
  { \
    GELOGE((_status), "[Check][InnerData]%s", (errormsg)); \
    REPORT_INNER_ERROR("E19999", "%s", (errormsg)); \
    REPORT_INNER_ERROR("E10052", "%s", (errormsg)); \
  }
#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \
  { \
    GELOGW("%s", (errormsg)); \
    ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {(errormsg)}); \
    ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"reason"}, {(errormsg)}); \
  }
#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \
@@ -194,7 +194,7 @@
    const bool b = (expr); \
    if (!b) { \
      GELOGE((_status), "%s", (errormsg)); \
      ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {(errormsg)}); \
      ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"reason"}, {(errormsg)}); \
      return (_status); \
    } \
  } while (false)
@@ -31,8 +31,8 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
  }
  ~OpTypeContainer() = default;
  void Register(const std::string &op_type) {
    static_cast<void>(op_type_list_.insert(op_type));
  bool Register(const std::string &op_type) {
    return op_type_list_.insert(op_type).second;
  }
  bool IsExisting(const std::string &op_type) {
@@ -52,7 +52,7 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
  const char_t *var_name = str_name; \
  const bool g_##var_name##_reg = (static_cast<void>(OpTypeContainer::Instance()->Register(str_name)), true);
  const bool g_##var_name##_reg = OpTypeContainer::Instance()->Register(str_name);
#define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name))
#endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_
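The change above makes Register report whether an op type was newly inserted (std::set::insert().second), so the registration global reflects the real outcome instead of a hard-coded true. A self-contained sketch of the same registry pattern; the class and names here are illustrative, not the GE headers themselves:

#include <iostream>
#include <set>
#include <string>

class TypeRegistry {
 public:
  static TypeRegistry &Instance() {
    static TypeRegistry inst;
    return inst;
  }
  // Returns true only for the first registration of a given type name,
  // mirroring the insert().second used in the diff above.
  bool Register(const std::string &op_type) { return types_.insert(op_type).second; }
  bool IsExisting(const std::string &op_type) const { return types_.count(op_type) != 0U; }

 private:
  std::set<std::string> types_;
};

// A file-scope bool forces registration during static initialization,
// the same trick REGISTER_OPTYPE_DEFINE relies on in the header being patched.
static const bool g_conv_reg = TypeRegistry::Instance().Register("Conv2D");

int main() {
  std::cout << std::boolalpha << TypeRegistry::Instance().IsExisting("Conv2D") << '\n';  // true
  return 0;
}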
@@ -162,6 +162,7 @@ REGISTER_OPTYPE_DECLARE(_IF, "_If");
REGISTER_OPTYPE_DECLARE(STATELESSIF, "StatelessIf");
REGISTER_OPTYPE_DECLARE(IF, "If");
REGISTER_OPTYPE_DECLARE(CASE, "Case");
REGISTER_OPTYPE_DECLARE(STATELESSCASE, "StatelessCase");
REGISTER_OPTYPE_DECLARE(_WHILE, "_While");
REGISTER_OPTYPE_DECLARE(WHILE, "While");
REGISTER_OPTYPE_DECLARE(STATELESSWHILE, "StatelessWhile");
@@ -626,8 +627,8 @@ struct ModelFileHeader {
  uint32_t version = MODEL_VERSION; // version 1.0
  uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature
  uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length.
  uint8_t is_encrypt =
      static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); // whether encrypted 0:not encrypt, 1:encrypt
  // whether encrypted 0:not encrypt, 1:encrypt
  uint8_t is_encrypt = static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED);
  uint8_t is_checksum = static_cast<uint8_t>(ModelCheckType::CHECK); // whether to check the checksum
  uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model
  uint8_t genmode = 0U; // 0:offline generate 1:online generate
@@ -31,6 +31,7 @@ enum class PriorityEnum {
  COST_1 = 1,
  COST_2 = 2,
  COST_3 = 3,
  COST_4 = 4,
  COST_9 = 9,
  COST_10 = 10,
};
@@ -178,7 +178,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
  Status GetAippType(const uint32_t model_id, const uint32_t index, InputAippType &type, size_t &aipp_index);
  Status CommandHandle(const Command &command);
  Status CommandHandle(const Command &command) const;
  Status SetDump(const DumpConfig &dump_config);
@@ -133,6 +133,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_IF;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSIF;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IF;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CASE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSCASE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_WHILE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *WHILE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSWHILE;
@@ -1 +1 @@
Subproject commit 002617852e22767bd864db3c01595630e23f5496
Subproject commit 58412ad7aed08cd1c01cc070d80706e4253c9075
@@ -18,31 +18,38 @@
#define AICPU_OP_TYPE_LIST_H_
extern "C" {
enum OpKernelType { TF_KERNEL, CPU_KERNEL };
enum OpKernelType {
  TF_KERNEL,
  CPU_KERNEL
};
enum ReturnCode { OP_TYPE_NOT_SUPPORT, FORMAT_NOT_SUPPORT, DTYPE_NOT_SUPPORT };
enum ReturnCode {
  OP_TYPE_NOT_SUPPORT,
  FORMAT_NOT_SUPPORT,
  DTYPE_NOT_SUPPORT
};
#pragma pack(push, 1)
// One byte alignment
struct SysOpInfo {
  uint64_t opLen;
  uint64_t opType;
  OpKernelType kernelsType;
  uint64_t opLen;
  uint64_t opType;
  OpKernelType kernelsType;
};
struct SysOpCheckInfo {
  uint64_t opListNum;
  uint64_t offSetLen;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
  uint64_t opListNum;
  uint64_t offSetLen;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
};
struct SysOpCheckResp {
  uint64_t opListNum;
  bool isWithoutJson;
  uint64_t returnCodeList;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
  uint64_t opListNum;
  bool isWithoutJson;
  uint64_t returnCodeList;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
};
#pragma pack(pop)
}
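The #pragma pack(push, 1) above removes padding so these structs can be exchanged byte-for-byte between host and AICPU. A minimal sketch showing the effect on layout; the field names are copied from SysOpCheckResp above, the comparison struct and sizes are illustrative (40 bytes assumes natural alignment on a typical LP64 target):

#include <cstdint>
#include <cstdio>

#pragma pack(push, 1)
struct PackedCheckResp {   // mirrors SysOpCheckResp above, 1-byte packed
  uint64_t opListNum;
  bool isWithoutJson;
  uint64_t returnCodeList;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
};
#pragma pack(pop)

struct DefaultCheckResp {  // same fields, default alignment
  uint64_t opListNum;
  bool isWithoutJson;
  uint64_t returnCodeList;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
};

int main() {
  // Typically prints packed=33 default=40: packing drops the 7 padding bytes after the bool.
  std::printf("packed=%zu default=%zu\n", sizeof(PackedCheckResp), sizeof(DefaultCheckResp));
  return 0;
}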
@@ -24,81 +24,83 @@ namespace aicpu {
using char_t = char;
#pragma pack(push, 1)
struct AicpuParamHead {
  uint32_t length; // Total length: include cunstom message
  uint32_t ioAddrNum; // Input and output address number
  uint32_t extInfoLength; // extInfo struct Length
  uint64_t extInfoAddr; // extInfo address
struct AicpuParamHead
{
  uint32_t length; // Total length: include cunstom message
  uint32_t ioAddrNum; // Input and output address number
  uint32_t extInfoLength; // extInfo struct Length
  uint64_t extInfoAddr; // extInfo address
};
enum class AicpuConfigMsgType {
  AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */
  AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */
  AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */
  AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */
  AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */
  AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */
};
enum class AicpuErrMsgType {
  ERR_MSG_TYPE_NULL = 0,
  ERR_MSG_TYPE_AICORE = 1,
  ERR_MSG_TYPE_AICPU = 2,
  ERR_MSG_TYPE_NULL = 0,
  ERR_MSG_TYPE_AICORE = 1,
  ERR_MSG_TYPE_AICPU = 2,
};
enum class AicpuExtInfoMsgType {
  EXT_MODEL_ID_MSG_TYPE = 0,
  EXT_MODEL_ID_MSG_TYPE = 0,
};
struct AicpuConfigMsg {
  uint8_t msgType;
  uint8_t reserved1;
  uint16_t bufLen;
  uint32_t offset;
  uint64_t bufAddr;
  uint32_t tsId;
  uint32_t reserved2;
  uint8_t msgType;
  uint8_t reserved1;
  uint16_t bufLen;
  uint32_t offset;
  uint64_t bufAddr;
  uint32_t tsId;
  uint32_t reserved2;
};
struct AicpuModelIdInfo {
  uint32_t modelId;
  uint32_t extendModelId;
  uint32_t extendInfo[13];
  uint32_t modelId;
  uint32_t extendModelId;
  uint32_t extendInfo[13];
};
// 64 bytes
struct AicpuExtendInfo {
  uint8_t msgType;
  uint8_t version;
  uint8_t reserved[2];
  union {
    AicpuModelIdInfo modelIdMap;
  };
  uint8_t msgType;
  uint8_t version;
  uint8_t reserved[2];
  union {
    AicpuModelIdInfo modelIdMap;
  };
};
struct AicoreErrMsgInfo {
  uint8_t errType;
  uint8_t version;
  uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
  uint32_t errorCode;
  uint32_t modelId;
  uint32_t taskId;
  uint32_t streamId;
  uint64_t transactionId;
  uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */
  uint8_t errType;
  uint8_t version;
  uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
  uint32_t errorCode;
  uint32_t modelId;
  uint32_t taskId;
  uint32_t streamId;
  uint64_t transactionId;
  uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */
};
struct AicpuErrMsgInfo {
  uint8_t errType;
  uint8_t version;
  uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
  uint32_t errorCode;
  uint32_t modelId;
  uint32_t streamId;
  uint64_t transactionId;
  char_t opName[64]; /* op name str */
  char_t errDesc[128]; /* err msg desc info */
  uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */
  uint8_t errType;
  uint8_t version;
  uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
  uint32_t errorCode;
  uint32_t modelId;
  uint32_t streamId;
  uint64_t transactionId;
  char_t opName[64]; /* op name str */
  char_t errDesc[128]; /* err msg desc info */
  uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */
};
#pragma pack(pop)
} // namespace aicpu
#endif // AICPU_TASK_STRUCT_H
@@ -1,13 +1,13 @@
/**
 * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Description:interface.
 * Create: 2021-12-21
 */
/**
 * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Description:interface.
 * Create: 2021-12-21
 */
#ifndef AICPU_TYPE_DEF_H
#define AICPU_TYPE_DEF_H
@@ -25,24 +25,28 @@ typedef float float32_t;
typedef double float64_t;
#endif
inline uint64_t PtrToValue(const void *ptr) {
  return static_cast<const uint64_t>(reinterpret_cast<const uintptr_t>(ptr));
inline uint64_t PtrToValue(const void *ptr)
{
  return static_cast<const uint64_t>(reinterpret_cast<const uintptr_t>(ptr));
}
inline void *ValueToPtr(const uint64_t value) {
  return reinterpret_cast<void *>(static_cast<const uintptr_t>(value));
inline void *ValueToPtr(const uint64_t value)
{
  return reinterpret_cast<void *>(static_cast<const uintptr_t>(value));
}
template <typename TI, typename TO>
inline TO *PtrToPtr(TI *ptr) {
  return reinterpret_cast<TO *>(ptr);
template<typename TI, typename TO>
inline TO *PtrToPtr(TI *ptr)
{
  return reinterpret_cast<TO *>(ptr);
}
template <typename T>
inline T *PtrAdd(T *const ptr, const size_t maxIdx, const size_t idx) {
  if ((ptr != nullptr) && (idx < maxIdx)) {
    return reinterpret_cast<T *>(ptr + idx);
  }
  return nullptr;
template<typename T>
inline T *PtrAdd(T * const ptr, const size_t maxIdx, const size_t idx)
{
  if ((ptr != nullptr) && (idx < maxIdx)) {
    return reinterpret_cast<T *>(ptr + idx);
  }
  return nullptr;
}
#endif // AICPU_TYPE_DEF_H
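A self-contained usage sketch of the helpers defined above (local copies, so it compiles without the aicpu headers): PtrAdd returns nullptr instead of walking past the end of a buffer, and PtrToValue/ValueToPtr round-trip a pointer through a uint64_t handle.

#include <cstddef>
#include <cstdint>
#include <cstdio>

inline uint64_t PtrToValue(const void *ptr) {
  return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
}

inline void *ValueToPtr(const uint64_t value) {
  return reinterpret_cast<void *>(static_cast<uintptr_t>(value));
}

template <typename T>
inline T *PtrAdd(T *const ptr, const size_t maxIdx, const size_t idx) {
  if ((ptr != nullptr) && (idx < maxIdx)) {
    return ptr + idx;  // in bounds: plain pointer arithmetic
  }
  return nullptr;      // null input or out-of-range index
}

int main() {
  int32_t buf[4] = {10, 20, 30, 40};
  int32_t *ok = PtrAdd(buf, 4U, 2U);    // &buf[2]
  int32_t *oob = PtrAdd(buf, 4U, 7U);   // out of range -> nullptr
  std::printf("ok=%d oob_is_null=%d\n", *ok, oob == nullptr ? 1 : 0);

  const uint64_t handle = PtrToValue(buf);  // pointer carried around as a 64-bit value
  std::printf("round_trip=%d\n", *static_cast<int32_t *>(ValueToPtr(handle)));
  return 0;
}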
@@ -2507,6 +2507,31 @@ REG_OP(GetNextFromQueue)
    .OP_END_FACTORY_REG(GetNextFromQueue)
/**
*@brief Get the batch of data in data processing . \n
*@par Attributes:
*@li output_types: A nested structure of DType objects corresponding to each
component of an element of this dataset.
*@li output_shapes: A nested structure of TensorShape objects corresponding
to each component of an element of this dataset.
*@li channel_name: A string. Default "" . \n
*@par Outputs:
*y:A nested structure of Tensor objects . \n
*@par Third-party framework compatibility
*Compatible with tensorflow GetNext operator
*/
REG_OP(PeekData)
    .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64,
                                   DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
    .ATTR(output_types, ListType, {})
    .ATTR(output_shapes, ListListInt, {})
    .ATTR(channel_name, String, "")
    .OP_END_FACTORY_REG(PeekData)
/**
* @brief OptionalGetValue
* @par Inputs:
* optional: A tensor of type variant
@@ -42,8 +42,8 @@ namespace ge {
*Compatible with the TensorFlow operator AddN.
*/
REG_OP(AddN)
    .DYNAMIC_INPUT(x, TensorType::NumberType())
    .OUTPUT(y, TensorType::NumberType())
    .DYNAMIC_INPUT(x, TensorType({NumberType(), DT_VARIANT}))
    .OUTPUT(y, TensorType({NumberType(), DT_VARIANT}))
    .REQUIRED_ATTR(N, Int)
    .OP_END_FACTORY_REG(AddN)
@@ -349,6 +349,19 @@ REG_OP(StatefulPartitionedCall)
    .ATTR(executor_type, String, "")
    .OP_END_FACTORY_REG(StatefulPartitionedCall)
/**
* @par Inputs:
* @li input: The input tensors \n
*
* @par Outputs:
* @li output: The output tensors. \n
*/
REG_OP(ToBool)
    .INPUT(input, TensorType({DT_INT64, DT_INT32, DT_INT16, DT_INT8, \
                              DT_UINT8, DT_FLOAT, DT_DOUBLE, DT_STRING, DT_BOOL}))
    .OUTPUT(output, DT_BOOL)
    .OP_END_FACTORY_REG(ToBool)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_
@@ -1931,9 +1931,6 @@ REG_OP(DenseImageWarpGrad)
*@par Third-party framework compatibility
*Compatible with pytorch GridSampler2D operator.
*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler2D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
@@ -1966,9 +1963,6 @@ REG_OP(GridSampler2D)
*@par Third-party framework compatibility
*Compatible with pytorch GridSampler2DGrad operator.
*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler2DGrad)
    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
@@ -2063,9 +2057,6 @@ REG_OP(IMGWarpOffsets)
*@par Third-party framework compatibility
*Compatible with pytorch GridSampler3D operator.
*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler3D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
@@ -2096,9 +2087,6 @@ REG_OP(GridSampler3D)
*@par Third-party framework compatibility
*Compatible with pytorch GridSampler3DGrad operator.
*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler3DGrad)
    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
@@ -491,6 +491,40 @@ REG_OP(TridiagonalSolve)
    .ATTR(partial_pivoting, Bool, true)
    .OP_END_FACTORY_REG(TridiagonalSolve)
/**
* @brief Solution of banded triangular matrix . \n
* @par Inputs:
* The input bands has to be symmetric and positive definite.
* @li bands:A Tensor. Must be one of the following types: double, float32,
float16,complex64, complex128. Shape is [... K,M], K corresponds to the
number of bands (actually stored diagonals), and M is the data of the
diagonals.
@li rhs:shape is [...M] or [...M, N]. Has the same type as bands \n
* @par Outputs:
* @li output:A Tensor. Has the same type as bands . \n
* @par Attributes:
* @li lower:An optional bool. Defaults to True.True: indicates the lower
triangular matrix. False: indicates the upper triangular matrix.
* @li adjoint:An optional bool. Defaults to False.Boolean indicating whether to
solve with matrix or its (block-wise) adjoint. \n
* @par Third-party framework compatibility
* Compatible with tensorflow BandedTriangularSolve operator.
*/
REG_OP(BandedTriangularSolve)
    .INPUT(bands, TensorType({DT_FLOAT, DT_DOUBLE, \
                              DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, \
                            DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
    .OUTPUT(output,TensorType({DT_FLOAT, DT_DOUBLE, \
                               DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
    .ATTR(lower, Bool, true)
    .ATTR(adjoint, Bool, false)
    .OP_END_FACTORY_REG(BandedTriangularSolve)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_
@@ -303,6 +303,21 @@ REG_OP(MutableHashTable)
    .REQUIRED_ATTR(key_dtype, Type)
    .REQUIRED_ATTR(value_dtype, Type)
    .OP_END_FACTORY_REG(MutableHashTable)
/**
* @brief Remove keys in the given table . \n
* @par Inputs:
* @li table_handle: A Tensor of type resource. Handle to the table. \n
* @li keys: A Tensor. Any shape. Keys to remove. \n
* @par Third-party framework compatibility.
* Compatible with tensorflow LookupTableRemove operator.
*/
REG_OP(LookupTableRemove)
    .INPUT(table_handle, TensorType({DT_RESOURCE}))
    .INPUT(keys,TensorType({RealNumberType, DT_BOOL, DT_STRING}))
    .OP_END_FACTORY_REG(LookupTableRemove)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_
@@ -377,7 +377,7 @@ to each component of an element of this dataset.
REG_OP(GetNext)
    .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64,
                                   DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
                                   DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
    .ATTR(output_types, ListType, {})
    .ATTR(output_shapes, ListListInt, {})
    .ATTR(output_num, Int, 1)
@@ -1156,6 +1156,185 @@ REG_OP(CdistGrad)
    .ATTR(p, Float, 2.0)
    .OP_END_FACTORY_REG(CdistGrad)
/**
* @brief Computes the RaggedBincount. \n
* @par Inputs:
* Four inputs, including:
* @li splits: A tensor with shape: BxPXM. Must be one of the following types:
* int64.
* @li values: A tensor with shape: BxPXM. Must be one of the following types:
* float16, float32.
* @li size: A tensor with shape: BxRxM. Must be one of the following types:
* int32, int64.
* @li weights: A tensor with shape: BxRxM.
* Must be one of the following types: int32, int64, float, double. \n
* @par Attributes:
* @li binary_output: An optional bool \n
* @par Outputs:
* output: Must be one of the following types: int32, int64, float, double. \n
*/
REG_OP(RaggedBincount)
    .INPUT(splits, TensorType({DT_INT64}))
    .INPUT(values, TensorType({DT_INT32, DT_INT64}))
    .INPUT(size, TensorType({DT_INT32, DT_INT64}))
    .INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
    .OUTPUT(output, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
    .ATTR(binary_output, Bool, false)
    .OP_END_FACTORY_REG(RaggedBincount)
/**
* @brief Count the number of occurrences of each value in the input dense integer array,
* and output it according to the sparse matrix. \n
* @par Inputs:
* @li values: A 1D or 2D tensor of type int32 or int64.
* @li weights: A tensor of type int32 or int64 or float or double. \n
* @par Attributes:
* @li minlength: An optional int >=-1. Defaults to -1.
* @li maxlength: An optional int >=-1. Defaults to -1.
* @li binary_output: A required bool. \n
* @par Outputs:
* output_indices: A tensor of type int64.
* output_values: A tensor of the same type as "weights".
* output_dense_shape: A tensor of type int64. \n
* @par Third-party framework compatibility
* Compatible with the TensorFlow operator DenseCountSparseOutput. \n
*/
REG_OP(DenseCountSparseOutput)
    .INPUT(values, TensorType({DT_INT32,DT_INT64}))
    .INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
    .OUTPUT(output_indices, TensorType({DT_INT64}))
    .OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
    .OUTPUT(output_dense_shape, TensorType({DT_INT64}))
    .ATTR(minlength, Int, -1)
    .ATTR(maxlength, Int, -1)
    .REQUIRED_ATTR(binary_output, Bool)
    .OP_END_FACTORY_REG(DenseCountSparseOutput)
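The doc block above is easiest to read with a concrete case. The standalone sketch below counts a small 1-D integer array and prints a sparse-style result (index, value pairs plus a dense shape) in the spirit of that description; the exact output conventions of the operator (for example how minlength/maxlength clip the dense shape, or how weights are combined) are assumed here, not taken from this header.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // values = [1, 2, 2, 3], no weights, binary_output = false
  const std::vector<int64_t> values = {1, 2, 2, 3};
  std::map<int64_t, int64_t> counts;
  int64_t dense_shape = 0;
  for (const int64_t v : values) {
    ++counts[v];
    dense_shape = std::max(dense_shape, v + 1);  // assumed: dense shape covers max value + 1
  }
  for (const auto &kv : counts) {
    std::printf("index=[%lld] value=%lld\n", static_cast<long long>(kv.first),
                static_cast<long long>(kv.second));
  }
  std::printf("dense_shape=[%lld]\n", static_cast<long long>(dense_shape));
  return 0;
}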
/** | |||||
* @brief Count the number of occurrences of each value in the input ragged integer array, | |||||
* and output it according to the sparse matrix. \n | |||||
* @par Inputs: | |||||
* @li splits: A 1D tensor of type int64. | |||||
* @li values: A 1D or 2D tensor of type int32 or int64. | |||||
* @li weights: A tensor of type int32 or int64 or float or double. \n | |||||
* @par Attributes: | |||||
* @li minlength: An optional int >=-1. Defaults to -1. | |||||
* @li maxlength: An optional int >=-1. Defaults to -1. | |||||
* @li binary_output: A required bool. \n | |||||
* @par Outputs: | |||||
* output_indices: A tensor of type int64. | |||||
* output_values: A tensor of the same type as "weights". | |||||
* output_dense_shape: A tensor of type int64. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator RaggedCountSparseOutput. \n | |||||
*/ | |||||
REG_OP(RaggedCountSparseOutput) | |||||
.INPUT(splits, TensorType({DT_INT64})) | |||||
.INPUT(values, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE})) | |||||
.OUTPUT(output_indices, TensorType({DT_INT64})) | |||||
.OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE})) | |||||
.OUTPUT(output_dense_shape, TensorType({DT_INT64})) | |||||
.ATTR(minlength, Int, -1) | |||||
.ATTR(maxlength, Int, -1) | |||||
.REQUIRED_ATTR(binary_output, Bool) | |||||
.OP_END_FACTORY_REG(RaggedCountSparseOutput) | |||||
/** | |||||
* @brief SignBitsUnpack. | |||||
* @par Inputs: | |||||
* One input, including:
* @li x: A 1D Tensor of type uint8.
* @par Attributes:
* @li size: A required int, the first dimension of the output tensor.
* @li dtype: A required type, the data type of the output tensor: DT_FLOAT(0) or DT_FLOAT16(1).
* @par Outputs:
* @li y: A 2D Tensor of type float32 or float16, with shape (size, (x.shape * 8) / size).
*/ | |||||
REG_OP(SignBitsUnpack) | |||||
.INPUT(x, TensorType({DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.REQUIRED_ATTR(size, Int) | |||||
.REQUIRED_ATTR(dtype, Type) | |||||
.OP_END_FACTORY_REG(SignBitsUnpack) | |||||
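// Illustrative sketch only: an assumed unpacking rule for SignBitsUnpack, where each
// bit of the packed uint8 input expands to one float. Here a set bit is assumed to map
// to +1.0f and a cleared bit to -1.0f, and the flat result is viewed row-major as a
// (size, total_bits / size) matrix; both the mapping and the bit order are assumptions.
#include <cstdint>
#include <vector>

std::vector<float> SignBitsUnpackRef(const std::vector<uint8_t> &packed) {
  std::vector<float> out;
  out.reserve(packed.size() * 8);
  for (uint8_t byte : packed) {
    for (int bit = 0; bit < 8; ++bit) {
      out.push_back(((byte >> bit) & 1U) ? 1.0f : -1.0f);  // bit order is an assumption
    }
  }
  return out;  // reshape to (size, out.size() / size) per the "size" attribute
}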
/** | |||||
* @brief Computes the scaled masked softmax. \n
* @par Inputs: | |||||
* Two inputs, including: | |||||
* @li x: A mutable Tensor. Supported types are float16 and float32.
* @li mask: An optional Tensor. Must meet all of the following rules:
* the shape of "mask" should be broadcastable with "x";
* the dtype of "mask" should be bool;
* "mask" is binary.
* @par Attributes: | |||||
* @li scale: An optional float used to scale the input tensor "x" before the softmax is applied.
* Defaults to "1.0". \n
* @li fixed_triu_mask: An optional bool used to enable or disable a fixed upper-triangular mask.
* Defaults to "false". \n
* @par Outputs: | |||||
* y: A mutable Tensor. Has the same type as "x". \n | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(ScaledMaskedSoftmax) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(mask, TensorType({DT_BOOL, DT_UINT1})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16})) | |||||
.ATTR(scale, Float, 1.0) | |||||
.ATTR(fixed_triu_mask, Bool, false) | |||||
.OP_END_FACTORY_REG(ScaledMaskedSoftmax) | |||||
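// Illustrative sketch only: the assumed per-row computation of ScaledMaskedSoftmax.
// Each row of "x" is scaled by "scale", positions where the mask is set are assumed to
// be suppressed with a large negative value, and a numerically stable softmax is taken
// over the row.
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> ScaledMaskedSoftmaxRow(const std::vector<float> &x,
                                          const std::vector<bool> &mask, float scale) {
  std::vector<float> logits(x.size());
  float row_max = -INFINITY;
  for (size_t i = 0; i < x.size(); ++i) {
    logits[i] = mask[i] ? -1e9f : scale * x[i];   // masked positions are suppressed
    row_max = std::max(row_max, logits[i]);
  }
  float sum = 0.0f;
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    y[i] = std::exp(logits[i] - row_max);         // subtract the max for stability
    sum += y[i];
  }
  for (float &v : y) v /= sum;
  return y;
}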
/** | |||||
* @brief Computes the gradient of scaled masked softmax. \n
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li y_grad: A mutable Tensor. Supported types are float16 and float32.
* @li y: A mutable Tensor. Supported types are float16 and float32.
* @li mask: An optional Tensor. Must meet all of the following rules:
* the shape of "mask" should be broadcastable with "y";
* the dtype of "mask" should be bool;
* "mask" is binary.
* @par Attributes: | |||||
* @li scale: An optional float used to scale the input tensor before the softmax is applied.
* Defaults to "1.0". \n
* @li fixed_triu_mask: An optional bool used to enable or disable a fixed upper-triangular mask.
* Defaults to "false". \n
* @par Outputs: | |||||
* x_grad: A mutable Tensor. Has the same type as "y_grad". \n
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(ScaledMaskedSoftmaxGrad) | |||||
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(mask, TensorType({DT_BOOL, DT_UINT1})) | |||||
.OUTPUT(x_grad, TensorType({DT_FLOAT16})) | |||||
.ATTR(scale, Float, 1.0) | |||||
.ATTR(fixed_triu_mask, Bool, false) | |||||
.OP_END_FACTORY_REG(ScaledMaskedSoftmaxGrad) | |||||
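// Illustrative sketch only: the assumed per-row backward rule for ScaledMaskedSoftmax,
// i.e. the standard softmax gradient scaled by "scale":
//   x_grad_i = scale * y_i * (y_grad_i - sum_j(y_grad_j * y_j)),
// with positions masked in the forward pass assumed to receive zero gradient.
#include <vector>

std::vector<float> ScaledMaskedSoftmaxGradRow(const std::vector<float> &y_grad,
                                              const std::vector<float> &y,
                                              const std::vector<bool> &mask, float scale) {
  float dot = 0.0f;
  for (size_t i = 0; i < y.size(); ++i) dot += y_grad[i] * y[i];
  std::vector<float> x_grad(y.size());
  for (size_t i = 0; i < y.size(); ++i) {
    x_grad[i] = mask[i] ? 0.0f : scale * y[i] * (y_grad[i] - dot);
  }
  return x_grad;
}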
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ |
@@ -1595,6 +1595,50 @@ REG_OP(Pinverse) | |||||
.ATTR(rcond, Float, 1e-15) | .ATTR(rcond, Float, 1e-15) | ||||
.OP_END_FACTORY_REG(Pinverse) | .OP_END_FACTORY_REG(Pinverse) | ||||
/** | |||||
* @brief Applies sparse updates to "input": for each index in "indices", writes to "output" the maximum of the corresponding elements of "input" and "updates". \n
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li input: Must be one of the following types: | |||||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. | |||||
* @li indices: Must be one of the following types: | |||||
* int32, int64. | |||||
* @li updates: Must have the same type as input. \n | |||||
* @par Outputs: | |||||
* output: A Tensor with the same type as input. \n | |||||
*/ | |||||
REG_OP(TensorScatterMax) | |||||
.INPUT(input, TensorType::BasicType()) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(updates, TensorType::BasicType()) | |||||
.OUTPUT(output, TensorType::BasicType()) | |||||
.OP_END_FACTORY_REG(TensorScatterMax) | |||||
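// Illustrative sketch only: the assumed update rule of TensorScatterMax for a 1-D
// "input" with indices of shape [N, 1]: the output starts as a copy of the input and
// each indexed element is replaced by the maximum of its current value and the update.
#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<float> TensorScatterMaxRef(std::vector<float> input,
                                       const std::vector<int64_t> &indices,
                                       const std::vector<float> &updates) {
  for (size_t i = 0; i < indices.size(); ++i) {
    float &slot = input[indices[i]];
    slot = std::max(slot, updates[i]);   // TensorScatterMin below would use std::min here
  }
  return input;                          // same shape and type as "input"
}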
/** | |||||
* @brief Applies sparse updates to "input": for each index in "indices", writes to "output" the minimum of the corresponding elements of "input" and "updates". \n
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li input: Must be one of the following types: | |||||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. | |||||
* @li indices: Must be one of the following types: | |||||
* int32, int64. | |||||
* @li updates: Must have the same type as input. \n | |||||
* @par Outputs: | |||||
* output: A Tensor with the same type as input. \n | |||||
*/ | |||||
REG_OP(TensorScatterMin) | |||||
.INPUT(input, TensorType::BasicType()) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(updates, TensorType::BasicType()) | |||||
.OUTPUT(output, TensorType::BasicType()) | |||||
.OP_END_FACTORY_REG(TensorScatterMin) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ |
@@ -886,6 +886,7 @@ REG_OP(Conv2D) | |||||
* to the input image for int8 type. Ensure that the output is within the | * to the input image for int8 type. Ensure that the output is within the | ||||
* effective range. Defaults to 0. | * effective range. Defaults to 0. | ||||
*@li data_format: Reserved. | *@li data_format: Reserved. | ||||
* @li alg: An optional string, specifying the compress algorithm. Defaults to "weight_unzip".
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A 4D Tensor of output feature map. Has the same type as "x". With the | * y: A 4D Tensor of output feature map. Has the same type as "x". With the | ||||
@@ -909,6 +910,7 @@ REG_OP(Conv2DCompress) | |||||
.ATTR(groups, Int, 1) | .ATTR(groups, Int, 1) | ||||
.ATTR(data_format, String, "NHWC") | .ATTR(data_format, String, "NHWC") | ||||
.ATTR(offset_x, Int, 0) | .ATTR(offset_x, Int, 0) | ||||
.ATTR(alg, String, "weight_unzip") | |||||
.OP_END_FACTORY_REG(Conv2DCompress) | .OP_END_FACTORY_REG(Conv2DCompress) | ||||
/** | /** | ||||
@@ -1688,5 +1690,24 @@ REG_OP(FixPipe) | |||||
.ATTR(eltwise_mode, String, "") | .ATTR(eltwise_mode, String, "") | ||||
.OP_END_FACTORY_REG(FixPipe) | .OP_END_FACTORY_REG(FixPipe) | ||||
/** | |||||
* @brief Solves a batch of isotonic regression problems. \n | |||||
* @par Inputs: | |||||
* @li input: A Tensor of real number type. \n
* @par Attributes: | |||||
* @li output_dtype: An optional type, the data type of the output. Defaults to DT_FLOAT. \n
* @par Outputs: | |||||
* @li output: A Tensor of type float16, float32, or double.
* @li segments: A Tensor of type int32. \n
*/ | |||||
REG_OP(IsotonicRegression) | |||||
.INPUT(input, TensorType::RealNumberType()) | |||||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(segments, TensorType({DT_INT32})) | |||||
.ATTR(output_dtype, Type, DT_FLOAT) | |||||
.OP_END_FACTORY_REG(IsotonicRegression) | |||||
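// Illustrative sketch only: a pool-adjacent-violators (PAVA) pass for one row, assuming
// a non-decreasing isotonic fit; whether the operator fits a non-decreasing or
// non-increasing sequence, and how "segments" is numbered, is an assumption here.
#include <vector>

std::vector<float> IsotonicRegressionRow(const std::vector<float> &input) {
  std::vector<float> mean;    // pooled block means
  std::vector<int> count;     // pooled block sizes
  for (float v : input) {
    mean.push_back(v);
    count.push_back(1);
    // Merge adjacent blocks while the monotonicity constraint is violated.
    while (mean.size() > 1 && mean[mean.size() - 2] > mean.back()) {
      const int n = count.back() + count[count.size() - 2];
      const float m = (mean.back() * count.back() +
                       mean[mean.size() - 2] * count[count.size() - 2]) / n;
      mean.pop_back(); count.pop_back();
      mean.back() = m; count.back() = n;
    }
  }
  std::vector<float> output;  // expand block means back to element granularity
  for (size_t b = 0; b < mean.size(); ++b) {
    for (int i = 0; i < count[b]; ++i) output.push_back(mean[b]);
  }
  return output;
}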
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ |
@@ -1550,7 +1550,8 @@ REG_OP(DecodeWheelsTarget) | |||||
*@li max_size_per_class: A required attribute of type int, specifying the nms output num per class. | *@li max_size_per_class: A required attribute of type int, specifying the nms output num per class. | ||||
*@li max_total_size: A required attribute of type int, specifying the the nms output num per batch. | *@li max_total_size: A required attribute of type int, specifying the the nms output num per batch. | ||||
*@li change_coordinate_frame: A optional attribute of type bool, whether to normalize coordinates after clipping. | *@li change_coordinate_frame: A optional attribute of type bool, whether to normalize coordinates after clipping. | ||||
*@li transpose_box: A optional attribute of type bool, whether inserted transpose before this op. must be "false" . \n | |||||
*@li transpose_box: An optional attribute of type bool, specifying whether a transpose is inserted before this op. Must be "false".
*@li image_size: An optional attribute of type ListInt, specifying the size of the image. \n
*@par Outputs: | *@par Outputs: | ||||
*@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4), | *@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4), | ||||
@@ -1580,6 +1581,7 @@ REG_OP(BatchMultiClassNonMaxSuppression) | |||||
.REQUIRED_ATTR(max_total_size, Int) | .REQUIRED_ATTR(max_total_size, Int) | ||||
.ATTR(change_coordinate_frame, Bool, false) | .ATTR(change_coordinate_frame, Bool, false) | ||||
.ATTR(transpose_box, Bool, false) | .ATTR(transpose_box, Bool, false) | ||||
.ATTR(image_size, ListInt, {}) | |||||
.OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression) | .OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression) | ||||
/** | /** | ||||
@@ -2316,6 +2318,40 @@ REG_OP(CIoU) | |||||
.ATTR(mode, String, "iou") | .ATTR(mode, String, "iou") | ||||
.ATTR(atan_sub_flag, Bool, false) | .ATTR(atan_sub_flag, Bool, false) | ||||
.OP_END_FACTORY_REG(CIoU) | .OP_END_FACTORY_REG(CIoU) | ||||
/** | |||||
* @brief First calculates the smallest enclosing box of the two input boxes and their IoU,
* then obtains the DIoU by combining the normalized center-point distance with the IoU. \n
* @par Inputs: | |||||
* Two inputs, including: | |||||
* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with | |||||
* shape (4, N). "N" indicates the number of bounding boxes, and the value | |||||
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. | |||||
* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 | |||||
* with shape (4, M). "M" indicates the number of ground truth boxes, and | |||||
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n | |||||
* @par Attributes: | |||||
* @li trans: An optional bool, true for 'xywh', false for 'xyxy'. | |||||
* @li is_cross: An optional bool, controls whether the output shape is [N, M] or [1, N].
* @li mode: An optional string, the computation mode; the value range is [iou, iof]. \n
* @par Outputs: | |||||
* overlap: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], | |||||
* specifying the IoU or IoF ratio . \n | |||||
* @attention Constraints: | |||||
* "is_cross" only support false. | |||||
*/ | |||||
REG_OP(DIoU) | |||||
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(trans, Bool, false) | |||||
.ATTR(is_cross, Bool, true) | |||||
.ATTR(mode, String, "iou") | |||||
.OP_END_FACTORY_REG(DIoU) | |||||
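// Illustrative sketch only: the standard DIoU formula for a single pair of 'xyxy'
// boxes, DIoU = IoU - d^2 / c^2, where d is the distance between the box centers and
// c is the diagonal of the smallest box enclosing both; the operator's [N, M] / [1, N]
// broadcasting and the 'xywh' layout handled by "trans" are not modeled here.
#include <algorithm>

float DIoURef(const float b[4], const float g[4]) {   // boxes as [x1, y1, x2, y2]
  const float inter_w = std::max(0.0f, std::min(b[2], g[2]) - std::max(b[0], g[0]));
  const float inter_h = std::max(0.0f, std::min(b[3], g[3]) - std::max(b[1], g[1]));
  const float inter = inter_w * inter_h;
  const float area_b = (b[2] - b[0]) * (b[3] - b[1]);
  const float area_g = (g[2] - g[0]) * (g[3] - g[1]);
  const float iou = inter / (area_b + area_g - inter);
  const float cx_b = (b[0] + b[2]) * 0.5f, cy_b = (b[1] + b[3]) * 0.5f;
  const float cx_g = (g[0] + g[2]) * 0.5f, cy_g = (g[1] + g[3]) * 0.5f;
  const float d2 = (cx_b - cx_g) * (cx_b - cx_g) + (cy_b - cy_g) * (cy_b - cy_g);
  const float cw = std::max(b[2], g[2]) - std::min(b[0], g[0]);   // enclosing box width
  const float ch = std::max(b[3], g[3]) - std::min(b[1], g[1]);   // enclosing box height
  const float c2 = cw * cw + ch * ch;                             // squared diagonal
  return iou - d2 / c2;
}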
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | ||||
@@ -426,7 +426,10 @@ REG_OP(ConfusionSoftmaxGrad) | |||||
*@li keepdims: A bool Scalar. If true, retains reduced dimensions with length 1 . \n | *@li keepdims: A bool Scalar. If true, retains reduced dimensions with length 1 . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor dtype of float16, float32. | |||||
*y: A Tensor dtype of float16, float32. \n | |||||
*@attention Constraints: | |||||
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version. | |||||
*/ | */ | ||||
REG_OP(SoftmaxGradExt) | REG_OP(SoftmaxGradExt) | ||||
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
@@ -1026,74 +1029,48 @@ REG_OP(RNNTLoss) | |||||
.OP_END_FACTORY_REG(RNNTLoss) | .OP_END_FACTORY_REG(RNNTLoss) | ||||
/** | /** | ||||
*@brief Performs group normalization . \n | |||||
* @brief Performs group normalization . \n | |||||
*@par Inputs: | |||||
* Five inputs, including: (NHWC, NCHW supported) | |||||
*@li x: A 4D Tensor of type float16 or float32, with format NHWC or | |||||
NCHW for 4D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format | |||||
NHWC or NCHW. Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with | |||||
format NHWC or NCHW. Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format | |||||
NHWC or NCHW. Reserved. Mu | |||||
st be "None" if the operation is used for training. | |||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with | |||||
format NHWC or NCHW. Specifies the variance used for inference. Reserved . \n | |||||
* @par Inputs: | |||||
* Three inputs, including:
* @li x: An ND Tensor of type float16 or float32, with format NCHW for 4D.
* @li gamma: A Tensor of type float16 or float32. Must be 1D. Specifies the scaling factor.
* @li beta: A Tensor of type float16 or float32. Must be 1D. Specifies the offset. \n
*@par Attributes: | |||||
*@li epsilon: An optional float32, specifying the small value added to | |||||
* @par Attributes: | |||||
* @li num_groups: A required int32, specifying the number of groups.
* @li eps: An optional float32, specifying the small value added to | |||||
variance to avoid dividing by zero. Defaults to "0.0001". | variance to avoid dividing by zero. Defaults to "0.0001". | ||||
*@li data_format: An optional string, specifying the format of "x". | |||||
* @li data_format: An optional string, specifying the format of "x". | |||||
Defaults to "NHWC". | Defaults to "NHWC". | ||||
*@li is_training: An optional bool, specifying if the operation is used for | |||||
* @li is_training: An optional bool, specifying if the operation is used for | |||||
training or inference. Defaults to "True" . \n | training or inference. Defaults to "True" . \n | ||||
*@par Outputs: | |||||
* Five outputs, including: (NHWC, NCHW supported) | |||||
*@li y: A 4D Tensor of type float16 or float32 for the normalized "x", | |||||
with format NHWC or NCHW for 4D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with | |||||
format NHWC or NCHW. Specifies the mean of "x". | |||||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is | |||||
with format NHWC or NCHW. Specifies the variance of "x". | |||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if | |||||
input "x" is with format NHWC or NCHW. Specifies the mean o | |||||
f "x" for gradient computation. Pass "None" to skip this output. | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if | |||||
input "x" is with format NHWC or NCHW. Specifies the varian | |||||
ce of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
* @par Outputs: | |||||
* Three outputs, including:
* @li y: An ND Tensor of type float16 or float32 for the normalized "x",
with format NCHW for 4D. | |||||
* @li mean: A Tensor of type float16 or float32. Must be 1D. Specifies the mean of "x". | |||||
* @li variance: A Tensor of type float16 or float32. Must be 1D. Specifies the variance of "x". \n | |||||
*@attention Constraints: | |||||
*@li If the operation is used for inference and outputs "reserve_space_1" | |||||
and "reserve_space_2" are available, then "reserve_space_1" has the same | |||||
value as "mean" and "reserve_spa | |||||
ce_2" has the same value as "variance". | |||||
*@li For Ascend 310, the result accuracy fails due to the square root | |||||
instruction . \n | |||||
* @attention Constraints: | |||||
* @li For Ascend 310, only support NCHW which can be trans to 5HD. \n | |||||
*@par Third-party framework compatibility | |||||
*@li Compatible with the PyTorch operator GroupNorm. | |||||
* @par Third-party framework compatibility | |||||
* @li Compatible with the PyTorch operator GroupNorm. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(GroupNorm) | REG_OP(GroupNorm) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(scale, TensorType({DT_FLOAT,})) | |||||
.INPUT(offset, TensorType({DT_FLOAT,})) | |||||
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) | |||||
.INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.0001) | |||||
.OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(num_groups, Int) | |||||
.ATTR(data_format, String, "NHWC") | .ATTR(data_format, String, "NHWC") | ||||
.ATTR(eps, Float, 0.0001) | |||||
.ATTR(is_training, Bool, true) | .ATTR(is_training, Bool, true) | ||||
.ATTR(num_groups, Int, 2) | |||||
.OP_END_FACTORY_REG(GroupNorm) | .OP_END_FACTORY_REG(GroupNorm) | ||||
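// Illustrative sketch only: the assumed group-normalization rule for NCHW input.
// Channels are split into num_groups groups; mean and variance are computed per
// (batch, group) over that group's channels and spatial positions, and each element is
// normalized and then scaled/shifted by the per-channel gamma and beta.
#include <cmath>
#include <vector>

void GroupNormRef(std::vector<float> &x, int N, int C, int HW, int num_groups,
                  const std::vector<float> &gamma, const std::vector<float> &beta,
                  float eps) {
  const int cg = C / num_groups;                    // channels per group
  for (int n = 0; n < N; ++n) {
    for (int g = 0; g < num_groups; ++g) {
      double sum = 0.0, sq = 0.0;
      const int base = (n * C + g * cg) * HW;       // group channels are contiguous in NCHW
      const int len = cg * HW;
      for (int i = 0; i < len; ++i) { sum += x[base + i]; sq += x[base + i] * x[base + i]; }
      const double mean = sum / len;
      const double var = sq / len - mean * mean;
      const double inv_std = 1.0 / std::sqrt(var + eps);
      for (int c = 0; c < cg; ++c) {
        for (int i = 0; i < HW; ++i) {
          float &v = x[base + c * HW + i];
          v = gamma[g * cg + c] * static_cast<float>((v - mean) * inv_std) + beta[g * cg + c];
        }
      }
    }
  }
}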
/** | /** | ||||
@@ -307,7 +307,7 @@ REG_OP(Relu6D) | |||||
* @par Inputs: | * @par Inputs: | ||||
* @li gradients: A Tensor of type RealNumberType. The backpropagated | * @li gradients: A Tensor of type RealNumberType. The backpropagated | ||||
gradients to the corresponding Relu6 operation. | |||||
gradients to the corresponding Relu6 operation. | |||||
* @li features: A Tensor with the same type as gradients. The features passed | * @li features: A Tensor with the same type as gradients. The features passed
as input to the corresponding Relu6 operation, or its output; | as input to the corresponding Relu6 operation, or its output; | ||||
using either one produces the same result. \n | using either one produces the same result. \n | ||||
@@ -325,22 +325,22 @@ REG_OP(Relu6Grad) | |||||
.OUTPUT(backprops, TensorType::RealNumberType()) | .OUTPUT(backprops, TensorType::RealNumberType()) | ||||
.OP_END_FACTORY_REG(Relu6Grad) | .OP_END_FACTORY_REG(Relu6Grad) | ||||
/** | /** | ||||
*@brief Calculate the elu_grad_v2 function. | |||||
*@brief Calculate the elu_grad_v2 function. | |||||
*Applies the element-wise function: | *Applies the element-wise function: | ||||
* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . | * Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . | ||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
* @li grads: A tensor. Must be one of the following types: | * @li grads: A tensor. Must be one of the following types: | ||||
* float16, float32. | |||||
* float16, float32. | |||||
* @li activations: A tensor. Must be one of the following types: | * @li activations: A tensor. Must be one of the following types: | ||||
* float16, float32. | |||||
* float16, float32. | |||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor with the same type and shape of grads's. | *y: A Tensor with the same type and shape of grads's. | ||||
* | |||||
* | |||||
*@par Attributes: | *@par Attributes: | ||||
*alpha: scalar parameter, default value = 1.0 | *alpha: scalar parameter, default value = 1.0 | ||||
*/ | |||||
*/ | |||||
REG_OP(EluGradV2) | REG_OP(EluGradV2) | ||||
.INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) | .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
.INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) | .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
@@ -972,18 +972,18 @@ REG_OP(SoftplusV2Grad) | |||||
/** | /** | ||||
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) | * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) | ||||
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. | * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. | ||||
* | |||||
* | |||||
* @par Inputs: | * @par Inputs: | ||||
* one input including: | * one input including: | ||||
* x: input A Tensor. Must be one of the following types: float32, float16 | * x: input A Tensor. Must be one of the following types: float32, float16 | ||||
* | |||||
* | |||||
* @par Attributes: | * @par Attributes: | ||||
* alpha: An optional float. Defaults to 1.0. \n | * alpha: An optional float. Defaults to 1.0. \n | ||||
* @par Outputs: | * @par Outputs: | ||||
* one output including: | * one output including: | ||||
* y:A Tensor of the same type as x | * y:A Tensor of the same type as x | ||||
* | |||||
* | |||||
*/ | */ | ||||
REG_OP(ThresholdedRelu) | REG_OP(ThresholdedRelu) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -1059,7 +1059,7 @@ REG_OP(HardShrink) | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* Compatible with the Pytorch operator Hardsigmoid. \n | * Compatible with the Pytorch operator Hardsigmoid. \n | ||||
*/ | |||||
*/ | |||||
REG_OP(HardSigmoid) | REG_OP(HardSigmoid) | ||||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | ||||
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) | .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
@@ -1219,13 +1219,13 @@ REG_OP(Shrink) | |||||
* Three inputs, including: | * Three inputs, including: | ||||
* @li x: A Tensor. | * @li x: A Tensor. | ||||
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. | * Must be one of the following types on Ascend310: float16, int8, int32, uint8. | ||||
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n | |||||
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n | |||||
* @li threshold: A Tensor which should have the shape (1,), the value to threshold at. | * @li threshold: A Tensor which should have the shape (1,), the value to threshold at. | ||||
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. | * Must be one of the following types on Ascend310: float16, int8, int32, uint8. | ||||
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n | |||||
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n | |||||
* @li value: A Tensor which should have the shape (1,), the value to replace with. default value is 0. | * @li value: A Tensor which should have the shape (1,), the value to replace with. default value is 0. | ||||
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. | * Must be one of the following types on Ascend310: float16, int8, int32, uint8. | ||||
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n | |||||
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A Tensor which has the same shape and type as the input x. \n | * y: A Tensor which has the same shape and type as the input x. \n | ||||
@@ -61,16 +61,16 @@ REG_OP(Dequantize) | |||||
.OP_END_FACTORY_REG(Dequantize) | .OP_END_FACTORY_REG(Dequantize) | ||||
/** | /** | ||||
*@brief Quantizes the input . \n | |||||
*@par Inputs: | |||||
*@li x: shape and dtype of input_x. \n | |||||
*@li scales: shape and dtype of input_scales. \n | |||||
*@li zero_points: shape and dtype of input_zero_points \n | |||||
*@par Attributes: | |||||
*@li dtype: required, type. | |||||
*@li axis: the processed dim. \n | |||||
*@par Outputs: | |||||
*y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n | |||||
* @brief Quantizes the input . \n | |||||
* @par Inputs: | |||||
* @li x: A tensor of type float16 or float32, the input to be quantized. \n
* @li scales: A tensor containing the quantization scales. \n
* @li zero_points: A tensor containing the quantization zero points. \n
* @par Attributes:
* @li dtype: Required. The data type of the output.
* @li axis: The dimension along which quantization is applied. \n
* @par Outputs:
* y: The output tensor. Has the same shape as the input; its dtype is the quantized type specified by "dtype". \n
*/ | */ | ||||
REG_OP(Quantize) | REG_OP(Quantize) | ||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
@@ -82,23 +82,31 @@ REG_OP(Quantize) | |||||
.OP_END_FACTORY_REG(Quantize) | .OP_END_FACTORY_REG(Quantize) | ||||
/** | /** | ||||
*@brief Quantizes the input . \n | |||||
* @brief Quantizes the input . \n | |||||
*@par Inputs: | |||||
*x: An tensor of type float16 or float32, specifying the input . \n | |||||
* @par Inputs: | |||||
* x: A tensor of type float16 or float32, specifying the input. \n
*@par Attributes: | |||||
*@li scale: A required float32, specifying the scaling ratio. | |||||
*@li offset: A required float16, specifying the offset. | |||||
*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". | |||||
*@li round_mode: An optional string, specifying the float16 to int8 cast type. | |||||
* @par Attributes: | |||||
* @li scale: A required float32, specifying the scaling ratio. | |||||
* @li offset: A required float16, specifying the offset. | |||||
* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
* Defaults to "False".
* @li round_mode: An optional string, specifying the float16 to int8 cast type. | |||||
* The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . | * The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . | ||||
*@li dst_type: A optional int32, specifying the output data type. Defaults to "DT_INT8" . \n | |||||
* @li dst_type: An optional int32, specifying the output data type. Defaults to "DT_INT8". \n
* @par Outputs: | |||||
* y: The quantized output tensor of type int8 or int4. \n | |||||
*@par Outputs: | |||||
*y: The quantized output tensor of type int8 or int4. \n | |||||
* @attention Constraints: | |||||
* round_mode value range is [Round, Floor, Ceil, Truncate]. | |||||
* @li Round: round to nearest, tie to even(c language rint). | |||||
* @li Floor: round to minus infinity(c language floor). | |||||
* @li Ceil: round to positive infinity(c language ceil). | |||||
* @li Truncate: round to zero(c language trunc). \n | |||||
*@par Third-party framework compatibility | |||||
* @par Third-party framework compatibility | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(AscendQuant) | REG_OP(AscendQuant) | ||||
@@ -112,21 +120,22 @@ REG_OP(AscendQuant) | |||||
.OP_END_FACTORY_REG(AscendQuant) | .OP_END_FACTORY_REG(AscendQuant) | ||||
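// Illustrative sketch only: an assumed affine quantization rule for AscendQuant,
// y = saturate_int8(round(x * scale + offset)). The exact interaction with sqrt_mode,
// round_mode and dst_type is not modeled, and the formula itself is an assumption,
// not taken from this header.
#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t AscendQuantRef(float x, float scale, float offset) {
  const float q = std::nearbyint(x * scale + offset);          // "Round" rounding mode
  return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, q)));
}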
/** | /** | ||||
*@brief Dequantizes the input . \n | |||||
* @brief Dequantizes the input . \n | |||||
*@par Inputs: | |||||
*@par Inputs: | |||||
* @li x: An tensor of type int32, specifying the input. | * @li x: An tensor of type int32, specifying the input. | ||||
* @li deq_scale: An tensor of type uint64, specifying the scaling ratio . \n | * @li deq_scale: An tensor of type uint64, specifying the scaling ratio . \n | ||||
*@par Attributes: | |||||
* @li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". | |||||
* @par Attributes: | |||||
* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
* Defaults to "False".
* @li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". | * @li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". | ||||
* @li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n | * @li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n | ||||
*@par Outputs: | |||||
*y: The dequantized output tensor of type float16 or float32. \n | |||||
* @par Outputs: | |||||
* y: The dequantized output tensor of type float16 or float32. \n | |||||
*@par Third-party framework compatibility | |||||
* @par Third-party framework compatibility | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(AscendDequant) | REG_OP(AscendDequant) | ||||
@@ -139,21 +148,22 @@ REG_OP(AscendDequant) | |||||
.OP_END_FACTORY_REG(AscendDequant) | .OP_END_FACTORY_REG(AscendDequant) | ||||
/** | /** | ||||
*@brief Anti quantizes the input . \n | |||||
* @brief Anti quantizes the input . \n | |||||
*@par Inputs: | |||||
*x: An tensor of type int8, specifying the input . \n | |||||
* @par Inputs: | |||||
* x: A tensor of type int8, specifying the input. \n
*@par Attributes: | |||||
*@li scale: A required float32 scale. | |||||
*@li offset: A required float32 offset. | |||||
*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT". | |||||
*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False" . \n | |||||
* @par Attributes: | |||||
* @li scale: A required float32 scale. | |||||
* @li offset: A required float32 offset. | |||||
* @li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT".
* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
* Defaults to "False". \n
*@par Outputs: | |||||
*y: The dequantized output tensor of type float16 or float32. \n | |||||
* @par Outputs: | |||||
* y: The dequantized output tensor of type float16 or float32. \n | |||||
*@par Third-party framework compatibility | |||||
* @par Third-party framework compatibility | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(AscendAntiQuant) | REG_OP(AscendAntiQuant) | ||||
@@ -166,20 +176,20 @@ REG_OP(AscendAntiQuant) | |||||
.OP_END_FACTORY_REG(AscendAntiQuant) | .OP_END_FACTORY_REG(AscendAntiQuant) | ||||
/** | /** | ||||
*@brief Dequantizes the input of int16 . \n | |||||
* @brief Dequantizes the input of int16 . \n | |||||
*@par Inputs: | |||||
*@li x0: An tensor of type int32, specifying the input. | |||||
*@li deq_scale: An tensor of type uint64, specifying the scaling ratio. | |||||
*@li x1: An tensor of type int16, specifying the input . \n | |||||
* @par Inputs: | |||||
* @li x0: A tensor of type int32, specifying the input.
* @li deq_scale: A tensor of type uint64, specifying the scaling ratio.
* @li x1: A tensor of type int16, specifying the input. \n
*@par Attributes: | |||||
*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||||
* @par Attributes: | |||||
* relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". \n
*@par Outputs: | |||||
*y: The dequantized output tensor of type int16. \n | |||||
* @par Outputs: | |||||
* y: The dequantized output tensor of type int16. \n | |||||
*@par Third-party framework compatibility | |||||
* @par Third-party framework compatibility | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(AscendDequantS16) | REG_OP(AscendDequantS16) | ||||
@@ -191,19 +201,19 @@ REG_OP(AscendDequantS16) | |||||
.OP_END_FACTORY_REG(AscendDequantS16) | .OP_END_FACTORY_REG(AscendDequantS16) | ||||
/** | /** | ||||
*@brief Requantizes the input . \n | |||||
* @brief Requantizes the input . \n | |||||
*@par Inputs: | |||||
*@li x: An tensor of type int32, specifying the input. | |||||
*@li req_scale: An tensor of type uint64, specifying the scaling ratio . \n | |||||
* @par Inputs: | |||||
* @li x: A tensor of type int32, specifying the input.
* @li req_scale: A tensor of type uint64, specifying the scaling ratio. \n
*@par Attributes: | |||||
*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||||
* @par Attributes: | |||||
* relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". \n
*@par Outputs: | |||||
*y: The dequantized output tensor of type int8. \n | |||||
* @par Outputs: | |||||
* y: The dequantized output tensor of type int8. \n | |||||
*@par Third-party framework compatibility | |||||
* @par Third-party framework compatibility | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(AscendRequant) | REG_OP(AscendRequant) | ||||
@@ -214,22 +224,23 @@ REG_OP(AscendRequant) | |||||
.OP_END_FACTORY_REG(AscendRequant) | .OP_END_FACTORY_REG(AscendRequant) | ||||
/** | /** | ||||
*@brief Requantizes the input of int16 . \n | |||||
* @brief Requantizes the input of int16 . \n | |||||
*@par Inputs: | |||||
*@li x0: An tensor of type int16, specifying the input. | |||||
*@li req_scale: An tensor of type uint64, specifying the scaling ratio. | |||||
*@li x1: An tensor of type int16 . \n | |||||
* @par Inputs: | |||||
* @li x0: A tensor of type int16, specifying the input.
* @li req_scale: A tensor of type uint64, specifying the scaling ratio.
* @li x1: A tensor of type int16. \n
*@par Attributes: | |||||
*@li dual_output: A optional bool, specifying whether to perform dual ouput, either "True" or "False". Defaults to "False". | |||||
*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | |||||
* @par Attributes: | |||||
* @li dual_output: An optional bool, specifying whether to perform dual output, either "True" or "False".
* Defaults to "False".
* @li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". \n
*@par Outputs: | |||||
*@li y0: The dequantized output tensor of type int8. | |||||
*@li y1: The dequantized output tensor of type int16. \n | |||||
* @par Outputs: | |||||
* @li y0: The dequantized output tensor of type int8. | |||||
* @li y1: The dequantized output tensor of type int16. \n | |||||
*@par Third-party framework compatibility | |||||
* @par Third-party framework compatibility | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(AscendRequantS16) | REG_OP(AscendRequantS16) | ||||
@@ -79,6 +79,452 @@ REG_OP(StatelessRandomUniformInt) | |||||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64})) | .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) | ||||
.OP_END_FACTORY_REG(StatelessRandomUniformInt) | .OP_END_FACTORY_REG(StatelessRandomUniformInt) | ||||
} // namespace ge | |||||
/** | |||||
* @brief Outputs random values from a normal distribution. \n | |||||
* @par Inputs: | |||||
* Inputs include: | |||||
* @li shape: A Tensor. Must be one of the following types: int32, int64. | |||||
The shape of the output tensor. Batches are indexed by the 0th dimension. | |||||
* @li seed: 2 seeds (shape [2]). | |||||
* @li means: A Tensor. Must be one of the following types: float16, float32, double.
* @li stdevs: A Tensor. Must have the same type as means. | |||||
* @li min: A Tensor. Must have the same type as means. The minimum cutoff. May be -infinity. | |||||
* @li max: A Tensor. Must have the same type as means. The maximum cutoff. May be +infinity. \n
* @par Outputs: | |||||
* y: A Tensor. Has the same type as means. \n | |||||
* @attention Constraints: | |||||
* The implementation for StatelessParameterizedTruncatedNormal on Ascend uses AICPU, with bad performance. \n | |||||
* @par Third-party framework compatibility | |||||
* @li compatible with tensorflow StatelessParameterizedTruncatedNormal operator. | |||||
*/ | |||||
REG_OP(StatelessParameterizedTruncatedNormal) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(means, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(stdevs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(min, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(max, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OP_END_FACTORY_REG(StatelessParameterizedTruncatedNormal) | |||||
/** | |||||
* @brief Generate a single randomly distorted bounding box for an image . \n | |||||
* @par Inputs: | |||||
* Input images must be a 4-D tensor. Inputs include: | |||||
* @li image_size: 1-D, containing [height, width, channels]. | |||||
* @li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding | |||||
boxes associated with the image. | |||||
* @li min_object_covered: The cropped area of the image must contain at least | |||||
this fraction of any bounding box supplied. The value of this parameter should | |||||
be non-negative. In the case of 0, the cropped area does not need to overlap | |||||
any of the bounding boxes supplied . | |||||
* @li seed: A shape [2] Tensor, the seed to the random number generator. \n | |||||
* @par Attributes: | |||||
* @li aspect_ratio_range: The cropped area of the image must have an aspect | |||||
ratio = width / height within this range. | |||||
* @li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The | |||||
cropped area of the image must contain a fraction of the supplied image | |||||
within this range. | |||||
* @li max_attempts: Number of attempts at generating a cropped region of the | |||||
image of the specified constraints. After max_attempts failures, return the | |||||
entire image. | |||||
* @li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes | |||||
supplied. If true, assume an implicit bounding box covering the whole input. | |||||
If false, raise an error . \n | |||||
* @par Outputs: | |||||
* @li begin: 1-D, containing [offset_height, offset_width, 0]. | |||||
* @li size: 1-D, containing [target_height, target_width, -1]. | |||||
* @li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n | |||||
* @attention Constraints: | |||||
* Input images can be of different types but output images are always float . \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with tensorflow StatelessSampleDistortedBoundingBox operator. | |||||
*/ | |||||
REG_OP(StatelessSampleDistortedBoundingBox) | |||||
.INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
DT_INT32, DT_INT64 })) | |||||
.INPUT(bounding_boxes, TensorType({ DT_FLOAT })) | |||||
.INPUT(min_object_covered, TensorType({ DT_FLOAT })) | |||||
.INPUT(seed, TensorType({ DT_INT32, DT_INT64 })) | |||||
.OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
DT_INT32, DT_INT64 })) | |||||
.OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
DT_INT32, DT_INT64 })) | |||||
.OUTPUT(bboxes, TensorType({ DT_FLOAT })) | |||||
.ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) | |||||
.ATTR(area_range, ListFloat, { 0.05f, 1.0f }) | |||||
.ATTR(max_attempts, Int, 100) | |||||
.ATTR(use_image_if_no_bounding_boxes, Bool, false) | |||||
.OP_END_FACTORY_REG(StatelessSampleDistortedBoundingBox) | |||||
/** | |||||
* @brief Outputs random values from a truncated normal distribution. \n | |||||
* @par Inputs: | |||||
* Inputs include: | |||||
* @li shape: A Tensor. Must be one of the following types: int32, int64. \n | |||||
* @li key: Key of RNG algorithm. Shape[1]. \n | |||||
* @li counter: Counter of RNG algorithm. Shape[2] for philox, shape[1] for threefry. \n | |||||
* @li alg: RNG algorithm. 1:philox 2:threefry. \n | |||||
* @par Attributes: | |||||
* @li dtype: An optional attribute, specifying the output data type. Defaults to "DT_FLOAT". \n
* @par Outputs: | |||||
* y: A Tensor of types: float16, float32, double. A tensor of the specified shape | |||||
filled with random truncated normal values. \n | |||||
* @attention Constraints: | |||||
* The implementation for StatelessTruncatedNormalV2 on Ascend uses AICPU, with bad performance. | |||||
* @par Third-party framework compatibility | |||||
* @li compatible with tensorflow StatelessTruncatedNormalV2 operator. | |||||
*/ | |||||
REG_OP(StatelessTruncatedNormalV2) | |||||
.INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) | |||||
.INPUT(key, TensorType({ DT_UINT64 })) | |||||
.INPUT(counter, TensorType({ DT_UINT64 })) | |||||
.INPUT(alg, TensorType({ DT_INT32 })) | |||||
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
.ATTR(dtype, Type, DT_FLOAT) | |||||
.OP_END_FACTORY_REG(StatelessTruncatedNormalV2) | |||||
/** | |||||
* @brief Outputs deterministic pseudorandom random numbers from a gamma distribution. \n | |||||
* @par Inputs: | |||||
* @li shape: The shape of the output tensor. | |||||
* @li seed: 2 seeds (shape [2]). | |||||
* @li alpha: The concentration of the gamma distribution. Shape must match the rightmost dimensions of shape. \n | |||||
* @par Outputs: | |||||
* y: A Tensor. Has the same type as alpha. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomGammaV2 operator. | |||||
*/ | |||||
REG_OP(StatelessRandomGammaV2) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) | |||||
.OP_END_FACTORY_REG(StatelessRandomGammaV2) | |||||
/** | |||||
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n | |||||
* @par Inputs: | |||||
* @li shape: The shape of the output tensor. | |||||
* @li seed: 2 seeds (shape [2]). \n | |||||
* @par Attributes: | |||||
* dtype: The output data type. \n
* @par Outputs:
* y: Returns random values with the specified shape. \n
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomUniformFullInt operator. | |||||
*/ | |||||
REG_OP(StatelessRandomUniformFullInt) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||||
.ATTR(dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(StatelessRandomUniformFullInt) | |||||
/** | |||||
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n | |||||
* @par Inputs: | |||||
* @li shape: The shape of the output tensor. | |||||
* @li key: Key for the counter-based RNG algorithm. | |||||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||||
* @li alg: 0-D. The RNG algorithm. \n | |||||
* @par Attributes: | |||||
* dtype: The output data type. \n
* @par Outputs:
* y: Returns random values with the specified shape. \n
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomUniformFullIntV2 operator. | |||||
*/ | |||||
REG_OP(StatelessRandomUniformFullIntV2) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(key, TensorType({DT_UINT64})) | |||||
.INPUT(counter, TensorType({DT_UINT64})) | |||||
.INPUT(alg, TensorType({DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||||
.ATTR(dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(StatelessRandomUniformFullIntV2) | |||||
/** | |||||
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n | |||||
* @par Inputs: | |||||
* @li shape: The shape of the output tensor. | |||||
* @li key: Key for the counter-based RNG algorithm. | |||||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||||
* @li alg: 0-D. The RNG algorithm. | |||||
* @li minval: Minimum value (inclusive, scalar). | |||||
* @li maxval: Maximum value (exclusive, scalar) . \n | |||||
* @par Outputs: | |||||
* y: Returns random values with the specified shape. \n
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomUniformIntV2 operator. | |||||
*/ | |||||
REG_OP(StatelessRandomUniformIntV2) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(key, TensorType({DT_UINT64})) | |||||
.INPUT(counter, TensorType({DT_UINT64})) | |||||
.INPUT(alg, TensorType({DT_INT32})) | |||||
.INPUT(minval, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||||
.INPUT(maxval, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||||
.OP_END_FACTORY_REG(StatelessRandomUniformIntV2) | |||||
/** | |||||
* @brief Outputs deterministic pseudorandom random integers from a binomial distribution. \n | |||||
* @par Inputs: | |||||
* @li shape: The shape of the output tensor. | |||||
* @li seed: 2 seeds (shape [2]). | |||||
* @li counts: The counts of the binomial distribution. Must be broadcastable with probs, | |||||
* and broadcastable with the rightmost dimensions of shape. | |||||
* @li probs: The probability of success for the binomial distribution. | |||||
* Must be broadcastable with counts and broadcastable with the rightmost dimensions of shape. \n | |||||
* @par Attributes: | |||||
* @li dtype: A optional int32, specifying the output data type. Defaults to "DT_INT32". \n | |||||
* @par Outputs: | |||||
* @li y: Returns Random values with specified shape. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomBinomial operator. | |||||
*/ | |||||
REG_OP(StatelessRandomBinomial) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(counts, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) | |||||
.INPUT(probs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(StatelessRandomBinomial) | |||||
/** | |||||
* @brief Outputs deterministic pseudorandom random integers from a poisson distribution . \n | |||||
* @par Inputs: | |||||
* @li shape: The shape of the output tensor. | |||||
* @li seed: 2 seeds (shape [2]). | |||||
* @li lam: The mean value of the poisson distribution. \n
* @par Attributes:
* dtype: The output data type. \n
* @par Outputs:
* y: Returns random values with the specified shape. \n
* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomPoisson operator.
*/ | |||||
REG_OP(StatelessRandomPoisson) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(lam, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, DT_INT64})) | |||||
.REQUIRED_ATTR(dtype, Type) | |||||
.OP_END_FACTORY_REG(StatelessRandomPoisson) | |||||
/** | |||||
* @brief Gets the RNG algorithm to use, picked based on the device. \n
* @par Outputs: | |||||
* @li alg: The RNG algorithm. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomGetAlg operator. | |||||
*/ | |||||
REG_OP(StatelessRandomGetAlg) | |||||
.OUTPUT(alg, TensorType({DT_INT32})) | |||||
.OP_END_FACTORY_REG(StatelessRandomGetAlg) | |||||
/** | |||||
* @brief This op picks the best counter-based RNG algorithm based on device, and | |||||
* scrambles a shape-[2] seed into a key and a counter, both needed by the | |||||
* counter-based algorithm. \n | |||||
* @par Inputs: | |||||
* @li seed: 2 seeds (shape [2]). \n | |||||
* @par Outputs: | |||||
* @li key: Key for the counter-based RNG algorithm. | |||||
* @li counter: Initial counter for the counter-based RNG algorithm. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomGetKeyCounter operator. | |||||
*/ | |||||
REG_OP(StatelessRandomGetKeyCounter) | |||||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(key, TensorType({DT_UINT64})) | |||||
.OUTPUT(counter, TensorType({DT_UINT64})) | |||||
.OP_END_FACTORY_REG(StatelessRandomGetKeyCounter) | |||||
/** | |||||
* @brief This op picks the best counter-based RNG algorithm based on device, and | |||||
* scrambles a shape-[2] seed into a key and a counter, both needed by the | |||||
* counter-based algorithm. \n | |||||
* @par Inputs: | |||||
* @li seed: 2 seeds (shape [2]). \n | |||||
* @par Outputs: | |||||
* @li key: Key for the counter-based RNG algorithm. | |||||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||||
* @li alg: The RNG algorithm. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomGetKeyCounterAlg operator. | |||||
*/ | |||||
REG_OP(StatelessRandomGetKeyCounterAlg) | |||||
.INPUT(seed, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(key, TensorType({DT_UINT64})) | |||||
.OUTPUT(counter, TensorType({DT_UINT64})) | |||||
.OUTPUT(alg, TensorType({DT_INT32})) | |||||
.OP_END_FACTORY_REG(StatelessRandomGetKeyCounterAlg) | |||||
/** | |||||
* @brief Outputs deterministic pseudorandom values from a normal distribution. \n | |||||
* @par Inputs: | |||||
* @li shape: The shape of the output tensor. | |||||
* @li key: Key for the counter-based RNG algorithm. | |||||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||||
* @li alg: The RNG algorithm. \n | |||||
* @par Attributes: | |||||
* @li dtype: The output data type. \n
* @par Outputs:
* @li y: Returns random values with the specified shape. \n
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomNormalV2 operator. | |||||
*/ | |||||
REG_OP(StatelessRandomNormalV2) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(key, TensorType({DT_UINT64})) | |||||
.INPUT(counter, TensorType({DT_UINT64})) | |||||
.INPUT(alg, TensorType({DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(dtype, Type, DT_FLOAT) | |||||
.OP_END_FACTORY_REG(StatelessRandomNormalV2) | |||||
/** | |||||
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n | |||||
* @par Inputs: | |||||
* @li shape: The shape of the output tensor. | |||||
* @li key: Key for the counter-based RNG algorithm. | |||||
* @li counter: Initial counter for the counter-based RNG algorithm. | |||||
* @li alg: 0-D. The RNG algorithm. \n | |||||
* @par Attributes: | |||||
* dtype: The output data type. \n
* @par Outputs:
* y: Returns random values with the specified shape. \n
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow StatelessRandomUniformV2 operator. | |||||
*/ | |||||
REG_OP(StatelessRandomUniformV2) | |||||
.INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(key, TensorType({DT_UINT64})) | |||||
.INPUT(counter, TensorType({DT_UINT64})) | |||||
.INPUT(alg, TensorType({DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) | |||||
.ATTR(dtype, Type, DT_FLOAT) | |||||
.OP_END_FACTORY_REG(StatelessRandomUniformV2) | |||||
/** | |||||
* @brief Create a random number seed generator . \n | |||||
* @par Inputs: | |||||
* include: | |||||
* @li seed: A 1D Tensor, the seed used to generate random numbers.
* Must be one of the following types: int32, int64.
* @li seed2: A 1D Tensor, the seed used to generate random numbers.
* Must be one of the following types: int32, int64.
* @li reshuffle: A 1D Tensor of type bool. Seed selection: "True" for a random seed,
* "False" for a fixed seed. \n
* @par Outputs:
* handle: Handle to the random number generator.
* deleter: Handle to the remover, used when deleting the random number seed generator. \n
* @see AnonymousSeedGenerator() | |||||
* @par Third-party framework compatibility | |||||
* compatible with AnonymousSeedGenerator op of tensorflow | |||||
*/ | |||||
REG_OP(AnonymousSeedGenerator) | |||||
.INPUT(seed, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(seed2, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(reshuffle, TensorType({DT_BOOL})) | |||||
.OUTPUT(handle, TensorType({DT_RESOURCE}))
.OUTPUT(deleter, TensorType({DT_VARIANT})) | |||||
.OP_END_FACTORY_REG(AnonymousSeedGenerator) | |||||
/** | |||||
* @brief DeleteSeedGenerator . \n | |||||
* @par Inputs: | |||||
* @li handle: A Tensor of type resource. | |||||
* @li deleter: A Tensor of type variant. | |||||
* @par Third-party framework compatibility | |||||
* Compatible with TensorFlow DeleteSeedGenerator operator. | |||||
*/ | |||||
REG_OP(DeleteSeedGenerator) | |||||
.INPUT(handle, TensorType({DT_RESOURCE})) | |||||
.INPUT(deleter, TensorType({DT_VARIANT})) | |||||
.OP_END_FACTORY_REG(DeleteSeedGenerator) | |||||
/** | |||||
* @brief Creates a placeholder seed-generator handle that is rewritten and passed
* on for use during the graph compilation phase. \n
* @par Outputs: | |||||
* handle: A Tensor of type resource, the placeholder handle. \n
*/ | |||||
REG_OP(DummySeedGenerator) | |||||
.OUTPUT(handle, TensorType({ DT_RESOURCE })) | |||||
.OP_END_FACTORY_REG(DummySeedGenerator) | |||||
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_
@@ -60,7 +60,10 @@ REG_OP(Bitcast)
*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN . \n
*@par Outputs:
*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0.
*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. \n
*@attention Constraints:
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
*/
REG_OP(DepthwiseWeight4DTo6D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
@@ -77,7 +80,10 @@ REG_OP(DepthwiseWeight4DTo6D)
*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN . \n
*@par Outputs:
*y: A 4D Tensor. Has the same type as "x", with format HWCN.
*y: A 4D Tensor. Has the same type as "x", with format HWCN. \n
*@attention Constraints:
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
*/
REG_OP(DepthwiseWeight6DTo4D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
@@ -92,6 +92,12 @@ typedef enum tagRtLimitType {
RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms
} rtLimitType_t;
typedef enum tagRtFloatOverflowMode {
RT_OVERFLOW_MODE_SATURATION = 0,
RT_OVERFLOW_MODE_INFNAN,
RT_OVERFLOW_MODE_UNDEF,
} rtFloatOverflowMode_t;
typedef struct rtExceptionInfo {
uint32_t taskid;
uint32_t streamid;
@@ -145,12 +145,6 @@ typedef enum tagRTTaskTimeoutType {
RT_TIMEOUT_TYPE_OP_EXECUTE,
} rtTaskTimeoutType_t;
typedef enum tagRtFloatOverflowMode {
RT_OVERFLOW_MODE_SATURATION = 0,
RT_OVERFLOW_MODE_INFNAN,
RT_OVERFLOW_MODE_UNDEF,
} rtFloatOverflowMode_t;
/**
* @ingroup
* @brief get AI core count
@@ -433,6 +433,32 @@ RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback cal
* @return RT_MEMORY_TS, RT_MEMORY_HBM, RT_MEMORY_TS | RT_MEMORY_POLICY_HUGE_PAGE_ONLY
*/
RTS_API uint32_t rtGetTsMemType(rtMemRequestFeature_t featureType, uint32_t memSize);
/**
* @ingroup
* @brief set the saturation mode for the current device.
* @param [in] floatOverflowMode saturation mode.
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtSetDeviceSatMode(rtFloatOverflowMode_t floatOverflowMode);
/**
* @ingroup
* @brief get the saturation mode of the current device.
* @param [out] floatOverflowMode saturation mode.
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetDeviceSatMode(rtFloatOverflowMode_t *floatOverflowMode);
/**
* @ingroup
* @brief get the saturation mode of the target stream.
* @param [in] stm target stream
* @param [out] floatOverflowMode saturation mode.
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetDeviceSatModeForStream(rtStream_t stm, rtFloatOverflowMode_t *floatOverflowMode);
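// Illustrative usage sketch, not part of this header: select INF/NAN overflow handling
// for the current device and read the setting back. The helper name and the minimal
// error handling are assumptions for the example.
static rtError_t ConfigureFloatOverflowMode(void) {
    rtError_t ret = rtSetDeviceSatMode(RT_OVERFLOW_MODE_INFNAN);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    rtFloatOverflowMode_t mode = RT_OVERFLOW_MODE_UNDEF;
    return rtGetDeviceSatMode(&mode);  // mode now holds the value the device reports
}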
#if defined(__cplusplus)
}
#endif
@@ -229,6 +229,8 @@ typedef void (*rtCallback_t)(void *fnData);
#define RT_KERNEL_DUMPFLAG (0x02U)
#define RT_FUSION_KERNEL_DUMPFLAG (0x04U)
#define RT_KERNEL_CUSTOM_AICPU (0x08U)
#define RT_KERNEL_FFTSPLUS_DYNAMIC_SHAPE_DUMPFLAG (0x10U)
#define RT_KERNEL_FFTSPLUS_STATIC_SHAPE_DUMPFLAG (0x20U)
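// Illustrative note, not part of this header: the dump flags are single-bit masks, so
// a launch flag word can combine them; the constant below is only an example value.
static const uint32_t kExampleLaunchDumpFlags =
    RT_KERNEL_DUMPFLAG | RT_KERNEL_FFTSPLUS_DYNAMIC_SHAPE_DUMPFLAG;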
// STARS topic scheduler sqe : topic_type
#define RT_KERNEL_DEVICE_FIRST (0x10U)
@@ -391,6 +391,18 @@ RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, con
RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind,
rtStream_t stm);
typedef struct rtMemcpyAddrInfo {
uint32_t resv0;
uint32_t resv1;
uint32_t resv2;
uint32_t len;
uint64_t src;
uint64_t dst;
} rtMemcpyAddrInfo;
RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint64_t count,
rtMemcpyKind_t kind, rtStream_t stream);
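// Illustrative usage sketch, not part of this header. It assumes the first argument
// points at an rtMemcpyAddrInfo block describing the copy and that destMax/count are
// byte sizes; both assumptions are for the example only.
static rtError_t CopyByAddrInfo(uint64_t dstAddr, uint64_t srcAddr, uint32_t len, rtStream_t stm) {
    rtMemcpyAddrInfo info = {};
    info.len = len;    // length of the copy in bytes
    info.src = srcAddr;
    info.dst = dstAddr;
    // The caller must keep the addr-info block alive until the async task completes.
    return rtMemcpyAsyncPtr(&info, len, len, RT_MEMCPY_DEVICE_TO_DEVICE, stm);
}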
/**
* @ingroup dvrt_mem
* @brief asynchronized reduce memcpy
@@ -409,6 +421,23 @@ RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, ui
/**
* @ingroup dvrt_mem
* @brief asynchronized reduce memcpy with overflow detection
* @param [in] dst destination address pointer
* @param [in] destMax max length of destination address memory
* @param [in] src source address pointer
* @param [in] count the number of bytes to copy
* @param [in] kind memcpy type
* @param [in] type data type
* @param [in] stm asynchronized task stream
* @param [in] overflowAddr addr of overflow flag
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtReduceAsyncV2(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
rtDataType_t type, rtStream_t stm, void *overflowAddr);
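// Illustrative usage sketch, not part of this header: same argument order as
// rtReduceAsync plus the overflow-flag address. The reduce kind and data type are
// taken as parameters because their enumerators are not declared in this excerpt.
static rtError_t ReduceWithOverflowFlag(void *dst, const void *src, uint64_t bytes, rtRecudeKind_t kind,
                                        rtDataType_t type, rtStream_t stm, void *overflowAddr) {
    return rtReduceAsyncV2(dst, bytes, src, bytes, kind, type, stm, overflowAddr);
}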
/**
* @ingroup dvrt_mem
* @brief synchronized memcpy2D
* @param [in] dst destination address pointer
* @param [in] dstPitch pitch of destination memory
@@ -17,10 +17,19 @@ extern "C" {
#pragma pack(push)
#pragma pack (1)
typedef struct tagFftsPlusDumpInfo {
const void *loadDumpInfo;
const void *unloadDumpInfo;
uint32_t loadDumpInfolen;
uint32_t unloadDumpInfolen;
} rtFftsPlusDumpInfo_t;
typedef struct tagFftsPlusTaskInfo {
const rtFftsPlusSqe_t *fftsPlusSqe;
const void *descBuf; // include total context
size_t descBufLen; // the length of descBuf
rtFftsPlusDumpInfo_t fftsPlusDumpInfo; // used only in the dynamic shape scenario
} rtFftsPlusTaskInfo_t;
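// Illustrative usage sketch, not part of this header: attaching dynamic-shape dump
// descriptors to an FFTS+ task. The SQE and the buffers are assumed to be prepared
// elsewhere; only the wiring of the fields above is shown.
static void FillFftsPlusTask(rtFftsPlusTaskInfo_t *task, const rtFftsPlusSqe_t *sqe,
                             const void *descBuf, size_t descBufLen,
                             const void *loadInfo, uint32_t loadLen,
                             const void *unloadInfo, uint32_t unloadLen) {
    task->fftsPlusSqe = sqe;
    task->descBuf = descBuf;
    task->descBufLen = descBufLen;
    task->fftsPlusDumpInfo.loadDumpInfo = loadInfo;
    task->fftsPlusDumpInfo.unloadDumpInfo = unloadInfo;
    task->fftsPlusDumpInfo.loadDumpInfolen = loadLen;
    task->fftsPlusDumpInfo.unloadDumpInfolen = unloadLen;
}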
#pragma pack(pop)
@@ -110,7 +110,8 @@ typedef struct tagFftsPlusAicAivCtx {
// 0-3 bytes
uint16_t contextType;
uint8_t successorNum;
uint8_t resv : 7;
uint8_t resv : 6;
uint8_t dumpSwitch : 1;
uint8_t aten : 1;
// 4-7
uint8_t prefetchConfig;
@@ -172,7 +173,8 @@ typedef struct tagFftsPlusMixAicAivCtx {
// 0-3 bytes
uint16_t contextType;
uint8_t successorNum;
uint8_t reserved1 : 7;
uint8_t reserved1 : 6;
uint8_t dumpSwitch : 1;
uint8_t aten : 1;
// 4-7
uint8_t prefetchConfig;
@@ -236,7 +238,8 @@ typedef struct tagFftsPlusSdmaCtx {
// 0-3 bytes
uint16_t contextType;
uint8_t successorNum;
uint8_t res1 : 7;
uint8_t res1 : 6;
uint8_t dumpSwitch : 1;
uint8_t aten : 1;
// 4-7
uint8_t res2;
@@ -376,7 +379,8 @@ typedef struct tagFftsPlusAiCpuCtx {
// 0-3 bytes
uint16_t contextType;
uint8_t successorNum;
uint8_t res1 : 7;
uint8_t res1 : 6;
uint8_t dumpSwitch : 1;
uint8_t aten : 1;
// 4-7
uint8_t res2;
@@ -70,10 +70,27 @@ typedef enum tagFftsPlusType {
RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode
} rtFftsPlusType_t;
typedef struct tagStarsFftsPlusHeader {
uint8_t type : 6;
uint8_t l1Lock : 1;
uint8_t l1Unlock : 1;
uint8_t ie : 2;
uint8_t preP : 2;
uint8_t postP : 2;
uint8_t wrCqe : 1;
/* tells the MCU whether this subgraph has overflow detection enabled; the MCU forwards the flag to AICPU when an AICPU context is executed */
uint8_t overflowEn : 1;
uint16_t blockDim;
uint16_t rtStreamId;
uint16_t taskId;
} rtStarsFftsPlusHeader_t;
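// Illustrative note, not part of this header: overflowEn is a single-bit field, so
// enabling overflow reporting for a subgraph is a plain bit assignment on the header.
static void EnableSubgraphOverflow(rtStarsFftsPlusHeader_t *hdr) {
    hdr->overflowEn = 1U;  // picked up by the MCU and forwarded to AICPU contexts
}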
// ffts+ sqe
typedef struct tagFftsPlusSqe {
// 0-7 bytes
rtStarsSqeHeader_t sqeHeader;
rtStarsSqeHeader_t sqeHeader; // use rtStarsFftsPlusHeader_t instead
// 8-11 bytes
uint16_t fftsType : 3;
uint16_t reserved1 : 9;
@@ -27,6 +27,7 @@ extern "C" {
#define RT_STREAM_HEAD (0x20U)
#define RT_STREAM_PRIMARY_DEFAULT (0x40U)
#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U)
#define RT_STREAM_OVERFLOW (0x100U)
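// Illustrative usage sketch, not part of this header. It assumes the existing
// rtStreamCreateWithFlags(rtStream_t *, int32_t priority, uint32_t flags) entry point;
// priority 0 is used here purely for the example.
static rtError_t CreateOverflowStream(rtStream_t *stm) {
    return rtStreamCreateWithFlags(stm, 0, RT_STREAM_OVERFLOW);
}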
/**
* @ingroup stream_type
@@ -212,6 +213,26 @@ RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stm, uint32_t flag, const
*/
RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stm);
/*
* @ingroup dvrt_stream
* @brief enable or disable overflow detection for a stream
* @param [in] stm: stream handle
* @param [in] flags: 0: disable, others: enable
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtSetStreamOverflowSwitch(rtStream_t stm, uint32_t flags);
/*
* @ingroup dvrt_stream
* @brief get whether overflow detection is enabled or disabled for the stream
* @param [in] stm: stream handle
* @param [out] flags: 0: disabled, others: enabled
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetStreamOverflowSwitch(rtStream_t stm, uint32_t *flags);
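// Illustrative usage sketch, not part of this header: turn overflow detection on for
// a stream and read the switch back. The helper name is an assumption for the example.
static rtError_t ToggleStreamOverflow(rtStream_t stm) {
    rtError_t ret = rtSetStreamOverflowSwitch(stm, 1U);  // non-zero enables
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    uint32_t enabled = 0U;
    return rtGetStreamOverflowSwitch(stm, &enabled);
}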
#if defined(__cplusplus)
}
#endif
@@ -41,6 +41,10 @@ struct InitFlowGwInfo {
char_t rsv[128];
};
typedef enum {
TSD_CAPABILITY_PIDQOS = 0,
TSD_CAPABILITY_BUT
} TsdCapabilityType;
/**
* @ingroup Open
* @brief Used for the Framework process to communicate with the TSDDaemon process,
@@ -198,6 +202,36 @@ TDT_LIB_EXPORT uint32_t TsdSetMsprofReporterCallback(const MsprofReporterCallbac
* @retval OtherValues Failure
*/
TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue);
/**
* @ingroup TsdCapabilityGet
* @brief query a capability value from TSD
*
* @par logicDeviceId
* logic device id
*
* @par type
* capability type
*
* @par ptr
* address that receives the result
* @retval TDT_OK Success
* @retval OtherValues Failure
*/
TDT_LIB_EXPORT uint32_t TsdCapabilityGet(const uint32_t logicDeviceId, const int32_t type, const uint64_t ptr);
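// Illustrative usage sketch, not part of this header. It assumes "ptr" carries the
// address of a caller-owned result variable, which is how this example uses it.
static uint32_t QueryPidQosCapability(const uint32_t logicDeviceId, uint64_t *result) {
    return TsdCapabilityGet(logicDeviceId, static_cast<int32_t>(TSD_CAPABILITY_PIDQOS),
                            reinterpret_cast<uint64_t>(result));
}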
/**
* @ingroup GetHdcConctStatus
* @brief used to get hdc connection status
*
* @par logicDeviceId
* logic device id
*
* @par hdcSessStat
* hdc session status, DRV_ERROR_SOCKET_CONNECT or DRV_ERROR_SOCKET_CLOSE
* @retval TDT_OK Success
* @retval OtherValues Failure
*/
TDT_LIB_EXPORT uint32_t GetHdcConctStatus(const uint32_t logicDeviceId, int32_t *hdcSessStat);
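// Illustrative usage sketch, not part of this header: poll the HDC session state for
// one device. TDT_OK and the two driver status codes come from the TDT/driver headers;
// reading DRV_ERROR_SOCKET_CONNECT as "connected" is an assumption for the example.
static bool HdcConnected(const uint32_t logicDeviceId) {
    int32_t status = 0;
    if (GetHdcConctStatus(logicDeviceId, &status) != TDT_OK) {
        return false;
    }
    return status == DRV_ERROR_SOCKET_CONNECT;
}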
#ifdef __cplusplus
}
#endif // __cplusplus
@@ -19,11 +19,11 @@
#include "common/type_def.h"
namespace tsd {
#ifdef __cplusplus
using TSD_StatusT = uint32_t;
#else
typedef uint32_t TSD_StatusT;
#endif
// success code
constexpr TSD_StatusT TSD_OK = 0U;
} // namespace tsd
#endif // INC_TDT_STATUS_H