
upgrade Ascend package 9 Jun 22

Branch: pull/2124/MERGE^2
majorzhang committed 3 years ago, commit e82e2ff1de
41 changed files with 2127 additions and 1146 deletions
  1. +47 -42  inc/external/acl/error_codes/rt_error_codes.h
  2. +13 -1  inc/external/ge/ge_api_types.h
  3. +47 -42  inc/external/runtime/rt_error_codes.h
  4. +44 -42  inc/framework/common/debug/ge_log.h
  5. +3 -3  inc/framework/common/debug/log.h
  6. +3 -3  inc/framework/common/op_types.h
  7. +3 -2  inc/framework/common/types.h
  8. +1 -0  inc/framework/engine/dnnengine.h
  9. +1 -1  inc/framework/executor/ge_executor.h
  10. +1 -0  inc/framework/omg/parser/parser_types.h
  11. +1 -1  metadef
  12. +21 -14  third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
  13. +49 -47  third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
  14. +26 -22  third_party/fwkacllib/inc/common/type_def.h
  15. +25 -0  third_party/fwkacllib/inc/ops/data_flow_ops.h
  16. +2 -2  third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  17. +13 -0  third_party/fwkacllib/inc/ops/functional_ops.h
  18. +0 -12  third_party/fwkacllib/inc/ops/image_ops.h
  19. +34 -0  third_party/fwkacllib/inc/ops/linalg_ops.h
  20. +15 -0  third_party/fwkacllib/inc/ops/lookup_ops.h
  21. +180 -1  third_party/fwkacllib/inc/ops/math_ops.h
  22. +44 -0  third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
  23. +21 -0  third_party/fwkacllib/inc/ops/nn_calculation_ops.h
  24. +37 -1  third_party/fwkacllib/inc/ops/nn_detect_ops.h
  25. +31 -54  third_party/fwkacllib/inc/ops/nn_norm_ops.h
  26. +13 -13  third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
  27. +82 -71  third_party/fwkacllib/inc/ops/quantize_ops.h
  28. +756 -752  third_party/fwkacllib/inc/ops/rnn.h
  29. +448 -2  third_party/fwkacllib/inc/ops/stateless_random_ops.h
  30. +8 -2  third_party/fwkacllib/inc/ops/transformation_ops.h
  31. +6 -0  third_party/fwkacllib/inc/runtime/base.h
  32. +0 -6  third_party/fwkacllib/inc/runtime/config.h
  33. +26 -0  third_party/fwkacllib/inc/runtime/dev.h
  34. +2 -0  third_party/fwkacllib/inc/runtime/kernel.h
  35. +29 -0  third_party/fwkacllib/inc/runtime/mem.h
  36. +9 -0  third_party/fwkacllib/inc/runtime/rt_ffts_plus.h
  37. +8 -4  third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h
  38. +18 -1  third_party/fwkacllib/inc/runtime/rt_stars_define.h
  39. +21 -0  third_party/fwkacllib/inc/runtime/stream.h
  40. +34 -0  third_party/fwkacllib/inc/tdt/tsd_client.h
  41. +5 -5  third_party/fwkacllib/inc/tsd/status.h

+47 -42  inc/external/acl/error_codes/rt_error_codes.h

@@ -64,49 +64,54 @@ static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is emp
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow


static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow


static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error


+13 -1  inc/external/ge/ge_api_types.h

@@ -71,13 +71,15 @@ const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExec
const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr";
const char_t *const OPTION_EXEC_GRAPH_EXEC_TIMEOUT = "ge.exec.graphExecTimeout";
const char_t *const OPTION_EXEC_MODEL_EXEC_TIMEOUT = "ge.exec.modelExecTimeout";


// Option key: memory init
const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
const char_t *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize";
const char_t *const OPTION_EXEC_REUSE_ZERO_COPY_MEMORY = "ge.exec.reuseZeroCopyMemory";


const char_t *const OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE = "ge.exec.logicalDeviceClusterDeployMode";
const char_t *const OPTION_EXEC_LOGICAL_DEVICE_ID = "ge.exec.logicalDeviceId";

namespace configure_option {
const char_t *const STREAM_NUM = "ge.streamNum";
const char_t *const HEAD_STREAM = "ge.headStream";
@@ -107,6 +109,7 @@ const char_t *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode";
const char_t *const HCOM_PARALLEL = "ge.hcomParallel";
const char_t *const AUTO_TUNE_MODE = "ge.autoTuneMode";
const char_t *const SOC_VERSION = "ge.socVersion";
const char_t *const VIRTUAL_TYPE = "ge.virtual_type";
const char_t *const CORE_TYPE = "ge.engineType";
const char_t *const AICORE_NUM = "ge.aicoreNum";
const char_t *const L1_FUSION = "ge.l1Fusion";
@@ -254,9 +257,16 @@ const std::string AUTO_TUNE_MODE = "ge.autoTuneMode";
// Configure soc version , example: "Ascend310"
const std::string SOC_VERSION = "ge.socVersion";


// configure whether to enable virtualization,
// its value should be "0" or "1", default value is "0"
const std::string VIRTUAL_TYPE = "ge.virtual_type";

// Configure core type "VectorEngine", default value is "AIcoreEngine"
const std::string CORE_TYPE = "ge.engineType";


// Configure graph exclude one or more engines
const std::string EXCLUDE_ENGINES = "ge.exec.exclude_engines";

// Configure AICORE NUM
const std::string AICORE_NUM = "ge.aicoreNum";


@@ -420,6 +430,7 @@ static const char_t *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_
static const char_t *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str();
static const char_t *const CORE_TYPE = ge::CORE_TYPE.c_str();
static const char_t *const SOC_VERSION = ge::SOC_VERSION.c_str();
static const char_t *const VIRTUAL_TYPE = ge::VIRTUAL_TYPE.c_str();
static const char_t *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM;
static const char_t *const AICORE_NUM = ge::AICORE_NUM.c_str();
static const char_t *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str();
@@ -487,6 +498,7 @@ const std::set<std::string> ir_parser_suppported_options = {
// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {CORE_TYPE,
SOC_VERSION,
VIRTUAL_TYPE,
BUFFER_OPTIMIZE,
ENABLE_COMPRESS_WEIGHT,
COMPRESS_WEIGHT_CONF,


+47 -42  inc/external/runtime/rt_error_codes.h

@@ -64,49 +64,54 @@ static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is emp
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow


static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow


static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error


+44 -42  inc/framework/common/debug/ge_log.h

@@ -17,6 +17,7 @@
#ifndef INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_
#define INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_


#include <cinttypes>
#include <cstdint>


#include "framework/common/ge_inner_error_codes.h"
@@ -56,63 +57,64 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
return (enable == 1);
}


#define GELOGE(ERROR_CODE, fmt, ...) \
do { \
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__); \
#define GELOGE(ERROR_CODE, fmt, ...) \
do { \
dlog_error(GE_MODULE_NAME, "%" PRIu64 " %s: ErrorNo: %" PRIuLEAST8 "(%s) %s" fmt, GeLog::GetTid(), \
&__FUNCTION__[0], (ERROR_CODE), ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \
ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__); \
} while (false)


#define GELOGW(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \
dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
#define GELOGW(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \
dlog_warn(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
} while (false)


#define GELOGI(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \
dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
#define GELOGI(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \
dlog_info(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
} while (false)


#define GELOGD(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \
dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
#define GELOGD(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \
dlog_debug(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
} while (false)


#define GEEVENT(fmt, ...) \
do { \
dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
#define GEEVENT(fmt, ...) \
do { \
dlog_event(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} while (false)


#define GELOGT(VALUE, fmt, ...) \
do { \
TraceStatus stat = (VALUE); \
const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
const int32_t idx = static_cast<int32_t>(stat); \
char_t *k = const_cast<char_t *>("status"); \
char_t *v = const_cast<char_t *>(TraceStatStr[idx]); \
KeyValue kv = {k, v}; \
DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
#define GELOGT(VALUE, fmt, ...) \
do { \
TraceStatus stat = (VALUE); \
const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
const int32_t idx = static_cast<int32_t>(stat); \
char_t *k = const_cast<char_t *>("status"); \
char_t *v = const_cast<char_t *>(TraceStatStr[idx]); \
KeyValue kv = {k, v}; \
DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], \
##__VA_ARGS__); \
} while (false)


#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
do { \
dlog_error((MOD_NAME), "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__); \
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
do { \
dlog_error((MOD_NAME), "%" PRIu64 " %s: ErrorNo: %" PRIuLEAST8 "(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], \
(ERROR_CODE), ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \
ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__); \
} while (false)


// print memory when it is greater than 1KB.
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \
do { \
if (static_cast<size_t>(SIZE) > 1024UL) { \
GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \
} \
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \
do { \
if (static_cast<size_t>(SIZE) > 1024UL) { \
GELOGI("MallocMemory, func=%s, size=%" PRIu64 ", purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \
} \
} while (false)
#ifdef __cplusplus
}

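Note on the ge_log.h hunk above: it swaps the hard-coded %lu / %u length modifiers for the <cinttypes> format macros, which is why the new #include <cinttypes> appears. A minimal standalone sketch of that pattern, with illustrative variable names that are not taken from the GE headers:

#include <cinttypes>  // defines PRIu64, PRIuLEAST8, ... matching the fixed-width integer types
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t tid = 12345U;        // e.g. a thread id held as uint64_t
  const uint_least8_t error_no = 3U;  // e.g. a small error number
  // "%" PRIu64 expands to the correct specifier ("%lu" or "%llu") for the platform,
  // so the format string no longer assumes uint64_t is unsigned long.
  std::printf("%" PRIu64 " demo: ErrorNo: %" PRIuLEAST8 "\n", tid, error_no);
  return 0;
}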

+3 -3  inc/framework/common/debug/log.h

@@ -180,13 +180,13 @@
#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \
{ \
GELOGE((_status), "[Check][InnerData]%s", (errormsg)); \
REPORT_INNER_ERROR("E19999", "%s", (errormsg)); \
REPORT_INNER_ERROR("E10052", "%s", (errormsg)); \
}


#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \
{ \
GELOGW("%s", (errormsg)); \
ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {(errormsg)}); \
ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"reason"}, {(errormsg)}); \
}


#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \
@@ -194,7 +194,7 @@
const bool b = (expr); \
if (!b) { \
GELOGE((_status), "%s", (errormsg)); \
ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {(errormsg)}); \
ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"reason"}, {(errormsg)}); \
return (_status); \
} \
} while (false)


+3 -3  inc/framework/common/op_types.h

@@ -31,8 +31,8 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
}
~OpTypeContainer() = default;


void Register(const std::string &op_type) {
static_cast<void>(op_type_list_.insert(op_type));
bool Register(const std::string &op_type) {
return op_type_list_.insert(op_type).second;
}


bool IsExisting(const std::string &op_type) {
@@ -52,7 +52,7 @@ class GE_FUNC_VISIBILITY OpTypeContainer {


#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
const char_t *var_name = str_name; \
const bool g_##var_name##_reg = (static_cast<void>(OpTypeContainer::Instance()->Register(str_name)), true);
const bool g_##var_name##_reg = OpTypeContainer::Instance()->Register(str_name);


#define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name))
#endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_

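For context on the op_types.h hunk above: Register() now reports whether the insertion actually happened, so the REGISTER_OPTYPE_DEFINE macro can capture that result directly instead of casting it away. A trimmed-down, self-contained approximation of the static-registration pattern (not the GE header itself):

#include <set>
#include <string>

class OpTypeContainer {
 public:
  static OpTypeContainer *Instance() {
    static OpTypeContainer instance;  // lazily constructed singleton
    return &instance;
  }
  // Returns true only the first time a given op type name is registered.
  bool Register(const std::string &op_type) { return op_type_list_.insert(op_type).second; }
  bool IsExisting(const std::string &op_type) { return op_type_list_.count(op_type) > 0U; }

 private:
  std::set<std::string> op_type_list_;
};

// A namespace-scope bool forces Register() to run during static initialization,
// which mirrors what REGISTER_OPTYPE_DEFINE expands to after this change.
#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
  const char *var_name = (str_name);               \
  const bool g_##var_name##_reg = OpTypeContainer::Instance()->Register(str_name);

REGISTER_OPTYPE_DEFINE(DEMO_OP, "DemoOp")

int main() { return OpTypeContainer::Instance()->IsExisting("DemoOp") ? 0 : 1; }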
+3 -2  inc/framework/common/types.h

@@ -162,6 +162,7 @@ REGISTER_OPTYPE_DECLARE(_IF, "_If");
REGISTER_OPTYPE_DECLARE(STATELESSIF, "StatelessIf");
REGISTER_OPTYPE_DECLARE(IF, "If");
REGISTER_OPTYPE_DECLARE(CASE, "Case");
REGISTER_OPTYPE_DECLARE(STATELESSCASE, "StatelessCase");
REGISTER_OPTYPE_DECLARE(_WHILE, "_While");
REGISTER_OPTYPE_DECLARE(WHILE, "While");
REGISTER_OPTYPE_DECLARE(STATELESSWHILE, "StatelessWhile");
@@ -626,8 +627,8 @@ struct ModelFileHeader {
uint32_t version = MODEL_VERSION; // version 1.0
uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature
uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length.
uint8_t is_encrypt =
static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); // whether encrypted 0:not encrypt, 1:encrypt
// whether encrypted 0:not encrypt, 1:encrypt
uint8_t is_encrypt = static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED);
uint8_t is_checksum = static_cast<uint8_t>(ModelCheckType::CHECK); // whether to check the checksum
uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model
uint8_t genmode = 0U; // 0:offline generate 1:online generate


+1 -0  inc/framework/engine/dnnengine.h

@@ -31,6 +31,7 @@ enum class PriorityEnum {
COST_1 = 1,
COST_2 = 2,
COST_3 = 3,
COST_4 = 4,
COST_9 = 9,
COST_10 = 10,
};


+1 -1  inc/framework/executor/ge_executor.h

@@ -178,7 +178,7 @@ class GE_FUNC_VISIBILITY GeExecutor {


Status GetAippType(const uint32_t model_id, const uint32_t index, InputAippType &type, size_t &aipp_index);


Status CommandHandle(const Command &command);
Status CommandHandle(const Command &command) const;


Status SetDump(const DumpConfig &dump_config);




+1 -0  inc/framework/omg/parser/parser_types.h

@@ -133,6 +133,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_IF;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSIF;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IF;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CASE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSCASE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_WHILE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *WHILE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSWHILE;


+1 -1  metadef

@@ -1 +1 @@
Subproject commit 002617852e22767bd864db3c01595630e23f5496
Subproject commit 58412ad7aed08cd1c01cc070d80706e4253c9075

+21 -14  third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h

@@ -18,31 +18,38 @@
#define AICPU_OP_TYPE_LIST_H_
extern "C" {
enum OpKernelType { TF_KERNEL, CPU_KERNEL };
enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
};
enum ReturnCode { OP_TYPE_NOT_SUPPORT, FORMAT_NOT_SUPPORT, DTYPE_NOT_SUPPORT };
enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
};
#pragma pack(push, 1)
// One byte alignment
struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
};
struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
#pragma pack(pop)
}


+49 -47  third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h

@@ -24,81 +24,83 @@ namespace aicpu {
using char_t = char;


#pragma pack(push, 1)
struct AicpuParamHead {
uint32_t length; // Total length: include cunstom message
uint32_t ioAddrNum; // Input and output address number
uint32_t extInfoLength; // extInfo struct Length
uint64_t extInfoAddr; // extInfo address
struct AicpuParamHead
{
uint32_t length; // Total length: include cunstom message
uint32_t ioAddrNum; // Input and output address number
uint32_t extInfoLength; // extInfo struct Length
uint64_t extInfoAddr; // extInfo address
};


enum class AicpuConfigMsgType {
AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */
AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */
AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */
AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */
AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */
AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */
};


enum class AicpuErrMsgType {
ERR_MSG_TYPE_NULL = 0,
ERR_MSG_TYPE_AICORE = 1,
ERR_MSG_TYPE_AICPU = 2,
ERR_MSG_TYPE_NULL = 0,
ERR_MSG_TYPE_AICORE = 1,
ERR_MSG_TYPE_AICPU = 2,
};


enum class AicpuExtInfoMsgType {
EXT_MODEL_ID_MSG_TYPE = 0,
EXT_MODEL_ID_MSG_TYPE = 0,
};


struct AicpuConfigMsg {
uint8_t msgType;
uint8_t reserved1;
uint16_t bufLen;
uint32_t offset;
uint64_t bufAddr;
uint32_t tsId;
uint32_t reserved2;
uint8_t msgType;
uint8_t reserved1;
uint16_t bufLen;
uint32_t offset;
uint64_t bufAddr;
uint32_t tsId;
uint32_t reserved2;
};


struct AicpuModelIdInfo {
uint32_t modelId;
uint32_t extendModelId;
uint32_t extendInfo[13];
uint32_t modelId;
uint32_t extendModelId;
uint32_t extendInfo[13];
};


// 64 bytes
struct AicpuExtendInfo {
uint8_t msgType;
uint8_t version;
uint8_t reserved[2];
union {
AicpuModelIdInfo modelIdMap;
};
uint8_t msgType;
uint8_t version;
uint8_t reserved[2];
union {
AicpuModelIdInfo modelIdMap;
};
};


struct AicoreErrMsgInfo {
uint8_t errType;
uint8_t version;
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
uint32_t errorCode;
uint32_t modelId;
uint32_t taskId;
uint32_t streamId;
uint64_t transactionId;
uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */
uint8_t errType;
uint8_t version;
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
uint32_t errorCode;
uint32_t modelId;
uint32_t taskId;
uint32_t streamId;
uint64_t transactionId;
uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */
};


struct AicpuErrMsgInfo {
uint8_t errType;
uint8_t version;
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
uint32_t errorCode;
uint32_t modelId;
uint32_t streamId;
uint64_t transactionId;
char_t opName[64]; /* op name str */
char_t errDesc[128]; /* err msg desc info */
uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */
uint8_t errType;
uint8_t version;
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
uint32_t errorCode;
uint32_t modelId;
uint32_t streamId;
uint64_t transactionId;
char_t opName[64]; /* op name str */
char_t errDesc[128]; /* err msg desc info */
uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */
};
#pragma pack(pop)


} // namespace aicpu


#endif // AICPU_TASK_STRUCT_H

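As a side note on the #pragma pack(push, 1) block reformatted above, the one-byte packing is what pins AicpuParamHead to an exact wire size. A small standalone check (field layout copied from the hunk, everything else illustrative):

#include <cstdint>

#pragma pack(push, 1)
struct AicpuParamHead {
  uint32_t length;         // total length, including the custom message
  uint32_t ioAddrNum;      // number of input/output addresses
  uint32_t extInfoLength;  // length of the extInfo struct
  uint64_t extInfoAddr;    // address of the extInfo struct
};
#pragma pack(pop)

// With 1-byte packing the struct is exactly 4 + 4 + 4 + 8 = 20 bytes;
// without it, alignment of the uint64_t member would normally pad it to 24 bytes.
static_assert(sizeof(AicpuParamHead) == 20U, "AicpuParamHead must stay 20 bytes on the wire");

int main() { return 0; }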

+26 -22  third_party/fwkacllib/inc/common/type_def.h

@@ -1,13 +1,13 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* Description:interface.
* Create: 2021-12-21
*/
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* Description:interface.
* Create: 2021-12-21
*/
#ifndef AICPU_TYPE_DEF_H
#define AICPU_TYPE_DEF_H


@@ -25,24 +25,28 @@ typedef float float32_t;
typedef double float64_t;
#endif


inline uint64_t PtrToValue(const void *ptr) {
return static_cast<const uint64_t>(reinterpret_cast<const uintptr_t>(ptr));
inline uint64_t PtrToValue(const void *ptr)
{
return static_cast<const uint64_t>(reinterpret_cast<const uintptr_t>(ptr));
}


inline void *ValueToPtr(const uint64_t value) {
return reinterpret_cast<void *>(static_cast<const uintptr_t>(value));
inline void *ValueToPtr(const uint64_t value)
{
return reinterpret_cast<void *>(static_cast<const uintptr_t>(value));
}


template <typename TI, typename TO>
inline TO *PtrToPtr(TI *ptr) {
return reinterpret_cast<TO *>(ptr);
template<typename TI, typename TO>
inline TO *PtrToPtr(TI *ptr)
{
return reinterpret_cast<TO *>(ptr);
}


template <typename T>
inline T *PtrAdd(T *const ptr, const size_t maxIdx, const size_t idx) {
if ((ptr != nullptr) && (idx < maxIdx)) {
return reinterpret_cast<T *>(ptr + idx);
}
return nullptr;
template<typename T>
inline T *PtrAdd(T * const ptr, const size_t maxIdx, const size_t idx)
{
if ((ptr != nullptr) && (idx < maxIdx)) {
return reinterpret_cast<T *>(ptr + idx);
}
return nullptr;
}
#endif // AICPU_TYPE_DEF_H

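The type_def.h helpers reformatted above are thin wrappers over uintptr_t casts plus a bounds-checked pointer offset. A small usage sketch (the helper signatures follow the hunk; the bodies are lightly simplified and the surrounding test code is illustrative):

#include <cstddef>
#include <cstdint>

inline uint64_t PtrToValue(const void *ptr)
{
    return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
}

template<typename T>
inline T *PtrAdd(T * const ptr, const size_t maxIdx, const size_t idx)
{
    if ((ptr != nullptr) && (idx < maxIdx)) {
        return ptr + idx;  // only step within the caller-declared bounds
    }
    return nullptr;        // out-of-range access becomes a null result, not UB
}

int main()
{
    int32_t buf[4] = {10, 20, 30, 40};
    int32_t *third = PtrAdd(buf, 4U, 2U);  // in range: points at buf[2]
    int32_t *oob = PtrAdd(buf, 4U, 7U);    // out of range: nullptr
    uint64_t handle = PtrToValue(buf);     // raw address carried as an integer
    return ((third != nullptr) && (*third == 30) && (oob == nullptr) && (handle != 0U)) ? 0 : 1;
}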
+25 -0  third_party/fwkacllib/inc/ops/data_flow_ops.h

@@ -2507,6 +2507,31 @@ REG_OP(GetNextFromQueue)
.OP_END_FACTORY_REG(GetNextFromQueue)


/**
*@brief Get the batch of data in data processing . \n

*@par Attributes:
*@li output_types: A nested structure of DType objects corresponding to each
component of an element of this dataset.
*@li output_shapes: A nested structure of TensorShape objects corresponding
to each component of an element of this dataset.
*@li channel_name: A string. Default "" . \n

*@par Outputs:
*y:A nested structure of Tensor objects . \n

*@par Third-party framework compatibility
*Compatible with tensorflow GetNext operator
*/

REG_OP(PeekData)
.DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64,
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
.ATTR(output_types, ListType, {})
.ATTR(output_shapes, ListListInt, {})
.ATTR(channel_name, String, "")
.OP_END_FACTORY_REG(PeekData)

/**
* @brief OptionalGetValue
* @par Inputs:
* optional: A tensor of type variant


+2 -2  third_party/fwkacllib/inc/ops/elewise_calculation_ops.h

@@ -42,8 +42,8 @@ namespace ge {
*Compatible with the TensorFlow operator AddN.
*/
REG_OP(AddN)
.DYNAMIC_INPUT(x, TensorType::NumberType())
.OUTPUT(y, TensorType::NumberType())
.DYNAMIC_INPUT(x, TensorType({NumberType(), DT_VARIANT}))
.OUTPUT(y, TensorType({NumberType(), DT_VARIANT}))
.REQUIRED_ATTR(N, Int)
.OP_END_FACTORY_REG(AddN)




+13 -0  third_party/fwkacllib/inc/ops/functional_ops.h

@@ -349,6 +349,19 @@ REG_OP(StatefulPartitionedCall)
.ATTR(executor_type, String, "")
.OP_END_FACTORY_REG(StatefulPartitionedCall)


/**
* @par Inputs:
* @li input: The input tensors \n
*
* @par Outputs:
* @li output: The output tensors. \n
*/
REG_OP(ToBool)
.INPUT(input, TensorType({DT_INT64, DT_INT32, DT_INT16, DT_INT8, \
DT_UINT8, DT_FLOAT, DT_DOUBLE, DT_STRING, DT_BOOL}))
.OUTPUT(output, DT_BOOL)
.OP_END_FACTORY_REG(ToBool)

} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_

+0 -12  third_party/fwkacllib/inc/ops/image_ops.h

@@ -1931,9 +1931,6 @@ REG_OP(DenseImageWarpGrad)


*@par Third-party framework compatibility
*Compatible with pytorch GridSampler2D operator.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
@@ -1966,9 +1963,6 @@ REG_OP(GridSampler2D)


*@par Third-party framework compatibility
*Compatible with pytorch GridSampler2DGrad operator.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler2DGrad)
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
@@ -2063,9 +2057,6 @@ REG_OP(IMGWarpOffsets)


*@par Third-party framework compatibility
*Compatible with pytorch GridSampler3D operator.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler3D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
@@ -2096,9 +2087,6 @@ REG_OP(GridSampler3D)


*@par Third-party framework compatibility
*Compatible with pytorch GridSampler3DGrad operator.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler3DGrad)
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))


+34 -0  third_party/fwkacllib/inc/ops/linalg_ops.h

@@ -491,6 +491,40 @@ REG_OP(TridiagonalSolve)
.ATTR(partial_pivoting, Bool, true)
.OP_END_FACTORY_REG(TridiagonalSolve)


/**
* @brief Solution of banded triangular matrix . \n

* @par Inputs:
* The input bands has to be symmetric and positive definite.
* @li bands:A Tensor. Must be one of the following types: double, float32,
float16,complex64, complex128. Shape is [... K,M], K corresponds to the
number of bands (actually stored diagonals), and M is the data of the
diagonals.
@li rhs:shape is [...M] or [...M, N]. Has the same type as bands \n

* @par Outputs:
* @li output:A Tensor. Has the same type as bands . \n

* @par Attributes:
* @li lower:An optional bool. Defaults to True.True: indicates the lower
triangular matrix. False: indicates the upper triangular matrix.
* @li adjoint:An optional bool. Defaults to False.Boolean indicating whether to
solve with matrix or its (block-wise) adjoint. \n

* @par Third-party framework compatibility
* Compatible with tensorflow BandedTriangularSolve operator.
*/

REG_OP(BandedTriangularSolve)
.INPUT(bands, TensorType({DT_FLOAT, DT_DOUBLE, \
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, \
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(output,TensorType({DT_FLOAT, DT_DOUBLE, \
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(lower, Bool, true)
.ATTR(adjoint, Bool, false)
.OP_END_FACTORY_REG(BandedTriangularSolve)
} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_

+15 -0  third_party/fwkacllib/inc/ops/lookup_ops.h

@@ -303,6 +303,21 @@ REG_OP(MutableHashTable)
.REQUIRED_ATTR(key_dtype, Type)
.REQUIRED_ATTR(value_dtype, Type)
.OP_END_FACTORY_REG(MutableHashTable)

/**
* @brief Remove keys in the given table . \n

* @par Inputs:
* @li table_handle: A Tensor of type resource. Handle to the table. \n
* @li keys: A Tensor. Any shape. Keys to remove. \n

* @par Third-party framework compatibility.
* Compatible with tensorflow LookupTableInsert operator.
*/
REG_OP(LookupTableRemove)
.INPUT(table_handle, TensorType({DT_RESOURCE}))
.INPUT(keys,TensorType({RealNumberType, DT_BOOL, DT_STRING}))
.OP_END_FACTORY_REG(LookupTableRemove)
} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_

+180 -1  third_party/fwkacllib/inc/ops/math_ops.h

@@ -377,7 +377,7 @@ to each component of an element of this dataset.


REG_OP(GetNext)
.DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64,
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
.ATTR(output_types, ListType, {})
.ATTR(output_shapes, ListListInt, {})
.ATTR(output_num, Int, 1)
@@ -1156,6 +1156,185 @@ REG_OP(CdistGrad)
.ATTR(p, Float, 2.0)
.OP_END_FACTORY_REG(CdistGrad)


/**
* @brief Computes the RaggedBincount. \n

* @par Inputs:
* Four inputs, including:
* @li splits: A tensor with shpae: BxPXM. Must be one of the following types:
* int64.
* @li values: A tensor with shpae: BxPXM. Must be one of the following types:
* float16, float32.
* @li size: A tensor with shpae: BxRxM. Must be one of the following types:
* int32, int64.
* @li weights: A tensor with shpae: BxRxM.
* Must be one of the following types: int32, int64, float, double. \n

* @par Attributes:
* @li binary_output: An optional bool \n

* @par Outputs:
* output: Must be one of the following types: int32, int64, float, double. \n
*/
REG_OP(RaggedBincount)
.INPUT(splits, TensorType({DT_INT64}))
.INPUT(values, TensorType({DT_INT32, DT_INT64}))
.INPUT(size, TensorType({DT_INT32, DT_INT64}))
.INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(output, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
.ATTR(binary_output, Bool, false)
.OP_END_FACTORY_REG(RaggedBincount)

/**
* @brief Count the number of occurrences of each value in the input dense integer array,
* and output it according to the sparse matrix. \n

* @par Inputs:
* @li values: A 1D or 2D tensor of type int32 or int64.
* @li weights: A tensor of type int32 or int64 or float or double. \n

* @par Attributes:
* @li minlength: An optional int >=-1. Defaults to -1.
* @li maxlength: An optional int >=-1. Defaults to -1.
* @li binary_output: A required bool. \n

* @par Outputs:
* output_indices: A tensor of type int64.
* output_values: A tensor of the same type as "weights".
* output_dense_shape: A tensor of type int64. \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator DenseCountSparseOutput. \n
*/
REG_OP(DenseCountSparseOutput)
.INPUT(values, TensorType({DT_INT32,DT_INT64}))
.INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
.OUTPUT(output_indices, TensorType({DT_INT64}))
.OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
.OUTPUT(output_dense_shape, TensorType({DT_INT64}))
.ATTR(minlength, Int, -1)
.ATTR(maxlength, Int, -1)
.REQUIRED_ATTR(binary_output, Bool)
.OP_END_FACTORY_REG(DenseCountSparseOutput)

/**
* @brief Count the number of occurrences of each value in the input ragged integer array,
* and output it according to the sparse matrix. \n

* @par Inputs:
* @li splits: A 1D tensor of type int64.
* @li values: A 1D or 2D tensor of type int32 or int64.
* @li weights: A tensor of type int32 or int64 or float or double. \n

* @par Attributes:
* @li minlength: An optional int >=-1. Defaults to -1.
* @li maxlength: An optional int >=-1. Defaults to -1.
* @li binary_output: A required bool. \n

* @par Outputs:
* output_indices: A tensor of type int64.
* output_values: A tensor of the same type as "weights".
* output_dense_shape: A tensor of type int64. \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator RaggedCountSparseOutput. \n
*/
REG_OP(RaggedCountSparseOutput)
.INPUT(splits, TensorType({DT_INT64}))
.INPUT(values, TensorType({DT_INT32,DT_INT64}))
.INPUT(weights, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
.OUTPUT(output_indices, TensorType({DT_INT64}))
.OUTPUT(output_values, TensorType({DT_INT32,DT_INT64,DT_FLOAT,DT_DOUBLE}))
.OUTPUT(output_dense_shape, TensorType({DT_INT64}))
.ATTR(minlength, Int, -1)
.ATTR(maxlength, Int, -1)
.REQUIRED_ATTR(binary_output, Bool)
.OP_END_FACTORY_REG(RaggedCountSparseOutput)

/**
* @brief SignBitsUnpack.

* @par Inputs:
* one input, including:
* @li x: A 1D Tensor of uint8.

* @par Attributes:
* @li size: dim of out put tensor, defaults to 1.
* @li dtype: dtype of out put tensor: DT_FLOAT(0) or DT_FLOAT16(1).

* @par Outputs:
* @li y: A 2D Tensor of type float32 (float16) with shape (size, (x.shape * 8) / size),
*/
REG_OP(SignBitsUnpack)
.INPUT(x, TensorType({DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR(size, Int)
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(SignBitsUnpack)

/**
* @brief Function scaled masked softmax . \n

* @par Inputs:
* Two inputs, including:
* @li x: A mutable Tensor. The type support float16/float32.
* @li mask: An optional Tensor. Must meet all of the following rules:
* shape of mask should be broadcastable with x.
* dtype of mask should be bool.
* mask is binary

* @par Attributes:
* scale: A attribute used to scale tensor. The type is float. The dimension softmax would be performed on. Defaults
* to "1.0" . \n
* fixed_triu_mask: A flag used to enable or disable a fixed upper triangle mask. The type is bool. Defaults
* to "false" . \n

* @par Outputs:
* y: A mutable Tensor. Has the same type as "x". \n

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ScaledMaskedSoftmax)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(mask, TensorType({DT_BOOL, DT_UINT1}))
.OUTPUT(y, TensorType({DT_FLOAT16}))
.ATTR(scale, Float, 1.0)
.ATTR(fixed_triu_mask, Bool, false)
.OP_END_FACTORY_REG(ScaledMaskedSoftmax)

/**
* @brief Function scaled masked softmax grad . \n

* @par Inputs:
* Three inputs, including:
* @li y_grad: A mutable Tensor. The type support float16/float32.
* @li y: A mutable Tensor. The type support float16/float32.
* @li mask: An optional Tensor. Must meet all of the following rules:
* shape of mask should be broadcastable with x.
* dtype of mask should be bool.
* mask is binary

* @par Attributes:
* scale: A attribute used to scale tensor. The type is float. The dimension softmax would be performed on. Defaults
* to "1.0" . \n
* fixed_triu_mask: A flag used to enable or disable a fixed upper triangle mask. The type is bool. Defaults
* to "false" . \n

* @par Outputs:
* x_grad: A mutable Tensor. Has the same type as "x". \n

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ScaledMaskedSoftmaxGrad)
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(mask, TensorType({DT_BOOL, DT_UINT1}))
.OUTPUT(x_grad, TensorType({DT_FLOAT16}))
.ATTR(scale, Float, 1.0)
.ATTR(fixed_triu_mask, Bool, false)
.OP_END_FACTORY_REG(ScaledMaskedSoftmaxGrad)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_

+ 44
- 0
third_party/fwkacllib/inc/ops/matrix_calculation_ops.h View File

@@ -1595,6 +1595,50 @@ REG_OP(Pinverse)
.ATTR(rcond, Float, 1e-15) .ATTR(rcond, Float, 1e-15)
.OP_END_FACTORY_REG(Pinverse) .OP_END_FACTORY_REG(Pinverse)


/**
* @brief For each index in "indices", writes to the output the maximum of the corresponding elements of the input
* tensor and the updates tensor; all other positions copy the input. \n

* @par Inputs:
* Three inputs, including:
* @li input: Must be one of the following types:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
* @li indices: Must be one of the following types:
* int32, int64.
* @li updates: Must have the same type as input. \n

* @par Outputs:
* output: A Tensor with the same type as input. \n
*/
REG_OP(TensorScatterMax)
.INPUT(input, TensorType::BasicType())
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType::BasicType())
.OUTPUT(output, TensorType::BasicType())
.OP_END_FACTORY_REG(TensorScatterMax)

/**
* @brief For each index in "indices", writes to the output the minimum of the corresponding elements of the input
* tensor and the updates tensor; all other positions copy the input. \n

* @par Inputs:
* Three inputs, including:
* @li input: Must be one of the following types:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
* @li indices: Must be one of the following types:
* int32, int64.
* @li updates: Must have the same type as input. \n

* @par Outputs:
* output: A Tensor with the same type as input. \n
*/
REG_OP(TensorScatterMin)
.INPUT(input, TensorType::BasicType())
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType::BasicType())
.OUTPUT(output, TensorType::BasicType())
.OP_END_FACTORY_REG(TensorScatterMin)
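As a point of reference for both TensorScatterMax and TensorScatterMin, a 1-D host-side sketch of the scatter semantics described above (assuming TensorFlow-style behaviour: the output starts as a copy of the input, and each indexed slot keeps the max, or min, of itself and the update):
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// 1-D illustration of TensorScatterMax; TensorScatterMin is identical with std::min.
std::vector<float> TensorScatterMax1D(const std::vector<float>& input,
                                      const std::vector<int64_t>& indices,
                                      const std::vector<float>& updates) {
  std::vector<float> output = input;  // untouched positions simply copy the input
  for (size_t i = 0; i < indices.size(); ++i) {
    const size_t idx = static_cast<size_t>(indices[i]);
    output[idx] = std::max(output[idx], updates[i]);  // keep the larger value per index
  }
  return output;
}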

} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_

+ 21
- 0
third_party/fwkacllib/inc/ops/nn_calculation_ops.h View File

@@ -886,6 +886,7 @@ REG_OP(Conv2D)
* to the input image for int8 type. Ensure that the output is within the * to the input image for int8 type. Ensure that the output is within the
* effective range. Defaults to 0. * effective range. Defaults to 0.
*@li data_format: Reserved. *@li data_format: Reserved.
* @li alg: An optional string specifying the compress algorithm. Defaults to "weight_unzip".
* *
*@par Outputs: *@par Outputs:
* y: A 4D Tensor of output feature map. Has the same type as "x". With the * y: A 4D Tensor of output feature map. Has the same type as "x". With the
@@ -909,6 +910,7 @@ REG_OP(Conv2DCompress)
.ATTR(groups, Int, 1) .ATTR(groups, Int, 1)
.ATTR(data_format, String, "NHWC") .ATTR(data_format, String, "NHWC")
.ATTR(offset_x, Int, 0) .ATTR(offset_x, Int, 0)
.ATTR(alg, String, "weight_unzip")
.OP_END_FACTORY_REG(Conv2DCompress) .OP_END_FACTORY_REG(Conv2DCompress)


/** /**
@@ -1688,5 +1690,24 @@ REG_OP(FixPipe)
.ATTR(eltwise_mode, String, "") .ATTR(eltwise_mode, String, "")
.OP_END_FACTORY_REG(FixPipe) .OP_END_FACTORY_REG(FixPipe)


/**
* @brief Solves a batch of isotonic regression problems. \n

* @par Inputs:
* @li input: A Tensor. \n

* @par Attributes:
* @li output_dtype: The data type of output. \n

* @par Outputs:
* @li output: A Tensor of type float16, float32, or double.
* @li segments: A Tensor of type int32. \n
*/
REG_OP(IsotonicRegression)
.INPUT(input, TensorType::RealNumberType())
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(segments, TensorType({DT_INT32}))
.ATTR(output_dtype, Type, DT_FLOAT)
.OP_END_FACTORY_REG(IsotonicRegression)
} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_

+ 37
- 1
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -1550,7 +1550,8 @@ REG_OP(DecodeWheelsTarget)
*@li max_size_per_class: A required attribute of type int, specifying the nms output num per class. *@li max_size_per_class: A required attribute of type int, specifying the nms output num per class.
*@li max_total_size: A required attribute of type int, specifying the nms output num per batch. *@li max_total_size: A required attribute of type int, specifying the nms output num per batch.
*@li change_coordinate_frame: A optional attribute of type bool, whether to normalize coordinates after clipping. *@li change_coordinate_frame: A optional attribute of type bool, whether to normalize coordinates after clipping.
*@li transpose_box: A optional attribute of type bool, whether inserted transpose before this op. must be "false" . \n
*@li transpose_box: An optional attribute of type bool, specifying whether a transpose is inserted before this op. Must be "false".
*@li image_size: An optional attribute of type ListInt, the size of the image. \n


*@par Outputs: *@par Outputs:
*@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4), *@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4),
@@ -1580,6 +1581,7 @@ REG_OP(BatchMultiClassNonMaxSuppression)
.REQUIRED_ATTR(max_total_size, Int) .REQUIRED_ATTR(max_total_size, Int)
.ATTR(change_coordinate_frame, Bool, false) .ATTR(change_coordinate_frame, Bool, false)
.ATTR(transpose_box, Bool, false) .ATTR(transpose_box, Bool, false)
.ATTR(image_size, ListInt, {})
.OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression) .OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression)


/** /**
@@ -2316,6 +2318,40 @@ REG_OP(CIoU)
.ATTR(mode, String, "iou") .ATTR(mode, String, "iou")
.ATTR(atan_sub_flag, Bool, false) .ATTR(atan_sub_flag, Bool, false)
.OP_END_FACTORY_REG(CIoU) .OP_END_FACTORY_REG(CIoU)

/**
* @brief First calculates the minimum enclosing area of the two boxes and the IoU,
* then obtains the DIoU by combining the center distance and the IoU. \n

* @par Inputs:
* Two inputs, including:
* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (4, N). "N" indicates the number of bounding boxes, and the value
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (4, M). "M" indicates the number of ground truth boxes, and
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n

* @par Attributes:
* @li trans: An optional bool, true for 'xywh', false for 'xyxy'.
* @li is_cross: An optional bool, controlling whether the output shape is [N, M] or [1, N].
* @li mode: An optional string, computation mode, a character string with the value range of [iou, iof]. \n

* @par Outputs:
* overlap: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N],
* specifying the IoU or IoF ratio . \n

* @attention Constraints:
* "is_cross" only support false.
*/
REG_OP(DIoU)
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(trans, Bool, false)
.ATTR(is_cross, Bool, true)
.ATTR(mode, String, "iou")
.OP_END_FACTORY_REG(DIoU)
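For reference, a minimal single-pair sketch of the DIoU formula referred to above, DIoU = IoU - d^2 / c^2, where d is the distance between the box centers and c is the diagonal of the smallest enclosing box (corner format, i.e. trans = false; illustrative only, not the kernel):
#include <algorithm>

struct Box { float x1, y1, x2, y2; };  // corner ('xyxy') format

float DIoUPair(const Box& b, const Box& g) {
  // Intersection-over-union of the two boxes.
  const float interW = std::max(0.0f, std::min(b.x2, g.x2) - std::max(b.x1, g.x1));
  const float interH = std::max(0.0f, std::min(b.y2, g.y2) - std::max(b.y1, g.y1));
  const float inter  = interW * interH;
  const float areaB  = (b.x2 - b.x1) * (b.y2 - b.y1);
  const float areaG  = (g.x2 - g.x1) * (g.y2 - g.y1);
  const float iou    = inter / (areaB + areaG - inter);
  // Squared diagonal of the smallest enclosing box and squared center distance.
  const float encW = std::max(b.x2, g.x2) - std::min(b.x1, g.x1);
  const float encH = std::max(b.y2, g.y2) - std::min(b.y1, g.y1);
  const float c2   = encW * encW + encH * encH;
  const float dx   = (b.x1 + b.x2 - g.x1 - g.x2) * 0.5f;
  const float dy   = (b.y1 + b.y2 - g.y1 - g.y2) * 0.5f;
  const float d2   = dx * dx + dy * dy;
  return iou - d2 / c2;
}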
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_


+ 31
- 54
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -426,7 +426,10 @@ REG_OP(ConfusionSoftmaxGrad)
*@li keepdims: A bool Scalar. If true, retains reduced dimensions with length 1 . \n *@li keepdims: A bool Scalar. If true, retains reduced dimensions with length 1 . \n


*@par Outputs: *@par Outputs:
*y: A Tensor dtype of float16, float32.
*y: A Tensor dtype of float16, float32. \n

*@attention Constraints:
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
*/ */
REG_OP(SoftmaxGradExt) REG_OP(SoftmaxGradExt)
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -1026,74 +1029,48 @@ REG_OP(RNNTLoss)
.OP_END_FACTORY_REG(RNNTLoss) .OP_END_FACTORY_REG(RNNTLoss)


/** /**
*@brief Performs group normalization . \n
* @brief Performs group normalization . \n


*@par Inputs:
* Five inputs, including: (NHWC, NCHW supported)
*@li x: A 4D Tensor of type float16 or float32, with format NHWC or
NCHW for 4D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format
NHWC or NCHW. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with
format NHWC or NCHW. Specifies the offset.
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format
NHWC or NCHW. Reserved. Mu
st be "None" if the operation is used for training.
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with
format NHWC or NCHW. Specifies the variance used for inference. Reserved . \n
* @par Inputs:
* Three inputs, including:
* @li x: An ND Tensor of type float16 or float32, with format NCHW for 4D.
* @li gamma: A Tensor of type float16 or float32. Must be 1D. Specifies the scaling factor.
* @li beta: A Tensor of type float16 or float32. Must be 1D. Specifies the offset. \n


*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to
* @par Attributes:
* @li num_groups: A required int32, specifying the number of groups.
* @li eps: An optional float32, specifying the small value added to
variance to avoid dividing by zero. Defaults to "0.0001". variance to avoid dividing by zero. Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x".
* @li data_format: An optional string, specifying the format of "x".
Defaults to "NHWC". Defaults to "NHWC".
*@li is_training: An optional bool, specifying if the operation is used for
* @li is_training: An optional bool, specifying if the operation is used for
training or inference. Defaults to "True" . \n training or inference. Defaults to "True" . \n


*@par Outputs:
* Five outputs, including: (NHWC, NCHW supported)
*@li y: A 4D Tensor of type float16 or float32 for the normalized "x",
with format NHWC or NCHW for 4D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with
format NHWC or NCHW. Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is
with format NHWC or NCHW. Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if
input "x" is with format NHWC or NCHW. Specifies the mean o
f "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if
input "x" is with format NHWC or NCHW. Specifies the varian
ce of "x" for gradient computation. Pass "None" to skip this output . \n
* @par Outputs:
* Three outputs, including:
* @li y: An ND Tensor of type float16 or float32 for the normalized "x",
with format NCHW for 4D.
* @li mean: A Tensor of type float16 or float32. Must be 1D. Specifies the mean of "x".
* @li variance: A Tensor of type float16 or float32. Must be 1D. Specifies the variance of "x". \n


*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1"
and "reserve_space_2" are available, then "reserve_space_1" has the same
value as "mean" and "reserve_spa
ce_2" has the same value as "variance".
*@li For Ascend 310, the result accuracy fails due to the square root
instruction . \n
* @attention Constraints:
* @li For Ascend 310, only NCHW that can be converted to 5HD is supported. \n


*@par Third-party framework compatibility
*@li Compatible with the PyTorch operator GroupNorm.
* @par Third-party framework compatibility
* @li Compatible with the PyTorch operator GroupNorm.


*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */
REG_OP(GroupNorm) REG_OP(GroupNorm)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT,}))
.INPUT(offset, TensorType({DT_FLOAT,}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(num_groups, Int)
.ATTR(data_format, String, "NHWC") .ATTR(data_format, String, "NHWC")
.ATTR(eps, Float, 0.0001)
.ATTR(is_training, Bool, true) .ATTR(is_training, Bool, true)
.ATTR(num_groups, Int, 2)
.OP_END_FACTORY_REG(GroupNorm) .OP_END_FACTORY_REG(GroupNorm)
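For reference, a host-side sketch of the group-normalization formula the revised description corresponds to: per (batch, group) mean and variance over NCHW data, followed by a per-channel affine with gamma and beta. A rough illustration under those assumptions, not the device kernel:
#include <cmath>
#include <cstddef>
#include <vector>

// y must be pre-sized to x.size(); C must be divisible by numGroups.
void GroupNormNCHW(const std::vector<float>& x, std::vector<float>& y,
                   const std::vector<float>& gamma, const std::vector<float>& beta,
                   size_t N, size_t C, size_t H, size_t W, size_t numGroups, float eps) {
  const size_t chPerGroup = C / numGroups;
  const size_t groupSize = chPerGroup * H * W;
  for (size_t n = 0; n < N; ++n) {
    for (size_t g = 0; g < numGroups; ++g) {
      const size_t base = n * C * H * W + g * groupSize;
      float mean = 0.0f;
      for (size_t i = 0; i < groupSize; ++i) mean += x[base + i];
      mean /= static_cast<float>(groupSize);
      float var = 0.0f;
      for (size_t i = 0; i < groupSize; ++i) {
        const float d = x[base + i] - mean;
        var += d * d;
      }
      var /= static_cast<float>(groupSize);
      const float invStd = 1.0f / std::sqrt(var + eps);
      for (size_t i = 0; i < groupSize; ++i) {
        const size_t c = g * chPerGroup + i / (H * W);  // channel index for the affine parameters
        y[base + i] = gamma[c] * (x[base + i] - mean) * invStd + beta[c];
      }
    }
  }
}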


/** /**


+ 13
- 13
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -307,7 +307,7 @@ REG_OP(Relu6D)


* @par Inputs: * @par Inputs:
* @li gradients: A Tensor of type RealNumberType. The backpropagated * @li gradients: A Tensor of type RealNumberType. The backpropagated
gradients to the corresponding Relu6 operation.
gradients to the corresponding Relu6 operation.
* @li features: A Tensor with the same type as gradients. The features passed * @li features: A Tensor with the same type as gradients. The features passed
as input to the corresponding Relu6 operation, or its output; as input to the corresponding Relu6 operation, or its output;
using either one produces the same result. \n using either one produces the same result. \n
@@ -325,22 +325,22 @@ REG_OP(Relu6Grad)
.OUTPUT(backprops, TensorType::RealNumberType()) .OUTPUT(backprops, TensorType::RealNumberType())
.OP_END_FACTORY_REG(Relu6Grad) .OP_END_FACTORY_REG(Relu6Grad)
/** /**
*@brief Calculate the elu_grad_v2 function.
*@brief Calculate the elu_grad_v2 function.
*Applies the element-wise function: *Applies the element-wise function:
* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . * Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha .
*@par Inputs: *@par Inputs:
*Two inputs, including: *Two inputs, including:
* @li grads: A tensor. Must be one of the following types: * @li grads: A tensor. Must be one of the following types:
* float16, float32.
* float16, float32.
* @li activations: A tensor. Must be one of the following types: * @li activations: A tensor. Must be one of the following types:
* float16, float32.
* float16, float32.
* *
*@par Outputs: *@par Outputs:
*y: A Tensor with the same type and shape as "grads". *y: A Tensor with the same type and shape as "grads".
*
*
*@par Attributes: *@par Attributes:
*alpha: scalar parameter, default value = 1.0 *alpha: scalar parameter, default value = 1.0
*/
*/
REG_OP(EluGradV2) REG_OP(EluGradV2)
.INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -972,18 +972,18 @@ REG_OP(SoftplusV2Grad)
/** /**
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor)
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
*
*
* @par Inputs: * @par Inputs:
* one input including: * one input including:
* x: input A Tensor. Must be one of the following types: float32, float16 * x: input A Tensor. Must be one of the following types: float32, float16
*
*
* @par Attributes: * @par Attributes:
* alpha: An optional float. Defaults to 1.0. \n * alpha: An optional float. Defaults to 1.0. \n


* @par Outputs: * @par Outputs:
* one output including: * one output including:
* y:A Tensor of the same type as x * y:A Tensor of the same type as x
*
*
*/ */
REG_OP(ThresholdedRelu) REG_OP(ThresholdedRelu)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1059,7 +1059,7 @@ REG_OP(HardShrink)


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardsigmoid. \n * Compatible with the Pytorch operator Hardsigmoid. \n
*/
*/
REG_OP(HardSigmoid) REG_OP(HardSigmoid)
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -1219,13 +1219,13 @@ REG_OP(Shrink)
* Three inputs, including: * Three inputs, including:
* @li x: A Tensor. * @li x: A Tensor.
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. * Must be one of the following types on Ascend310: float16, int8, int32, uint8.
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n
* @li threshold: A Tensor which should have the shape (1,), the value to threshold at. * @li threshold: A Tensor which should have the shape (1,), the value to threshold at.
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. * Must be one of the following types on Ascend310: float16, int8, int32, uint8.
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n
* @li value: A Tensor which should have the shape (1,), the value to replace with. default value is 0. * @li value: A Tensor which should have the shape (1,), the value to replace with. default value is 0.
* Must be one of the following types on Ascend310: float16, int8, int32, uint8. * Must be one of the following types on Ascend310: float16, int8, int32, uint8.
* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n
* Must be one of the following types on Ascend310P or Ascend910: float16, float32, int8, int32, uint8. \n


* @par Outputs: * @par Outputs:
* y: A Tensor which has the same shape and type as the input x. \n * y: A Tensor which has the same shape and type as the input x. \n


+ 82
- 71
third_party/fwkacllib/inc/ops/quantize_ops.h View File

@@ -61,16 +61,16 @@ REG_OP(Dequantize)
.OP_END_FACTORY_REG(Dequantize) .OP_END_FACTORY_REG(Dequantize)


/** /**
*@brief Quantizes the input . \n
*@par Inputs:
*@li x: shape and dtype of input_x. \n
*@li scales: shape and dtype of input_scales. \n
*@li zero_points: shape and dtype of input_zero_points \n
*@par Attributes:
*@li dtype: required, type.
*@li axis: the processed dim. \n
*@par Outputs:
*y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n
* @brief Quantizes the input . \n
* @par Inputs:
* @li x: shape and dtype of input_x. \n
* @li scales: shape and dtype of input_scales. \n
* @li zero_points: shape and dtype of input_zero_points \n
* @par Attributes:
* @li dtype: required, type.
* @li axis: the processed dim. \n
* @par Outputs:
* y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n
*/ */
REG_OP(Quantize) REG_OP(Quantize)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -82,23 +82,31 @@ REG_OP(Quantize)
.OP_END_FACTORY_REG(Quantize) .OP_END_FACTORY_REG(Quantize)


/** /**
*@brief Quantizes the input . \n
* @brief Quantizes the input . \n


*@par Inputs:
*x: An tensor of type float16 or float32, specifying the input . \n
* @par Inputs:
* x: A tensor of type float16 or float32, specifying the input . \n


*@par Attributes:
*@li scale: A required float32, specifying the scaling ratio.
*@li offset: A required float16, specifying the offset.
*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
*@li round_mode: An optional string, specifying the float16 to int8 cast type.
* @par Attributes:
* @li scale: A required float32, specifying the scaling ratio.
* @li offset: A required float16, specifying the offset.
* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
* Defaults to "False".
* @li round_mode: An optional string, specifying the float16 to int8 cast type.
* The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . * The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" .
*@li dst_type: A optional int32, specifying the output data type. Defaults to "DT_INT8" . \n
* @li dst_type: An optional int32, specifying the output data type. Defaults to "DT_INT8" . \n

* @par Outputs:
* y: The quantized output tensor of type int8 or int4. \n


*@par Outputs:
*y: The quantized output tensor of type int8 or int4. \n
* @attention Constraints:
* round_mode value range is [Round, Floor, Ceil, Truncate].
* @li Round: round to nearest, ties to even (C language rint).
* @li Floor: round to minus infinity (C language floor).
* @li Ceil: round to positive infinity (C language ceil).
* @li Truncate: round to zero (C language trunc). A worked example follows the registration below. \n


*@par Third-party framework compatibility
* @par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(AscendQuant) REG_OP(AscendQuant)
@@ -112,21 +120,22 @@ REG_OP(AscendQuant)
.OP_END_FACTORY_REG(AscendQuant) .OP_END_FACTORY_REG(AscendQuant)
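As a worked example of the rounding modes listed in the constraints above, a rough host-side model of the int8 quantize step, q = round_mode(x * scale + offset) clamped to [-128, 127]. The formula and the saturation behaviour are assumptions for illustration, not the on-device kernel:
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <string>

int8_t QuantizeOne(float x, float scale, float offset, const std::string& roundMode) {
  const float v = x * scale + offset;
  float r;
  if (roundMode == "Round") {
    r = std::rint(v);        // round to nearest, ties to even
  } else if (roundMode == "Floor") {
    r = std::floor(v);       // toward minus infinity
  } else if (roundMode == "Ceil") {
    r = std::ceil(v);        // toward plus infinity
  } else {                   // "Truncate"
    r = std::trunc(v);       // toward zero
  }
  return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, r)));
}
// e.g. for v = 2.5:  rint -> 2, floor -> 2, ceil -> 3, trunc -> 2
// and for v = -1.5:  rint -> -2, floor -> -2, ceil -> -1, trunc -> -1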


/** /**
*@brief Dequantizes the input . \n
* @brief Dequantizes the input . \n


*@par Inputs:
*@par Inputs:
* @li x: An tensor of type int32, specifying the input. * @li x: An tensor of type int32, specifying the input.
* @li deq_scale: An tensor of type uint64, specifying the scaling ratio . \n * @li deq_scale: An tensor of type uint64, specifying the scaling ratio . \n


*@par Attributes:
* @li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
* @par Attributes:
* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
* Defaults to "False".
* @li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". * @li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
* @li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n * @li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n


*@par Outputs:
*y: The dequantized output tensor of type float16 or float32. \n
* @par Outputs:
* y: The dequantized output tensor of type float16 or float32. \n


*@par Third-party framework compatibility
* @par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(AscendDequant) REG_OP(AscendDequant)
@@ -139,21 +148,22 @@ REG_OP(AscendDequant)
.OP_END_FACTORY_REG(AscendDequant) .OP_END_FACTORY_REG(AscendDequant)


/** /**
*@brief Anti quantizes the input . \n
* @brief Anti quantizes the input . \n


*@par Inputs:
*x: An tensor of type int8, specifying the input . \n
* @par Inputs:
* x: A tensor of type int8, specifying the input . \n


*@par Attributes:
*@li scale: A required float32 scale.
*@li offset: A required float32 offset.
*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT".
*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False" . \n
* @par Attributes:
* @li scale: A required float32 scale.
* @li offset: A required float32 offset.
* @li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT".
* @li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False".
* Defaults to "False" . \n


*@par Outputs:
*y: The dequantized output tensor of type float16 or float32. \n
* @par Outputs:
* y: The dequantized output tensor of type float16 or float32. \n


*@par Third-party framework compatibility
* @par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(AscendAntiQuant) REG_OP(AscendAntiQuant)
@@ -166,20 +176,20 @@ REG_OP(AscendAntiQuant)
.OP_END_FACTORY_REG(AscendAntiQuant) .OP_END_FACTORY_REG(AscendAntiQuant)


/** /**
*@brief Dequantizes the input of int16 . \n
* @brief Dequantizes the input of int16 . \n


*@par Inputs:
*@li x0: An tensor of type int32, specifying the input.
*@li deq_scale: An tensor of type uint64, specifying the scaling ratio.
*@li x1: An tensor of type int16, specifying the input . \n
* @par Inputs:
* @li x0: A tensor of type int32, specifying the input.
* @li deq_scale: A tensor of type uint64, specifying the scaling ratio.
* @li x1: A tensor of type int16, specifying the input . \n


*@par Attributes:
*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
* @par Attributes:
* relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n


*@par Outputs:
*y: The dequantized output tensor of type int16. \n
* @par Outputs:
* y: The dequantized output tensor of type int16. \n


*@par Third-party framework compatibility
* @par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(AscendDequantS16) REG_OP(AscendDequantS16)
@@ -191,19 +201,19 @@ REG_OP(AscendDequantS16)
.OP_END_FACTORY_REG(AscendDequantS16) .OP_END_FACTORY_REG(AscendDequantS16)


/** /**
*@brief Requantizes the input . \n
* @brief Requantizes the input . \n


*@par Inputs:
*@li x: An tensor of type int32, specifying the input.
*@li req_scale: An tensor of type uint64, specifying the scaling ratio . \n
* @par Inputs:
* @li x: A tensor of type int32, specifying the input.
* @li req_scale: A tensor of type uint64, specifying the scaling ratio . \n


*@par Attributes:
*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
* @par Attributes:
* relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n


*@par Outputs:
*y: The dequantized output tensor of type int8. \n
* @par Outputs:
* y: The dequantized output tensor of type int8. \n


*@par Third-party framework compatibility
* @par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(AscendRequant) REG_OP(AscendRequant)
@@ -214,22 +224,23 @@ REG_OP(AscendRequant)
.OP_END_FACTORY_REG(AscendRequant) .OP_END_FACTORY_REG(AscendRequant)


/** /**
*@brief Requantizes the input of int16 . \n
* @brief Requantizes the input of int16 . \n


*@par Inputs:
*@li x0: An tensor of type int16, specifying the input.
*@li req_scale: An tensor of type uint64, specifying the scaling ratio.
*@li x1: An tensor of type int16 . \n
* @par Inputs:
* @li x0: A tensor of type int16, specifying the input.
* @li req_scale: A tensor of type uint64, specifying the scaling ratio.
* @li x1: A tensor of type int16 . \n


*@par Attributes:
*@li dual_output: A optional bool, specifying whether to perform dual ouput, either "True" or "False". Defaults to "False".
*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
* @par Attributes:
* @li dual_output: An optional bool, specifying whether to perform dual output, either "True" or "False".
* Defaults to "False".
* @li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n


*@par Outputs:
*@li y0: The dequantized output tensor of type int8.
*@li y1: The dequantized output tensor of type int16. \n
* @par Outputs:
* @li y0: The dequantized output tensor of type int8.
* @li y1: The dequantized output tensor of type int16. \n


*@par Third-party framework compatibility
* @par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(AscendRequantS16) REG_OP(AscendRequantS16)


+ 756
- 752
third_party/fwkacllib/inc/ops/rnn.h
File diff suppressed because it is too large
View File


+ 448
- 2
third_party/fwkacllib/inc/ops/stateless_random_ops.h View File

@@ -79,6 +79,452 @@ REG_OP(StatelessRandomUniformInt)
.OUTPUT(y, TensorType({DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_INT32, DT_INT64}))
.OP_END_FACTORY_REG(StatelessRandomUniformInt) .OP_END_FACTORY_REG(StatelessRandomUniformInt)


} // namespace ge
/**
* @brief Outputs random values from a normal distribution. \n

* @par Inputs:
* Inputs include:
* @li shape: A Tensor. Must be one of the following types: int32, int64.
The shape of the output tensor. Batches are indexed by the 0th dimension.
* @li seed: 2 seeds (shape [2]).
* @li means: A Tensor. Must be one of the following types: half, bfloat16, float32, float64.
* @li stdevs: A Tensor. Must have the same type as means.
* @li min: A Tensor. Must have the same type as means. The minimum cutoff. May be -infinity.
* @li max: A Tensor. Must have the same type as means. \n

* @par Outputs:
* y: A Tensor. Has the same type as means. \n

* @attention Constraints:
* The implementation for StatelessParameterizedTruncatedNormal on Ascend uses AICPU, which may have poor performance. \n

* @par Third-party framework compatibility
* @li compatible with tensorflow StatelessParameterizedTruncatedNormal operator.
*/

REG_OP(StatelessParameterizedTruncatedNormal)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(seed, TensorType({DT_INT32, DT_INT64}))
.INPUT(means, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(stdevs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(min, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(max, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(StatelessParameterizedTruncatedNormal)

/**
* @brief Generate a single randomly distorted bounding box for an image . \n

* @par Inputs:
* Input images must be a 4-D tensor. Inputs include:
* @li image_size: 1-D, containing [height, width, channels].
* @li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding
boxes associated with the image.
* @li min_object_covered: The cropped area of the image must contain at least
this fraction of any bounding box supplied. The value of this parameter should
be non-negative. In the case of 0, the cropped area does not need to overlap
any of the bounding boxes supplied .
* @li seed: A shape [2] Tensor, the seed to the random number generator. \n

* @par Attributes:
* @li aspect_ratio_range: The cropped area of the image must have an aspect
ratio = width / height within this range.
* @li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
cropped area of the image must contain a fraction of the supplied image
within this range.
* @li max_attempts: Number of attempts at generating a cropped region of the
image of the specified constraints. After max_attempts failures, return the
entire image.
* @li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes
supplied. If true, assume an implicit bounding box covering the whole input.
If false, raise an error . \n

* @par Outputs:
* @li begin: 1-D, containing [offset_height, offset_width, 0].
* @li size: 1-D, containing [target_height, target_width, -1].
* @li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n

* @attention Constraints:
* Input images can be of different types but output images are always float . \n

* @par Third-party framework compatibility
* Compatible with tensorflow StatelessSampleDistortedBoundingBox operator.
*/

REG_OP(StatelessSampleDistortedBoundingBox)
.INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
DT_INT32, DT_INT64 }))
.INPUT(bounding_boxes, TensorType({ DT_FLOAT }))
.INPUT(min_object_covered, TensorType({ DT_FLOAT }))
.INPUT(seed, TensorType({ DT_INT32, DT_INT64 }))
.OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
DT_INT32, DT_INT64 }))
.OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
DT_INT32, DT_INT64 }))
.OUTPUT(bboxes, TensorType({ DT_FLOAT }))
.ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f })
.ATTR(area_range, ListFloat, { 0.05f, 1.0f })
.ATTR(max_attempts, Int, 100)
.ATTR(use_image_if_no_bounding_boxes, Bool, false)
.OP_END_FACTORY_REG(StatelessSampleDistortedBoundingBox)

/**
* @brief Outputs random values from a truncated normal distribution. \n

* @par Inputs:
* Inputs include:
* @li shape: A Tensor. Must be one of the following types: int32, int64. \n
* @li key: Key of RNG algorithm. Shape[1]. \n
* @li counter: Counter of RNG algorithm. Shape[2] for philox, shape[1] for threefry. \n
* @li alg: RNG algorithm. 1:philox 2:threefry. \n

* @par Attributes:
* @li dtype: An optional attr, specifying the output data type. Defaults to "DT_FLOAT". \n

* @par Outputs:
* y: A Tensor of types: float16, float32, double. A tensor of the specified shape
filled with random truncated normal values. \n

* @attention Constraints:
* The implementation for StatelessTruncatedNormalV2 on Ascend uses AICPU, which may have poor performance.

* @par Third-party framework compatibility
* @li compatible with tensorflow StatelessTruncatedNormalV2 operator.
*/

REG_OP(StatelessTruncatedNormalV2)
.INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
.INPUT(key, TensorType({ DT_UINT64 }))
.INPUT(counter, TensorType({ DT_UINT64 }))
.INPUT(alg, TensorType({ DT_INT32 }))
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
.ATTR(dtype, Type, DT_FLOAT)
.OP_END_FACTORY_REG(StatelessTruncatedNormalV2)

/**
* @brief Outputs deterministic pseudorandom random numbers from a gamma distribution. \n

* @par Inputs:
* @li shape: The shape of the output tensor.
* @li seed: 2 seeds (shape [2]).
* @li alpha: The concentration of the gamma distribution. Shape must match the rightmost dimensions of shape. \n

* @par Outputs:
* y: A Tensor. Has the same type as alpha. \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomGammaV2 operator.
*/

REG_OP(StatelessRandomGammaV2)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(seed, TensorType({DT_INT32, DT_INT64}))
.INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE}))
.OP_END_FACTORY_REG(StatelessRandomGammaV2)

/**
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n

* @par Inputs:
* @li shape: The shape of the output tensor.
* @li seed: 2 seeds (shape [2]). \n

* @par Attributes:
* dtype: Output data type . \n

* @par Outputs:
* y: Returns Random values with specified shape . \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomUniformFullInt operator.
*/

REG_OP(StatelessRandomUniformFullInt)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(seed, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
.ATTR(dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(StatelessRandomUniformFullInt)

/**
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n

* @par Inputs:
* @li shape: The shape of the output tensor.
* @li key: Key for the counter-based RNG algorithm.
* @li counter: Initial counter for the counter-based RNG algorithm.
* @li alg: 0-D. The RNG algorithm. \n

* @par Attributes:
* dtype: Output data type . \n

* @par Outputs:
* y: Returns Random values with specified shape . \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomUniformFullIntV2 operator.
*/

REG_OP(StatelessRandomUniformFullIntV2)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(key, TensorType({DT_UINT64}))
.INPUT(counter, TensorType({DT_UINT64}))
.INPUT(alg, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
.ATTR(dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(StatelessRandomUniformFullIntV2)

/**
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n

* @par Inputs:
* @li shape: The shape of the output tensor.
* @li key: Key for the counter-based RNG algorithm.
* @li counter: Initial counter for the counter-based RNG algorithm.
* @li alg: 0-D. The RNG algorithm.
* @li minval: Minimum value (inclusive, scalar).
* @li maxval: Maximum value (exclusive, scalar) . \n

* @par Outputs:
* y: Returns Random values with specified shape . \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomUniformIntV2 operator.
*/

REG_OP(StatelessRandomUniformIntV2)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(key, TensorType({DT_UINT64}))
.INPUT(counter, TensorType({DT_UINT64}))
.INPUT(alg, TensorType({DT_INT32}))
.INPUT(minval, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
.INPUT(maxval, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
.OP_END_FACTORY_REG(StatelessRandomUniformIntV2)

/**
* @brief Outputs deterministic pseudorandom random integers from a binomial distribution. \n

* @par Inputs:
* @li shape: The shape of the output tensor.
* @li seed: 2 seeds (shape [2]).
* @li counts: The counts of the binomial distribution. Must be broadcastable with probs,
* and broadcastable with the rightmost dimensions of shape.
* @li probs: The probability of success for the binomial distribution.
* Must be broadcastable with counts and broadcastable with the rightmost dimensions of shape. \n

* @par Attributes:
* @li dtype: An optional int32, specifying the output data type. Defaults to "DT_INT32". \n

* @par Outputs:
* @li y: Returns Random values with specified shape. \n


#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_
* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomBinomial operator.
*/
REG_OP(StatelessRandomBinomial)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(seed, TensorType({DT_INT32, DT_INT64}))
.INPUT(counts, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
.INPUT(probs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(StatelessRandomBinomial)

/**
* @brief Outputs deterministic pseudorandom random integers from a poisson distribution . \n

* @par Inputs:
* @li shape: The shape of the output tensor.
* @li seed: 2 seeds (shape [2]).
* @li lam: mean value of the poisson distribution . \n

* @par Attributes:
* dtype: Output data type . \n

* @par Outputs:
* y: Returns Random values with specified shape . \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomPoisson operator.
*/

REG_OP(StatelessRandomPoisson)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(seed, TensorType({DT_INT32, DT_INT64}))
.INPUT(lam, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, DT_INT64}))
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(StatelessRandomPoisson)

/**
* @brief Get the RNG algorithm. \n

* @par Outputs:
* @li alg: The RNG algorithm. \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomGetAlg operator.
*/
REG_OP(StatelessRandomGetAlg)
.OUTPUT(alg, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(StatelessRandomGetAlg)

/**
* @brief This op picks the best counter-based RNG algorithm based on device, and
* scrambles a shape-[2] seed into a key and a counter, both needed by the
* counter-based algorithm. \n

* @par Inputs:
* @li seed: 2 seeds (shape [2]). \n

* @par Outputs:
* @li key: Key for the counter-based RNG algorithm.
* @li counter: Initial counter for the counter-based RNG algorithm. \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomGetKeyCounter operator.
*/
REG_OP(StatelessRandomGetKeyCounter)
.INPUT(seed, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(key, TensorType({DT_UINT64}))
.OUTPUT(counter, TensorType({DT_UINT64}))
.OP_END_FACTORY_REG(StatelessRandomGetKeyCounter)

/**
* @brief This op picks the best counter-based RNG algorithm based on device, and
* scrambles a shape-[2] seed into a key and a counter, both needed by the
* counter-based algorithm. \n

* @par Inputs:
* @li seed: 2 seeds (shape [2]). \n

* @par Outputs:
* @li key: Key for the counter-based RNG algorithm.
* @li counter: Initial counter for the counter-based RNG algorithm.
* @li alg: The RNG algorithm. \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomGetKeyCounterAlg operator.
*/
REG_OP(StatelessRandomGetKeyCounterAlg)
.INPUT(seed, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(key, TensorType({DT_UINT64}))
.OUTPUT(counter, TensorType({DT_UINT64}))
.OUTPUT(alg, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(StatelessRandomGetKeyCounterAlg)

/**
* @brief Outputs deterministic pseudorandom values from a normal distribution. \n

* @par Inputs:
* @li shape: The shape of the output tensor.
* @li key: Key for the counter-based RNG algorithm.
* @li counter: Initial counter for the counter-based RNG algorithm.
* @li alg: The RNG algorithm. \n

* @par Attributes:
* @li dtype: Output data type . \n

* @par Outputs:
* @li y: Returns Random values with specified shape . \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomNormalV2 operator.
*/
REG_OP(StatelessRandomNormalV2)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(key, TensorType({DT_UINT64}))
.INPUT(counter, TensorType({DT_UINT64}))
.INPUT(alg, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(dtype, Type, DT_FLOAT)
.OP_END_FACTORY_REG(StatelessRandomNormalV2)

/**
* @brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n

* @par Inputs:
* @li shape: The shape of the output tensor.
* @li key: Key for the counter-based RNG algorithm.
* @li counter: Initial counter for the counter-based RNG algorithm.
* @li alg: 0-D. The RNG algorithm. \n

* @par Attributes:
* dtype: Output data type . \n

* @par Outputs:
* y: Returns Random values with specified shape . \n

* @par Third-party framework compatibility
* Compatible with TensorFlow StatelessRandomUniformV2 operator.
*/

REG_OP(StatelessRandomUniformV2)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(key, TensorType({DT_UINT64}))
.INPUT(counter, TensorType({DT_UINT64}))
.INPUT(alg, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE}))
.ATTR(dtype, Type, DT_FLOAT)
.OP_END_FACTORY_REG(StatelessRandomUniformV2)

/**
* @brief Create a random number seed generator . \n

* @par Inputs:
* include:
* @li seed: 1-D Tensor, the seed used to generate random numbers.
* Must be one of the following types: int32 or int64.
* @li seed2: 1-D Tensor, the seed used to generate random numbers.
* Must be one of the following types: int32 or int64.
* @li reshuffle: 1-D Tensor. Seed selection, True: random seed, False: fixed seed.
* Must be of type bool. \n

* @par Outputs:
* handle: Handle to the random number generator.
* deleter: Handle to the remover,
* used when deleting the random number seed generator. \n

* @see AnonymousSeedGenerator()

* @par Third-party framework compatibility
* compatible with AnonymousSeedGenerator op of tensorflow
*/
REG_OP(AnonymousSeedGenerator)
.INPUT(seed, TensorType({DT_INT32,DT_INT64}))
.INPUT(seed2, TensorType({DT_INT32,DT_INT64}))
.INPUT(reshuffle, TensorType({DT_BOOL}))
.OUTPUT(handle, TensorType({DT_RESOURCE}))
.OUTPUT(deleter, TensorType({DT_VARIANT}))
.OP_END_FACTORY_REG(AnonymousSeedGenerator)

/**
* @brief DeleteSeedGenerator . \n

* @par Inputs:
* @li handle: A Tensor of type resource.
* @li deleter: A Tensor of type variant.

* @par Third-party framework compatibility
* Compatible with TensorFlow DeleteSeedGenerator operator.
*/
REG_OP(DeleteSeedGenerator)
.INPUT(handle, TensorType({DT_RESOURCE}))
.INPUT(deleter, TensorType({DT_VARIANT}))
.OP_END_FACTORY_REG(DeleteSeedGenerator)

/**
* @brief Create a placeholder seed-generator handle that is rewritten and passed
* along during the graph compilation phase. \n

* @par Outputs:
* handle: Handle to the placeholder seed generator . \n
*/
REG_OP(DummySeedGenerator)
.OUTPUT(handle, TensorType({ DT_RESOURCE }))
.OP_END_FACTORY_REG(DummySeedGenerator)

} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_

+ 8
- 2
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -60,7 +60,10 @@ REG_OP(Bitcast)
*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN . \n *x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN . \n


*@par Outputs: *@par Outputs:
*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0.
*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. \n

*@attention Constraints:
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
*/ */
REG_OP(DepthwiseWeight4DTo6D) REG_OP(DepthwiseWeight4DTo6D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
@@ -77,7 +80,10 @@ REG_OP(DepthwiseWeight4DTo6D)
*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN . \n *channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN . \n


*@par Outputs: *@par Outputs:
*y: A 4D Tensor. Has the same type as "x", with format HWCN.
*y: A 4D Tensor. Has the same type as "x", with format HWCN. \n

*@attention Constraints:
*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
*/ */
REG_OP(DepthwiseWeight6DTo4D) REG_OP(DepthwiseWeight6DTo4D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))


+ 6
- 0
third_party/fwkacllib/inc/runtime/base.h View File

@@ -92,6 +92,12 @@ typedef enum tagRtLimitType {
RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms
} rtLimitType_t; } rtLimitType_t;


typedef enum tagRtFloatOverflowMode {
RT_OVERFLOW_MODE_SATURATION = 0,
RT_OVERFLOW_MODE_INFNAN,
RT_OVERFLOW_MODE_UNDEF,
} rtFloatOverflowMode_t;

typedef struct rtExceptionInfo { typedef struct rtExceptionInfo {
uint32_t taskid; uint32_t taskid;
uint32_t streamid; uint32_t streamid;


+ 0
- 6
third_party/fwkacllib/inc/runtime/config.h View File

@@ -145,12 +145,6 @@ typedef enum tagRTTaskTimeoutType {
RT_TIMEOUT_TYPE_OP_EXECUTE, RT_TIMEOUT_TYPE_OP_EXECUTE,
} rtTaskTimeoutType_t; } rtTaskTimeoutType_t;


typedef enum tagRtFloatOverflowMode {
RT_OVERFLOW_MODE_SATURATION = 0,
RT_OVERFLOW_MODE_INFNAN,
RT_OVERFLOW_MODE_UNDEF,
} rtFloatOverflowMode_t;

/** /**
* @ingroup * @ingroup
* @brief get AI core count * @brief get AI core count


+ 26
- 0
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -433,6 +433,32 @@ RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback cal
* @return RT_MEMORY_TS, RT_MEMORY_HBM, RT_MEMORY_TS | RT_MEMORY_POLICY_HUGE_PAGE_ONLY * @return RT_MEMORY_TS, RT_MEMORY_HBM, RT_MEMORY_TS | RT_MEMORY_POLICY_HUGE_PAGE_ONLY
*/ */
RTS_API uint32_t rtGetTsMemType(rtMemRequestFeature_t featureType, uint32_t memSize); RTS_API uint32_t rtGetTsMemType(rtMemRequestFeature_t featureType, uint32_t memSize);

/**
* @ingroup
* @brief set saturation mode for current device.
* @param [in] saturation mode.
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtSetDeviceSatMode(rtFloatOverflowMode_t floatOverflowMode);

/**
* @ingroup
* @brief get saturation mode for current device.
* @param [out] saturation mode.
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetDeviceSatMode(rtFloatOverflowMode_t *floatOverflowMode);

/**
* @ingroup
* @brief get saturation mode for target stream.
* @param [in] target stm
* @param [out] saturation mode.
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetDeviceSatModeForStream(rtStream_t stm, rtFloatOverflowMode_t *floatOverflowMode);
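Taken together, the three declarations above suggest a simple set/query pattern. A minimal usage sketch follows, assuming the usual runtime conventions (rtError_t, RT_ERROR_NONE) and with error handling trimmed; the include paths are assumptions and may differ per install:
#include "runtime/base.h"  // rtFloatOverflowMode_t (assumed include path)
#include "runtime/dev.h"   // rtSetDeviceSatMode / rtGetDeviceSatMode (assumed include path)

// Hypothetical helper: switch the current device to saturation mode and read it back.
rtError_t EnableSaturationMode(rtFloatOverflowMode_t *current) {
  rtError_t ret = rtSetDeviceSatMode(RT_OVERFLOW_MODE_SATURATION);
  if (ret != RT_ERROR_NONE) {
    return ret;
  }
  return rtGetDeviceSatMode(current);  // expected to report RT_OVERFLOW_MODE_SATURATION
}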

#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif #endif


+ 2
- 0
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -229,6 +229,8 @@ typedef void (*rtCallback_t)(void *fnData);
#define RT_KERNEL_DUMPFLAG (0x02U) #define RT_KERNEL_DUMPFLAG (0x02U)
#define RT_FUSION_KERNEL_DUMPFLAG (0x04U) #define RT_FUSION_KERNEL_DUMPFLAG (0x04U)
#define RT_KERNEL_CUSTOM_AICPU (0x08U) #define RT_KERNEL_CUSTOM_AICPU (0x08U)
#define RT_KERNEL_FFTSPLUS_DYNAMIC_SHAPE_DUMPFLAG (0x10U)
#define RT_KERNEL_FFTSPLUS_STATIC_SHAPE_DUMPFLAG (0x20U)


// STARS topic scheduler sqe : topic_type // STARS topic scheduler sqe : topic_type
#define RT_KERNEL_DEVICE_FIRST (0x10U) #define RT_KERNEL_DEVICE_FIRST (0x10U)


+ 29
- 0
third_party/fwkacllib/inc/runtime/mem.h View File

@@ -391,6 +391,18 @@ RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, con
RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind, RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind,
rtStream_t stm); rtStream_t stm);


typedef struct rtMemcpyAddrInfo {
uint32_t resv0;
uint32_t resv1;
uint32_t resv2;
uint32_t len;
uint64_t src;
uint64_t dst;
} rtMemcpyAddrInfo;

RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint64_t count,
rtMemcpyKind_t kind, rtStream_t stream);

/** /**
* @ingroup dvrt_mem * @ingroup dvrt_mem
* @brief asynchronized reduce memcpy * @brief asynchronized reduce memcpy
@@ -409,6 +421,23 @@ RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, ui


/** /**
* @ingroup dvrt_mem * @ingroup dvrt_mem
* @brief asynchronized reduce memcpy
* @param [in] dst destination address pointer
* @param [in] Max length of destination address memory
* @param [in] src source address pointer
* @param [in] count the number of byte to copy
* @param [in] kind memcpy type
* @param [in] type data type
* @param [in] stm asynchronized task stream
* @param [in] overflowAddr addr of overflow flag
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtReduceAsyncV2(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
rtDataType_t type, rtStream_t stm, void *overflowAddr);

/**
* @ingroup dvrt_mem
* @brief synchronized memcpy2D * @brief synchronized memcpy2D
* @param [in] dst destination address pointer * @param [in] dst destination address pointer
* @param [in] dstPitch pitch of destination memory * @param [in] dstPitch pitch of destination memory


+ 9
- 0
third_party/fwkacllib/inc/runtime/rt_ffts_plus.h View File

@@ -17,10 +17,19 @@ extern "C" {
#pragma pack(push) #pragma pack(push)
#pragma pack (1) #pragma pack (1)


typedef struct tagFftsPlusDumpInfo {
const void *loadDumpInfo;
const void *unloadDumpInfo;
uint32_t loadDumpInfolen;
uint32_t unloadDumpInfolen;
} rtFftsPlusDumpInfo_t;


typedef struct tagFftsPlusTaskInfo { typedef struct tagFftsPlusTaskInfo {
const rtFftsPlusSqe_t *fftsPlusSqe; const rtFftsPlusSqe_t *fftsPlusSqe;
const void *descBuf; // include total context const void *descBuf; // include total context
size_t descBufLen; // the length of descBuf size_t descBufLen; // the length of descBuf
rtFftsPlusDumpInfo_t fftsPlusDumpInfo; // used only in the dynamic shape
} rtFftsPlusTaskInfo_t; } rtFftsPlusTaskInfo_t;


#pragma pack(pop) #pragma pack(pop)


+ 8
- 4
third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h View File

@@ -110,7 +110,8 @@ typedef struct tagFftsPlusAicAivCtx {
// 0-3 bytes // 0-3 bytes
uint16_t contextType; uint16_t contextType;
uint8_t successorNum; uint8_t successorNum;
uint8_t resv : 7;
uint8_t resv : 6;
uint8_t dumpSwitch : 1;
uint8_t aten : 1; uint8_t aten : 1;
// 4-7 // 4-7
uint8_t prefetchConfig; uint8_t prefetchConfig;
@@ -172,7 +173,8 @@ typedef struct tagFftsPlusMixAicAivCtx {
// 0-3 bytes // 0-3 bytes
uint16_t contextType; uint16_t contextType;
uint8_t successorNum; uint8_t successorNum;
uint8_t reserved1 : 7;
uint8_t reserved1 : 6;
uint8_t dumpSwitch : 1;
uint8_t aten : 1; uint8_t aten : 1;
// 4-7 // 4-7
uint8_t prefetchConfig; uint8_t prefetchConfig;
@@ -236,7 +238,8 @@ typedef struct tagFftsPlusSdmaCtx {
// 0-3 bytes // 0-3 bytes
uint16_t contextType; uint16_t contextType;
uint8_t successorNum; uint8_t successorNum;
uint8_t res1 : 7;
uint8_t res1 : 6;
uint8_t dumpSwitch : 1;
uint8_t aten : 1; uint8_t aten : 1;
// 4-7 // 4-7
uint8_t res2; uint8_t res2;
@@ -376,7 +379,8 @@ typedef struct tagFftsPlusAiCpuCtx {
// 0-3 bytes // 0-3 bytes
uint16_t contextType; uint16_t contextType;
uint8_t successorNum; uint8_t successorNum;
uint8_t res1 : 7;
uint8_t res1 : 6;
uint8_t dumpSwitch : 1;
uint8_t aten : 1; uint8_t aten : 1;
// 4-7 // 4-7
uint8_t res2; uint8_t res2;


+ 18
- 1
third_party/fwkacllib/inc/runtime/rt_stars_define.h View File

@@ -70,10 +70,27 @@ typedef enum tagFftsPlusType {
RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode
} rtFftsPlusType_t; } rtFftsPlusType_t;


typedef struct tagStarsFftsPlusHeader {
uint8_t type : 6;
uint8_t l1Lock : 1;
uint8_t l1Unlock : 1;

uint8_t ie : 2;
uint8_t preP : 2;
uint8_t postP : 2;
uint8_t wrCqe : 1;
/* tell mcu if this subgraph is overflow-enabled; mcu will send this flag to aicpu when the aicpu ctx is executed */
uint8_t overflowEn : 1;

uint16_t blockDim;

uint16_t rtStreamId;
uint16_t taskId;
} rtStarsFftsPlusHeader_t;
// ffts+ sqe // ffts+ sqe
typedef struct tagFftsPlusSqe { typedef struct tagFftsPlusSqe {
// 0-7 bytes // 0-7 bytes
rtStarsSqeHeader_t sqeHeader;
rtStarsSqeHeader_t sqeHeader; // use rtStarsFftsPlusHeader_t instead
// 8-11 bytes // 8-11 bytes
uint16_t fftsType : 3; uint16_t fftsType : 3;
uint16_t reserved1 : 9; uint16_t reserved1 : 9;


+ 21
- 0
third_party/fwkacllib/inc/runtime/stream.h View File

@@ -27,6 +27,7 @@ extern "C" {
#define RT_STREAM_HEAD (0x20U) #define RT_STREAM_HEAD (0x20U)
#define RT_STREAM_PRIMARY_DEFAULT (0x40U) #define RT_STREAM_PRIMARY_DEFAULT (0x40U)
#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U) #define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U)
#define RT_STREAM_OVERFLOW (0x100U)


/** /**
* @ingroup stream_type * @ingroup stream_type
@@ -212,6 +213,26 @@ RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stm, uint32_t flag, const
*/ */
RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stm); RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stm);


/*
* @ingroup dvrt_stream
* @brief enable or disable stream overflow
* @param [in] stm: stream handle
* @param [in] flag: 0:disable others:enable
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtSetStreamOverflowSwitch(rtStream_t stm, uint32_t flags);

/*
* @ingroup dvrt_stream
* @brief get whether overflow of the stream is enable or disable
* @param [in] stm: stream handle
* @param [out] flag: 0:disable others:enable
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetStreamOverflowSwitch(rtStream_t stm, uint32_t *flags);
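A minimal usage sketch of the two declarations above, assuming the usual runtime conventions (rtError_t, RT_ERROR_NONE) and an assumed include path; illustrative only:
#include <cstdint>
#include "runtime/stream.h"  // rtSetStreamOverflowSwitch / rtGetStreamOverflowSwitch (assumed include path)

// Hypothetical helper: enable overflow detection on a stream and read the switch back.
rtError_t EnableStreamOverflow(rtStream_t stm) {
  rtError_t ret = rtSetStreamOverflowSwitch(stm, 1U);  // 0: disable, non-zero: enable
  if (ret != RT_ERROR_NONE) {
    return ret;
  }
  uint32_t flag = 0U;
  return rtGetStreamOverflowSwitch(stm, &flag);        // flag is expected to read back non-zero
}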

#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif #endif


+ 34
- 0
third_party/fwkacllib/inc/tdt/tsd_client.h View File

@@ -41,6 +41,10 @@ struct InitFlowGwInfo {
char_t rsv[128]; char_t rsv[128];
}; };


typedef enum {
TSD_CAPABILITY_PIDQOS = 0,
TSD_CAPABILITY_BUT
} TsdCapabilityType;
/** /**
* @ingroup Open * @ingroup Open
* @brief Used for the Framework process to communicate with the TSDDaemon process, * @brief Used for the Framework process to communicate with the TSDDaemon process,
@@ -198,6 +202,36 @@ TDT_LIB_EXPORT uint32_t TsdSetMsprofReporterCallback(const MsprofReporterCallbac
* @retval OtherValues Failure * @retval OtherValues Failure
*/ */
TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue); TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue);

/**
* @ingroup TsdCapabilityGet
* @brief use tsd to get some capability
*
* @par type
* capability type
*
* @par ptr
* the result
* @retval TDT_OK Success
* @retval OtherValues Failure
*/
TDT_LIB_EXPORT uint32_t TsdCapabilityGet(const uint32_t logicDeviceId, const int32_t type, const uint64_t ptr);


/**
* @ingroup GetHdcConctStatus
* @brief used to get hdc connection status
*
* @par logicDeviceId
* logic device id
*
* @par hdcSessStat
* hdc session status, DRV_ERROR_SOCKET_CONNECT or DRV_ERROR_SOCKET_CLOSE
* @retval TDT_OK Success
* @retval OtherValues Failure
*/
TDT_LIB_EXPORT uint32_t GetHdcConctStatus(const uint32_t logicDeviceId, int32_t *hdcSessStat);

#ifdef __cplusplus #ifdef __cplusplus
} }
#endif // __cplusplus #endif // __cplusplus


+ 5
- 5
third_party/fwkacllib/inc/tsd/status.h View File

@@ -19,11 +19,11 @@
#include "common/type_def.h" #include "common/type_def.h"
namespace tsd { namespace tsd {
#ifdef __cplusplus #ifdef __cplusplus
using TSD_StatusT = uint32_t;
using TSD_StatusT = uint32_t;
#else #else
typedef uint32_t TSD_StatusT;
typedef uint32_t TSD_StatusT;
#endif #endif
// success code
constexpr TSD_StatusT TSD_OK = 0U;
} // namespace tsd
// success code
constexpr TSD_StatusT TSD_OK = 0U;
}
#endif // INC_TDT_STATUS_H #endif // INC_TDT_STATUS_H
