
code_sync_0428_inc

tags/v1.3.0
dingpeifei · 4 years ago · commit 452ec62cb2
18 changed files with 353 additions and 213 deletions
  1. +37 -34  inc/external/acl/error_codes/rt_error_codes.h
  2. +37 -34  inc/external/runtime/rt_error_codes.h
  3. +16 -0   third_party/fwkacllib/inc/ops/data_flow_ops.h
  4. +8  -6   third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  5. +8  -5   third_party/fwkacllib/inc/ops/image_ops.h
  6. +31 -0   third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
  7. +0  -28  third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
  8. +3  -0   third_party/fwkacllib/inc/ops/nn_calculation_ops.h
  9. +24 -29  third_party/fwkacllib/inc/ops/nn_detect_ops.h
 10. +22 -22  third_party/fwkacllib/inc/ops/nn_norm_ops.h
 11. +50 -9   third_party/fwkacllib/inc/ops/nn_pooling_ops.h
 12. +49 -0   third_party/fwkacllib/inc/ops/nn_training_ops.h
 13. +9  -9   third_party/fwkacllib/inc/ops/parsing_ops.h
 14. +2  -0   third_party/fwkacllib/inc/ops/random_ops.h
 15. +23 -12  third_party/fwkacllib/inc/ops/rnn.h
 16. +27 -0   third_party/fwkacllib/inc/ops/selection_ops.h
 17. +2  -0   third_party/fwkacllib/inc/ops/string_ops.h
 18. +5  -25  third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h

+37 -34  inc/external/acl/error_codes/rt_error_codes.h

@@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
 static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
 static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
 static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
 static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
 static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
 static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
 static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
 static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
 static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
 static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
 static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
 static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
 static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
 static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
 static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
 static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
 static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
 static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
 static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
 static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
 static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
 static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
 static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
 static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
 static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
 static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
 static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
 static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
 static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
 static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
 static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
 static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception

 static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
 static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error


+37 -34  inc/external/runtime/rt_error_codes.h

@@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
 static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
 static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
 static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
 static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
 static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
 static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
 static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
 static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
 static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
 static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
 static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
 static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
 static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
 static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
 static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
 static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
 static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
 static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
 static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
 static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
 static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
 static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
 static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
 static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
 static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
 static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
 static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
 static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
 static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
 static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
 static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
 static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception

 static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
 static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
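The only functional change in both headers is the three new vector-core codes. A minimal sketch of surfacing them to users, assuming nothing beyond the constants above (the helper is illustrative, not part of the ACL API):

#include <cstdint>
#include <string>

// Illustrative helper: maps the newly added vector-core runtime error
// codes (507034-507036) to readable strings.
inline std::string VectorCoreErrorToString(int32_t code) {
  switch (code) {
    case 507034: return "ACL_ERROR_RT_VECTOR_CORE_TIMEOUT: vector core timeout";
    case 507035: return "ACL_ERROR_RT_VECTOR_CORE_EXCEPTION: vector core exception";
    case 507036: return "ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION: vector core trap exception";
    default:     return "unrecognized runtime error code " + std::to_string(code);
  }
}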


+16 -0  third_party/fwkacllib/inc/ops/data_flow_ops.h

@@ -2331,6 +2331,22 @@ REG_OP(CacheAllIndexToLocal)
     .REQUIRED_ATTR(dtype, Type)
     .OP_END_FACTORY_REG(CacheAllIndexToLocal)

+/**
+*@brief DynamicGetNext, dynamically gets the next data element.
+*@par Inputs:
+*x: the iterator; all types are available.
+*@par Outputs:
+*y: the data in the iterator; all types are available.
+*@par Attributes:
+*output_types: types of all outputs.
+*output_shapes: shapes of all outputs.
+*_dynamic_graph_execute_mode: dynamic graph execution mode,
+one of lazy_recompile and dynamic_execute.
+*_getnext_inputs_shape_range: shape ranges of outputs;
+takes effect when _dynamic_graph_execute_mode is dynamic_execute.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
 REG_OP(DynamicGetNext)
     .INPUT(x, TensorType::ALL())
     .DYNAMIC_OUTPUT(y, TensorType::ALL())


+8 -6  third_party/fwkacllib/inc/ops/elewise_calculation_ops.h

@@ -28,7 +28,7 @@ namespace ge {

 *@par Inputs:
 *Dynamic inputs, including:
-* @li x: A list of Tensor objects, each with same shape and type. The supported types are:
+*x: A list of Tensor objects, each with same shape and type. The supported types are:
 * float16, float32, double, int32, uint8, int16, int8, complex64, int64,
 * qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n

@@ -330,8 +330,8 @@ REG_OP(Sub)
 *@brief computes the absolute value of a tensor. \n

 *@par Inputs:
-*One inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
+*One input, including: \n
+*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n

 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n

@@ -3243,9 +3243,11 @@ REG_OP(Fills)
 *@brief Add tensor with scale. \n

 *@par Inputs:
-*Five inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
-* @li x2: A scale. Must be float. \n
+*One input, including: \n
+*x: A Tensor. Must be one of the following types:int32,int16, float16, float32. \n
+
+*@par Attributes:
+*value: A scale. Must be float. \n

 *@par Outputs:
 *@li y: A Tensor. Has the same type and shape as "x1". \n


+8 -5  third_party/fwkacllib/inc/ops/image_ops.h

@@ -1134,7 +1134,7 @@ REG_OP(DecodeBmp)
     .ATTR(channels, Int, 0)
     .OP_END_FACTORY_REG(DecodeBmp)

-/*
+/**
 *@brief Function parse image from string to int. \n

 *@par Inputs:

@@ -1602,11 +1602,11 @@ REG_OP(DecodeJpeg)
 *@brief Image warping using per-pixel flow vectors. \n

 *@par Inputs:
-*@li images: 4-D Tensor with shape `[batch, height, width, channels]`.
+*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
 *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n

 *@par Outputs:
-*y: Returns 4-D with the same shape and dtype as `images`. \n
+*y: Returns 4-D with the same shape and dtype as `image`. \n
 */
 REG_OP(DenseImageWarp)
     .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))

@@ -1709,11 +1709,11 @@ REG_OP(ResizeGradD)

 *@par Inputs:
 *@li grad: gradients with respect to DenseImageWarp output.
-*@li images: 4-D Tensor with shape `[batch, height, width, channels]`.
+*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
 *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n

 *@par Outputs:
-*grad_image: Returns 4-D with the same shape and dtype as `images`.
+*grad_image: Returns 4-D with the same shape and dtype as `image`.
 *grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
 */
 REG_OP(DenseImageWarpGrad)

@@ -1747,6 +1747,9 @@ REG_OP(DenseImageWarpGrad)

 *@par Third-party framework compatibility
 *Compatible with pytorch GridSampler2D operator.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(GridSampler2D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))


+31 -0  third_party/fwkacllib/inc/ops/matrix_calculation_ops.h

@@ -444,6 +444,9 @@ REG_OP(ScatterNdUpdate)

 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterUpdate.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterUpdate)
     .INPUT(x, TensorType::BasicType())

@@ -565,6 +568,9 @@ REG_OP(ScatterNdAdd)

 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterAdd.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterAdd)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))

@@ -623,6 +629,9 @@ REG_OP(ScatterNdSub)

 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterSub.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterSub)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))

@@ -1045,6 +1054,28 @@ REG_OP(MatrixDiagV2)
     .OUTPUT(output, TensorType::BasicType())
     .OP_END_FACTORY_REG(MatrixDiagV2)

+/**
+* @brief Add updates to var_out according to axis and indices.
+
+* @par Inputs:
+* Three inputs, including:
+* @li var: A Tensor. Must be one of the following types:
+* float16, float32, int32, int8, uint8.
+* @li indices: A Tensor of the indices, type should be int32.
+* @li updates: A Tensor of the same type as "var".
+
+* @par Attributes:
+* @li axis: A required int specifying the axis along which the indices add is performed.
+
+* @par Outputs:
+* @li var_out: A Tensor. Same as input "var".
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator index_add.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
 REG_OP(IndexAdd)
     .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
     .INPUT(indices, TensorType({DT_INT32}))
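The IndexAdd comment above reduces to a scatter-add along one axis. A minimal sketch of the documented behavior, assuming axis == 0 and 2-D float data (IndexAddAxis0 is a hypothetical name, not the kernel):

#include <cstdint>
#include <vector>

// var[indices[i], :] += updates[i, :] for each i; var_out is var after the adds.
void IndexAddAxis0(std::vector<std::vector<float>> &var,
                   const std::vector<int32_t> &indices,
                   const std::vector<std::vector<float>> &updates) {
  for (size_t i = 0; i < indices.size(); ++i) {
    std::vector<float> &row = var[indices[i]];
    for (size_t c = 0; c < row.size(); ++c) {
      row[c] += updates[i][c];
    }
  }
}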


+0 -28  third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h

@@ -419,35 +419,7 @@ REG_OP(BNInference)
     .ATTR(use_global_stats, Bool,true)
     .ATTR(mode, Int,1)
     .OP_END_FACTORY_REG(BNInference)
-/**
-*@brief aicpu batch normalization host . \n
-
-*@par Inputs:
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
-*@li momentum: An optional float, mean and variance's Scale factor
-*@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
-*@li use_global_stats: mean inference mode , only can be "True".
-*@li mode: An optional attr, not use
-*@par Outputs:
-*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean
-*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance
-*/
-REG_OP(BnHost)
-    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .ATTR(epsilon, Float, 0.00001)
-    .ATTR(mode, Int, 1)
-    .ATTR(use_global_stats, Bool, true)
-    .OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OP_END_FACTORY_REG(BnHost)
 /**
 *@brief Performs batch normalization . \n


+3 -0  third_party/fwkacllib/inc/ops/nn_calculation_ops.h

@@ -989,6 +989,8 @@ REG_OP(Conv2DCompress)
 *@li deformable_groups: Optional. An integer of type int32. The number of
 * deformable group partitions. In_channels must be divisible by
 * "deformable_groups". Defaults to 1.
+*@li modulated: Optional. Specifies the version of DeformableConv2D; true means v2,
+* false means v1. Currently only v2 is supported.
 *\n
 *\n
 * The following value range restrictions must be met:

@@ -1037,6 +1039,7 @@ REG_OP(DeformableConv2D)
     .ATTR(groups, Int, 1)
     .ATTR(data_format, String, "NHWC")
     .ATTR(deformable_groups, Int, 1)
+    .ATTR(modulated, Bool, true)
     .OP_END_FACTORY_REG(DeformableConv2D)

 /**


+24 -29  third_party/fwkacllib/inc/ops/nn_detect_ops.h

@@ -1202,35 +1202,6 @@ REG_OP(RpnProposalsD)
     .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
     .OP_END_FACTORY_REG(RpnProposalsD)

-/**
-*@brief Computes Score Filte Pre-Sort function.
-
-*@par Inputs:
-*Inputs include:
-* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
-* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
-
-*@par Attributes:
-* @li score_threshold: required, float, threahold of topk process.
-* @li k: required, Int, threahold of topk process.
-* @li score_filter: bool, mark of score_filter. Defaults to "true"
-* @li core_max_num: int, max number of core. Defaults to "8"
-*@par Outputs:
-* @li sorted_proposal: A Tensor. Must be float16.
-* N-D with shape [8*6002, 8].
-* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
-*/
-
-REG_OP(ScoreFiltePreSort)
-    .INPUT(rois, TensorType({DT_FLOAT16}))
-    .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
-    .OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16}))
-    .OUTPUT(proposal_num, TensorType({ DT_UINT32}))
-    .REQUIRED_ATTR(score_threshold, Float)
-    .REQUIRED_ATTR(k, Int)
-    .ATTR(score_filter, Bool, true)
-    .ATTR(core_max_num, Int, 8)
-    .OP_END_FACTORY_REG(ScoreFiltePreSort)

 /**
 *@brief Computes Score Filte Pre-Sort function.

@@ -1500,6 +1471,26 @@ REG_OP(Sort)
     .ATTR(descending, Bool, false)
     .OP_END_FACTORY_REG(Sort)

+/**
+*@brief Computes iou for input bboxes and gtboxes.
+
+*@par Inputs:
+* Two inputs, including:
+*@li bboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1),
+*@li gtboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1).\n
+
+*@par Attributes:
+*@li mode: An optional attribute of type string, specifying the iou mode. \n
+
+*@par Outputs:
+*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n
+
+*@attention Constraints:
+* Only computation of float16 data is supported.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please use Iou instead.
+*/
 REG_OP(PtIou)
     .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))

@@ -1541,6 +1532,9 @@ selected indices from the boxes tensor, where M <= max_output_size. \n

 *@par Third-party framework compatibility
 *Compatible with onnx NonMaxSuppression operator.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */

 REG_OP(NonMaxSuppressionV6)

@@ -1729,3 +1723,4 @@ REG_OP(PSROIPoolingGradV2D)
 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
+
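The PtIou comment above states the shapes but not the arithmetic. A minimal sketch for one box pair, assuming the common (x1, y1, x2, y2) corner convention (the op itself fills the full [n, m] overlap matrix):

#include <algorithm>

// IoU of two axis-aligned boxes given as (x1, y1, x2, y2) corners.
float BoxIou(const float b[4], const float g[4]) {
  float iw = std::max(0.0f, std::min(b[2], g[2]) - std::max(b[0], g[0]));
  float ih = std::max(0.0f, std::min(b[3], g[3]) - std::max(b[1], g[1]));
  float inter = iw * ih;
  float union_area = (b[2] - b[0]) * (b[3] - b[1]) +
                     (g[2] - g[0]) * (g[3] - g[1]) - inter;
  return union_area > 0.0f ? inter / union_area : 0.0f;
}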


+22 -22  third_party/fwkacllib/inc/ops/nn_norm_ops.h

@@ -1073,26 +1073,6 @@ REG_OP(INInferV2D)
     .OP_END_FACTORY_REG(INInferV2D)

 /**
-*@brief Performs instance normalization for inference of InHost part.
-
-*@par Inputs:\n
-* One input, including: (NC1HWC0 supported)
-* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
-
-*@par Attributes:
-* epsilon: An optional float32, specifying the small value added to
-variance to avoid dividing by zero. Defaults to "0.00001" . \n
-
-*@par Outputs:\n
-* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
-*/
-REG_OP(InHost)
-    .INPUT(variance, TensorType({DT_FLOAT}))
-    .OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
-    .ATTR(epsilon, Float, 0.00001)
-    .OP_END_FACTORY_REG(InHost)
-
-/**
 * @brief perform instance normalization to x. \n

 * @par Inputs:

@@ -1124,6 +1104,26 @@ REG_OP(InstanceNorm)
     .REQUIRED_ATTR(epsilon, Float)
     .OP_END_FACTORY_REG(InstanceNorm)

+/**
+* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li grad: A Tensor. Must be one of the following types: float16, float32.
+* Required.
+* @li input: A Tensor. Has the same type as "grad". Required.
+* @li target: A Tensor. Has the same type as "grad". Required. \n
+
+* @par Attributes:
+* @li reduction: An optional attribute of type String. Defaults to "mean". \n
+* @li log_target: An optional attribute of type Bool. Defaults to false. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "grad". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator KlDivLossGrad.
+*/
 REG_OP(KlDivLossGrad)
     .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT}))

@@ -1453,12 +1453,12 @@ REG_OP(PoissonNllLoss)
 *
 * @par Output:
-* y: A mutable Tensor of type int32, with the shape of [num_step, batch_size, hidden_size]. \n
+* y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
 *
 */
 REG_OP(RnnGenMask)
     .INPUT(seq_length, TensorType({DT_INT32}))
-    .OUTPUT(seq_mask, TensorType({DT_INT32}))
+    .OUTPUT(seq_mask, TensorType({DT_FLOAT16}))
     .REQUIRED_ATTR(num_step, Int)
     .REQUIRED_ATTR(hidden_size, Int)
     .OP_END_FACTORY_REG(RnnGenMask)


+50 -9  third_party/fwkacllib/inc/ops/nn_pooling_ops.h

@@ -237,18 +237,18 @@ REG_OP(AvgPool3DD)
 * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
 * @li count_include_pad: When true, will include the zero-padding in the averaging calculation.
 * @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
-* @li data_format: A string, format of input data .
+* @li data_format: A string, format of input data.

 * @par Outputs:
-* @output: A mutable tensor with the same shape and type as "orig_input".
+* @output: A mutable tensor with the same shape and type as "orig_input_shape".

 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator AvgPoolGrad.
 */

 REG_OP(AvgPool3DGrad)
-    .INPUT(orig_input_shape, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
-    .INPUT(grads, TensorType({DT_INT32}))
+    .INPUT(orig_input_shape, TensorType({DT_INT32}))
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
     .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
     .REQUIRED_ATTR(ksize, ListInt)
     .REQUIRED_ATTR(strides, ListInt)

@@ -888,7 +888,7 @@ REG_OP(AvgPoolV2Grad)
 * @brief Computes gradients of averagev2 pooling function.

 * @par Inputs:
-* @li input_grad: An NHWC tensor of type float16, float32, or double.
+*input_grad: An NHWC tensor of type float16, float32, or double.

 * @par Attributes:
 * @li orig_input_shape: A required tuple or list of type int32.

@@ -906,10 +906,10 @@ REG_OP(AvgPoolV2Grad)
 * @li data_format: An optional string. Defaults to "NHWC".

 * @par Outputs:
-* @out_grad: A mutable tensor with the same shape and type as "orig_input".
+*out_grad: A mutable tensor with the same shape and type as "orig_input".

 * @par Third-party framework compatibility
-* @li Compatible with the TensorFlow operator AvgPoolGrad.
+*Compatible with the TensorFlow operator AvgPoolGrad.
 */
 REG_OP(AvgPoolV2GradD)
     .INPUT(input_grad, TensorType({DT_FLOAT16}))

@@ -1682,7 +1682,27 @@ REG_OP(MaxPoolWithArgmaxV1)
     .ATTR(ceil_mode, Bool, false)
     .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)

-// SubSample
+/**
+*@brief Randomly sample a subset of positive and negative examples, and overwrite
+the label vector with the ignore value (-1) for all elements that are not
+included in the sample.\n
+
+* @par Inputs:
+* One input:
+* labels: (N, ) label vector with values. \n
+
+* @par Attributes:
+* @li batch_size_per_images: A required attribute of type int.
+* @li positive_fraction: A required attribute of type float.
+
+*@par Outputs:
+*y: The result of SubSample. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SubSample.
+*@par Restrictions:
+*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+*/
 REG_OP(SubSample)
     .INPUT(labels, TensorType({DT_INT32}))
     .OUTPUT(y, TensorType({DT_INT32}))

@@ -1690,7 +1710,28 @@ REG_OP(SubSample)
     .REQUIRED_ATTR(positive_fraction, Float)
     .OP_END_FACTORY_REG(SubSample)

-// SubSampleLabels
+/**
+*@brief Randomly sample a subset of positive and negative examples, and overwrite
+the label vector with the ignore value (-1) for all elements that are not
+included in the sample.\n
+
+* @par Inputs:
+* Two inputs, including:
+* @li labels: (N, ) label vector with values.
+* @li shuffle_matrix: random matrix with shape (N, ). \n
+
+* @par Attributes:
+* @li batch_size_per_images: A required attribute of type int.
+* @li positive_fraction: A required attribute of type float.
+
+*@par Outputs:
+*y: The result of SubSampleLabels. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SubSampleLabels.
+*@par Restrictions:
+*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+*/
 REG_OP(SubSampleLabels)
     .INPUT(labels, TensorType({DT_INT32}))
     .INPUT(shuffle_matrix, TensorType({DT_INT32}))
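Both SubSample comments above describe the same contract: keep a random subset of positives and negatives, write -1 everywhere else. A minimal sketch, assuming labels use >0 for positive and 0 for negative (SubSampleSketch is a hypothetical helper, not the kernel):

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

// Keeps at most batch_size_per_images labels, targeting positive_fraction
// positives; every label not kept is overwritten with the ignore value -1.
void SubSampleSketch(std::vector<int32_t> &labels, int batch_size_per_images,
                     float positive_fraction, std::mt19937 &rng) {
  std::vector<size_t> pos, neg;
  for (size_t i = 0; i < labels.size(); ++i) {
    (labels[i] > 0 ? pos : neg).push_back(i);
  }
  std::shuffle(pos.begin(), pos.end(), rng);
  std::shuffle(neg.begin(), neg.end(), rng);
  size_t num_pos = std::min(pos.size(),
      static_cast<size_t>(batch_size_per_images * positive_fraction));
  size_t num_neg = std::min(neg.size(),
      static_cast<size_t>(batch_size_per_images) - num_pos);
  std::vector<int32_t> sampled(labels.size(), -1);
  for (size_t i = 0; i < num_pos; ++i) sampled[pos[i]] = labels[pos[i]];
  for (size_t i = 0; i < num_neg; ++i) sampled[neg[i]] = labels[neg[i]];
  labels.swap(sampled);
}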


+49 -0  third_party/fwkacllib/inc/ops/nn_training_ops.h

@@ -2102,6 +2102,55 @@ REG_OP(FusedMulApplyMomentumExtern)
     .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)

 /**
+*@brief Updates '*var' according to the momentum scheme.
+* accum = accum * momentum - x1 * x2 * lr
+* if use_nesterov is True:
+*     var += accum * momentum - x1 * x2 * lr
+* else:
+*     var += accum
+*
+*@par Inputs:
+*@li var: A mutable tensor. Must be one of the data types defined in
+* TensorType::NumberType(). Should be from a Variable().
+*@li accum: A mutable tensor. Has the same type as "var". Should be from a
+* Variable().
+*@li lr: A tensor for the learning rate. Has the same type as "var". Should be
+* from a Variable().
+*@li x1: A Tensor of type TensorType::NumberType().
+*@li momentum: A scalar. Has the same type as "var".
+*@li x2: A scalar with the same type as "var".
+*
+*@par Attributes:
+*@li use_nesterov: An optional bool. Defaults to "False".
+* If "True", var will be updated by using Nesterov momentum.
+*@li use_locking: An optional bool. Defaults to "False".
+* If "True", updating of the "var" tensor is protected by a lock;
+* otherwise the behavior is undefined, but may exhibit less contention.
+*
+*@par Outputs:
+* var: A mutable tensor. Has the same type as input "var".
+*
+*@attention Constraints:
+* The input tensors must have the same shape.
+*
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
+*
+*/
+REG_OP(FusedMulApplyKerasMomentum)
+    .INPUT(var, TensorType::NumberType())
+    .INPUT(accum, TensorType::NumberType())
+    .INPUT(lr, TensorType::NumberType())
+    .INPUT(x1, TensorType::NumberType())
+    .INPUT(momentum, TensorType::NumberType())
+    .INPUT(x2, TensorType::NumberType())
+    .OUTPUT(var, TensorType::NumberType())
+    .OUTPUT(accum, TensorType::NumberType())
+    .ATTR(use_locking, Bool, false)
+    .ATTR(use_nesterov, Bool, false)
+    .OP_END_FACTORY_REG(FusedMulApplyKerasMomentum)
+
+/**
 *@brief Update "g" according to the LARS algorithm . \n

 *@par Inputs:
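The update rule in the new FusedMulApplyKerasMomentum comment is self-contained; written out per element (a sketch of the documented math, not the fused kernel):

// accum = accum * momentum - x1 * x2 * lr
// var  += use_nesterov ? (accum * momentum - x1 * x2 * lr) : accum
void KerasMomentumStep(float &var, float &accum, float lr, float x1,
                       float momentum, float x2, bool use_nesterov) {
  const float scaled_grad = x1 * x2 * lr;  // x1 * x2 acts as the gradient
  accum = accum * momentum - scaled_grad;
  var += use_nesterov ? (accum * momentum - scaled_grad) : accum;
}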


+9 -9  third_party/fwkacllib/inc/ops/parsing_ops.h

@@ -86,7 +86,7 @@ REG_OP(ParseSingleExample)
     .ATTR(sparse_keys, ListString, {})
     .ATTR(dense_keys, ListString, {})
     .ATTR(sparse_types, ListType, {})
-    .ATTR(dense_types, ListType, {})
+    .ATTR(Tdense, ListType, {})
     .ATTR(dense_shapes, ListListInt, {})
     .OP_END_FACTORY_REG(ParseSingleExample)

@@ -173,9 +173,9 @@ REG_OP(ParseTensor)
 REG_OP(DecodeCSV)
     .INPUT(records, TensorType({DT_STRING}))
     .DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
-        DT_INT64, DT_STRING, DT_RESOURCE}))
+        DT_INT64, DT_STRING}))
     .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
-        DT_INT64, DT_STRING, DT_RESOURCE}))
+        DT_INT64, DT_STRING}))
     .ATTR(OUT_TYPE, ListType, {})
     .ATTR(field_delim, String, ",")
     .ATTR(use_quote_delim, Bool, true)

@@ -283,12 +283,12 @@ REG_OP(ParseSingleSequenceExample)
     .ATTR(Ncontext_dense, Int, 0)
     .ATTR(Nfeature_list_sparse, Int, 0)
     .ATTR(Nfeature_list_dense, Int, 0)
-    .REQUIRED_ATTR(context_sparse_types, ListType)
-    .REQUIRED_ATTR(Tcontext_dense, ListType)
-    .REQUIRED_ATTR(feature_list_dense_types, ListType)
-    .REQUIRED_ATTR(context_dense_shapes, ListListInt)
-    .REQUIRED_ATTR(feature_list_sparse_types, ListType)
-    .REQUIRED_ATTR(feature_list_dense_shapes, ListListInt)
+    .ATTR(context_sparse_types, ListType, {})
+    .ATTR(Tcontext_dense, ListType, {})
+    .ATTR(feature_list_dense_types, ListType, {})
+    .ATTR(context_dense_shapes, ListListInt, {})
+    .ATTR(feature_list_sparse_types, ListType, {})
+    .ATTR(feature_list_dense_shapes, ListListInt, {})
     .OP_END_FACTORY_REG(ParseSingleSequenceExample)

 } // namespace ge
} // namespace ge } // namespace ge


+2 -0  third_party/fwkacllib/inc/ops/random_ops.h

@@ -548,6 +548,8 @@ REG_OP(ShuffleChannel)
 * Each value along the axis zero represents the outcome of
 * the corresponding sample in a batch.
 *
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(MultinomialFuss)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64}))


+23 -12  third_party/fwkacllib/inc/ops/rnn.h

@@ -190,7 +190,7 @@ REG_OP(DynamicRNNGrad)
 *@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
 *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
+*@li seq_length:An optional Tensor. Only supports float16 in FRACTAL_NZ and int32 in ND.
 *@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.

@@ -228,7 +228,7 @@ REG_OP(DynamicRNN)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
     .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))

@@ -698,9 +698,6 @@ REG_OP(DynamicGRU)
 *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(DynamicGRUV2)
     .INPUT(x, TensorType({DT_FLOAT16}))

@@ -989,6 +986,27 @@ REG_OP(CommonLSTM)
     .OP_END_FACTORY_REG(CommonLSTM)

 /**
+* @brief Calculates the mask. According to hidden_size and num_step, converts seq_length to a mask.
+*
+* @par Inputs:
+* @li seq_length: A 1D Tensor of type int32. Records the current length of each batch. Shape: [batch_size].
+* @li b: A 1D Tensor of type fp16/fp32. Records the hidden_size. Shape: [4 * hidden_size].
+* @li x: A 3D Tensor of type fp16/fp32. Records num_step/batch_size/input_size. Shape: [num_step, batch_size, input_size].
+*
+* @par Outputs:
+* seq_mask: A 3D Tensor of type fp16/fp32 with the shape [num_step, batch_size, hidden_size]. Has the same type as "b". \n
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(RnnGenMaskV2)
+    .INPUT(seq_length, TensorType({DT_INT32}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(RnnGenMaskV2)
+
+/**
 * @brief Common GRU calculation.

 * @par Inputs:

@@ -1002,22 +1020,15 @@ REG_OP(CommonLSTM)

 * @par Attributes:
 * @li activation_alpha: Optional scaling values used by some activation functions. \n
-
 * @li activation_beta: Optional scaling values used by some activation functions. \n
-
 * @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n
-
 * @li clip: Cell clip threshold. \n
-
 * @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n
-
 * @li hidden_size: Number of neurons in the hidden layer. \n
-
 * @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n

 * @par Outputs:
 * @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ
-
 * @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ
 */
 REG_OP(CommonGRU)
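RnnGenMaskV2 builds the same mask as RnnGenMask in nn_norm_ops.h: entry [t, b, :] is 1 while step t is within batch b's valid length and 0 afterwards. A minimal sketch over a flat [num_step, batch_size, hidden_size] buffer (RnnGenMaskSketch is a hypothetical helper):

#include <algorithm>
#include <cstdint>
#include <vector>

// seq_mask[t][b][h] = (t < seq_length[b]) ? 1.0f : 0.0f
std::vector<float> RnnGenMaskSketch(const std::vector<int32_t> &seq_length,
                                    int num_step, int hidden_size) {
  const int batch_size = static_cast<int>(seq_length.size());
  std::vector<float> mask(
      static_cast<size_t>(num_step) * batch_size * hidden_size, 0.0f);
  for (int t = 0; t < num_step; ++t) {
    for (int b = 0; b < batch_size; ++b) {
      if (t < seq_length[b]) {
        size_t base = (static_cast<size_t>(t) * batch_size + b) * hidden_size;
        std::fill(mask.begin() + base, mask.begin() + base + hidden_size, 1.0f);
      }
    }
  }
  return mask;
}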


+27 -0  third_party/fwkacllib/inc/ops/selection_ops.h

@@ -2254,6 +2254,33 @@ REG_OP(IndexFillD)
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .REQUIRED_ATTR(dim, Int)
     .OP_END_FACTORY_REG(IndexFillD)
+
+/**
+* @brief For each row r of this and for each column c, do (*this)(r, c) += src(j, c), \n
+* where j ranges from indexes[r].first through indexes[r].second - 1. \n
+* In general indexes must be >= 0 and < src.NumRows(); \n
+* but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li x: A Tensor. Must be one of the following types:
+* float16, float32.
+* @li indices: A Tensor of the indices, type should be int32.
+* @li src: A Tensor of the same type as "x". \n
+
+* @par Outputs:
+* @li x: A Tensor. Same as input "x".
+
+* @par Third-party framework compatibility
+* Compatible with the Kaldi operator AddRowRanges.
+*/
+REG_OP(AddRowRanges)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OP_END_FACTORY_REG(AddRowRanges)

 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
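The AddRowRanges formula above comes straight from Kaldi; spelled out as code (a sketch assuming row-major matrices and int32 index pairs, per the comment):

#include <cstdint>
#include <utility>
#include <vector>

// x(r, c) += sum of src(j, c) for j in [indices[r].first, indices[r].second).
// Any pair (i, j) with i >= j, e.g. (-1, -1), denotes an empty range.
void AddRowRangesSketch(std::vector<std::vector<float>> &x,
                        const std::vector<std::pair<int32_t, int32_t>> &indices,
                        const std::vector<std::vector<float>> &src) {
  for (size_t r = 0; r < x.size(); ++r) {
    for (int32_t j = indices[r].first; j < indices[r].second; ++j) {
      for (size_t c = 0; c < x[r].size(); ++c) {
        x[r][c] += src[j][c];
      }
    }
  }
}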

+2 -0  third_party/fwkacllib/inc/ops/string_ops.h

@@ -114,6 +114,7 @@ REG_OP(UnicodeDecodeWithOffsets)
     .ATTR(errors, String, "replace")
     .ATTR(replacement_char, Int, 65533)
     .ATTR(replace_control_characters, Bool, false)
+    .ATTR(Tsplits, Type, DT_INT64)
     .OP_END_FACTORY_REG(UnicodeDecodeWithOffsets)

@@ -161,6 +162,7 @@ REG_OP(UnicodeDecode)
     .ATTR(errors, String, "replace")
     .ATTR(replacement_char, Int, 65533)
     .ATTR(replace_control_characters, Bool, false)
+    .ATTR(Tsplits, Type, DT_INT64)
     .OP_END_FACTORY_REG(UnicodeDecode)

 /**


+5 -25  third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h

@@ -23,8 +23,8 @@
 * @attention None
 * @param option [IN] tuning options
 * @param msg [OUT] message returned when tuning fails
-* @retval #MSTUNE_SUCCESS execution succeeded
-* @retval #MSTUNE_FAILED execution failed
+* @retval #AOE_SUCCESS execution succeeded
+* @retval #AOE_FAILURE execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package this interface belongs to.
 * @li tune_api.h: the header file where this interface is declared.

@@ -35,33 +35,13 @@ AoeStatus AoeOfflineTuning(const std::map<std::string, std::string> &option, std

 /**
 * @ingroup aoe
-* @par Description: gradient tuning
-*
-* @attention None
-* @param tuningGraph [IN] graph to tune
-* @param dependGraph [IN] dependency graphs for tuning
-* @param session [IN] GE session
-* @param option [IN] option set, including tuning options and GE options
-* @retval #MSTUNE_SUCCESS execution succeeded
-* @retval #MSTUNE_FAILED execution failed
-* @par Dependencies:
-* @li tune_api.cpp: the development package this interface belongs to.
-* @li tune_api.h: the header file where this interface is declared.
-* @see None
-* @since
-*/
-extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
-    ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
-
-/**
-* @ingroup aoe
 * @par Description: tuning initialization
 *
 * @attention None
 * @param session [IN] GE session
 * @param option [IN] option set, including tuning options and GE options
 * @retval #AOE_SUCCESS execution succeeded
-* @retval #AOE_FAILED execution failed
+* @retval #AOE_FAILURE execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package this interface belongs to.
 * @li tune_api.h: the header file where this interface is declared.

@@ -77,7 +57,7 @@ extern "C" AoeStatus AoeOnlineInitialize(ge::Session *session, const std::map<st
 * @attention None
 * @param None
 * @retval #AOE_SUCCESS execution succeeded
-* @retval #AOE_FAILED execution failed
+* @retval #AOE_FAILURE execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package this interface belongs to.
 * @li tune_api.h: the header file where this interface is declared.

@@ -96,7 +76,7 @@ extern "C" AoeStatus AoeOnlineFinalize();
 * @param session [IN] GE session
 * @param option [IN] option set, including tuning options and GE options
 * @retval #AOE_SUCCESS execution succeeded
-* @retval #AOE_FAILED execution failed
+* @retval #AOE_FAILURE execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package this interface belongs to.
 * @li tune_api.h: the header file where this interface is declared.

