
code_sync_0428_inc

tags/v1.3.0
dingpeifei 4 years ago
commit 452ec62cb2
18 changed files with 353 additions and 213 deletions
  1. inc/external/acl/error_codes/rt_error_codes.h (+37 -34)
  2. inc/external/runtime/rt_error_codes.h (+37 -34)
  3. third_party/fwkacllib/inc/ops/data_flow_ops.h (+16 -0)
  4. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+8 -6)
  5. third_party/fwkacllib/inc/ops/image_ops.h (+8 -5)
  6. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+31 -0)
  7. third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h (+0 -28)
  8. third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+3 -0)
  9. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+24 -29)
  10. third_party/fwkacllib/inc/ops/nn_norm_ops.h (+22 -22)
  11. third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+50 -9)
  12. third_party/fwkacllib/inc/ops/nn_training_ops.h (+49 -0)
  13. third_party/fwkacllib/inc/ops/parsing_ops.h (+9 -9)
  14. third_party/fwkacllib/inc/ops/random_ops.h (+2 -0)
  15. third_party/fwkacllib/inc/ops/rnn.h (+23 -12)
  16. third_party/fwkacllib/inc/ops/selection_ops.h (+27 -0)
  17. third_party/fwkacllib/inc/ops/string_ops.h (+2 -0)
  18. third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h (+5 -25)

inc/external/acl/error_codes/rt_error_codes.h (+37 -34)

@@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
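The substantive change in this hunk is the three new vector-core codes (507034, 507035, 507036); the remaining lines differ only in comment alignment. A minimal caller-side sketch mapping the new codes to readable strings, assuming only the constants shown above (include path and helper name are illustrative):

#include <cstdint>
#include "acl/error_codes/rt_error_codes.h"  // assumed path; in-repo: inc/external/acl/error_codes/rt_error_codes.h

const char *VectorCoreErrorToString(int32_t code) {
  switch (code) {
    case ACL_ERROR_RT_VECTOR_CORE_TIMEOUT:        return "vector core timeout";
    case ACL_ERROR_RT_VECTOR_CORE_EXCEPTION:      return "vector core exception";
    case ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION: return "vector core trap exception";
    default:                                      return "not a vector core error";
  }
}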


inc/external/runtime/rt_error_codes.h (+37 -34)

@@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error


third_party/fwkacllib/inc/ops/data_flow_ops.h (+16 -0)

@@ -2331,6 +2331,22 @@ REG_OP(CacheAllIndexToLocal)
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(CacheAllIndexToLocal)

/**
*@brief DynamicGetNext: dynamically gets the next data from the iterator.
*@par Inputs:
*x: the iterator; all types are available.
*@par Outputs:
*y: the data in the iterator; all types are available.
*@par Attributes:
*output_types: types of all outputs.
*output_shapes: shapes of all outputs.
*_dynamic_graph_execute_mode: dynamic graph execution mode,
value is one of lazy_recompile and dynamic_execute.
*_getnext_inputs_shape_range: shape ranges of outputs;
takes effect only when _dynamic_graph_execute_mode is dynamic_execute.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicGetNext)
.INPUT(x, TensorType::ALL())
.DYNAMIC_OUTPUT(y, TensorType::ALL())


third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+8 -6)

@@ -28,7 +28,7 @@ namespace ge {

*@par Inputs:
*Dynamic inputs, including:
* @li x: A list of Tensor objects, each with same shape and type. The supported types are:
*x: A list of Tensor objects, each with the same shape and type. The supported types are:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n

@@ -330,8 +330,8 @@ REG_OP(Sub)
*@brief computes the absolute value of a tensor. \n

*@par Inputs:
*One inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
*One input, including: \n
*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n

*@par Outputs:
*y: A Tensor. Has the same type as "x". \n
@@ -3243,9 +3243,11 @@ REG_OP(Fills)
*@brief Add tensor with scale. \n

*@par Inputs:
*Five inputs, including:
* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
* @li x2: A scale. Must be float. \n
*One input, including: \n
*x: A Tensor. Must be one of the following types: int32, int16, float16, float32. \n

*@par Attributes:
*value: A scale. Must be float. \n

*@par Outputs:
*@li y: A Tensor. Has the same type and shape as "x". \n


third_party/fwkacllib/inc/ops/image_ops.h (+8 -5)

@@ -1134,7 +1134,7 @@ REG_OP(DecodeBmp)
.ATTR(channels, Int, 0)
.OP_END_FACTORY_REG(DecodeBmp)

/*
/**
*@brief Function to parse an image from string to int. \n

*@par Inputs:
@@ -1602,11 +1602,11 @@ REG_OP(DecodeJpeg)
*@brief Image warping using per-pixel flow vectors. \n

*@par Inputs:
*@li images: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n

*@par Outputs:
*y: Returns 4-D with the same shape and dtype as `images`. \n
*y: Returns 4-D with the same shape and dtype as `image`. \n
*/
REG_OP(DenseImageWarp)
.INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
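Per the doc above, DenseImageWarp bilinearly resamples `image` at locations displaced by `flow`. A hedged single-channel sketch, assuming the usual dense_image_warp convention (query = pixel location minus flow, flow stored as (dy, dx)); helper names are illustrative:

#include <algorithm>
#include <cmath>
#include <vector>

// Clamp-to-edge bilinear lookup on an H x W single-channel image.
float BilinearSample(const std::vector<float> &img, int H, int W, float y, float x) {
  const int y0 = static_cast<int>(std::floor(y));
  const int x0 = static_cast<int>(std::floor(x));
  const float wy = y - y0, wx = x - x0;
  auto at = [&](int yy, int xx) {
    yy = std::max(0, std::min(H - 1, yy));
    xx = std::max(0, std::min(W - 1, xx));
    return img[yy * W + xx];
  };
  return (1 - wy) * ((1 - wx) * at(y0, x0) + wx * at(y0, x0 + 1)) +
         wy * ((1 - wx) * at(y0 + 1, x0) + wx * at(y0 + 1, x0 + 1));
}

// out(h, w) = image sampled at (h - flow_y, w - flow_x).
void DenseImageWarpRef(const std::vector<float> &image, const std::vector<float> &flow,
                       int H, int W, std::vector<float> &out) {
  out.assign(static_cast<size_t>(H) * W, 0.0f);
  for (int h = 0; h < H; ++h) {
    for (int w = 0; w < W; ++w) {
      const float fy = flow[(h * W + w) * 2 + 0];
      const float fx = flow[(h * W + w) * 2 + 1];
      out[h * W + w] = BilinearSample(image, H, W, h - fy, w - fx);
    }
  }
}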
@@ -1709,11 +1709,11 @@ REG_OP(ResizeGradD)

*@par Inputs:
*@li grad: gradients with respect to DenseImageWarp output.
*@li images: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n

*@par Outputs:
*grad_image: Returns 4-D with the same shape and dtype as `images`.
*grad_image: Returns 4-D with the same shape and dtype as `image`.
*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
*/
REG_OP(DenseImageWarpGrad)
@@ -1747,6 +1747,9 @@ REG_OP(DenseImageWarpGrad)

*@par Third-party framework compatibility
*Compatible with pytorch GridSampler2D operator.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))


third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+31 -0)

@@ -444,6 +444,9 @@ REG_OP(ScatterNdUpdate)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator TensorScatterUpdate.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TensorScatterUpdate)
.INPUT(x, TensorType::BasicType())
@@ -565,6 +568,9 @@ REG_OP(ScatterNdAdd)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator TensorScatterAdd.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TensorScatterAdd)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
@@ -623,6 +629,9 @@ REG_OP(ScatterNdSub)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator TensorScatterSub.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TensorScatterSub)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
@@ -1045,6 +1054,28 @@ REG_OP(MatrixDiagV2)
.OUTPUT(output, TensorType::BasicType())
.OP_END_FACTORY_REG(MatrixDiagV2)

/**
* @brief Add updates to var_out according to axis and indices.

* @par Inputs:
* Three inputs, including:
* @li var: A Tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8.
* @li indices: A Tensor of the indices, type should be int32.
* @li updates: A Tensor of the same type as "var".

* @par Attributes:
* @li axis: A required int, specifying the axis along which to perform the index add.

* @par Outputs:
* @li var_out: A Tensor. Same as input "var".

* @par Third-party framework compatibility
* Compatible with the Pytorch operator index_add.

* @par Restrictions:
* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(IndexAdd)
.INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
.INPUT(indices, TensorType({DT_INT32}))
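The IndexAdd doc above fully determines the computation; a hedged reference for the axis-0 case (nested vectors and the helper name are illustrative, not the op's kernel):

#include <cstdint>
#include <vector>

// Reference semantics for IndexAdd with axis = 0: row indices[i] of var
// accumulates row i of updates; other axes just move the indexed dimension.
void IndexAddAxis0Ref(std::vector<std::vector<float>> &var,
                      const std::vector<int32_t> &indices,
                      const std::vector<std::vector<float>> &updates) {
  for (size_t i = 0; i < indices.size(); ++i) {
    for (size_t c = 0; c < updates[i].size(); ++c) {
      var[indices[i]][c] += updates[i][c];
    }
  }
}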


third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h (+0 -28)

@@ -419,35 +419,7 @@ REG_OP(BNInference)
.ATTR(use_global_stats, Bool,true)
.ATTR(mode, Int,1)
.OP_END_FACTORY_REG(BNInference)
/**
*@brief aicpu batch normalization host . \n

*@par Inputs:

*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
*@li momentum: An optional float, mean and variance's Scale factor
*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: mean inference mode , only can be "True".
*@li mode: An optional attr, not use
*@par Outputs:
*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean
*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance
*/
REG_OP(BnHost)
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(epsilon, Float, 0.00001)
.ATTR(mode, Int, 1)
.ATTR(use_global_stats, Bool, true)
.OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(BnHost)
/**
*@brief Performs batch normalization . \n



third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+3 -0)

@@ -989,6 +989,8 @@ REG_OP(Conv2DCompress)
*@li deformable_groups: Optional. An integer of type int32. The number of
* deformable group partitions. In_channels must be divisible by
* "deformable_groups". Defaults to 1.
*@li modulated: Optional. Specifies the version of DeformableConv2D: "true" means v2,
* "false" means v1. Currently only v2 is supported.
*\n
*\n
* The following value range restrictions must be met:
@@ -1037,6 +1039,7 @@ REG_OP(DeformableConv2D)
.ATTR(groups, Int, 1)
.ATTR(data_format, String, "NHWC")
.ATTR(deformable_groups, Int, 1)
.ATTR(modulated, Bool, true)
.OP_END_FACTORY_REG(DeformableConv2D)

/**


third_party/fwkacllib/inc/ops/nn_detect_ops.h (+24 -29)

@@ -1202,35 +1202,6 @@ REG_OP(RpnProposalsD)
.OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
.OP_END_FACTORY_REG(RpnProposalsD)

/**
*@brief Computes Score Filte Pre-Sort function.

*@par Inputs:
*Inputs include:
* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].

*@par Attributes:
* @li score_threshold: required, float, threahold of topk process.
* @li k: required, Int, threahold of topk process.
* @li score_filter: bool, mark of score_filter. Defaults to "true"
* @li core_max_num: int, max number of core. Defaults to "8"
*@par Outputs:
* @li sorted_proposal: A Tensor. Must be float16.
* N-D with shape [8*6002, 8].
* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
*/

REG_OP(ScoreFiltePreSort)
.INPUT(rois, TensorType({DT_FLOAT16}))
.INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
.OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16}))
.OUTPUT(proposal_num, TensorType({ DT_UINT32}))
.REQUIRED_ATTR(score_threshold, Float)
.REQUIRED_ATTR(k, Int)
.ATTR(score_filter, Bool, true)
.ATTR(core_max_num, Int, 8)
.OP_END_FACTORY_REG(ScoreFiltePreSort)

/**
*@brief Computes the Score Filter Pre-Sort function.
@@ -1500,6 +1471,26 @@ REG_OP(Sort)
.ATTR(descending, Bool, false)
.OP_END_FACTORY_REG(Sort)

/**
*@brief Computes the IoU for the input bboxes and gtboxes.

*@par Inputs:
* Two inputs, including:
*@li bboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1),
*@li gtboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1).\n

*@par Attributes:
*@li mode: An optional attribute of type string, specifying the IoU computation mode. \n

*@par Outputs:
*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n

*@attention Constraints:
* Only computation of float16 data is supported.

*@par Restrictions:
*Warning:THIS FUNCTION IS DEPRECATED. Please use Iou instead.
*/
REG_OP(PtIou)
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
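For the PtIou doc above, each overlap entry is the standard intersection-over-union under the stated (x0, x1, y0, y1) box layout. A sketch of one entry (the optional "mode" attribute is ignored here; helper name is illustrative):

#include <algorithm>

// One overlap entry for a (bbox, gtbox) pair; the op fills the [n, m]
// output with this value for every pair.
float IouRef(const float b[4], const float g[4]) {
  const float iw = std::min(b[1], g[1]) - std::max(b[0], g[0]);  // intersection width
  const float ih = std::min(b[3], g[3]) - std::max(b[2], g[2]);  // intersection height
  if (iw <= 0.0f || ih <= 0.0f) return 0.0f;
  const float inter  = iw * ih;
  const float area_b = (b[1] - b[0]) * (b[3] - b[2]);
  const float area_g = (g[1] - g[0]) * (g[3] - g[2]);
  return inter / (area_b + area_g - inter);
}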
@@ -1541,6 +1532,9 @@ selected indices from the boxes tensor, where M <= max_output_size. \n

*@par Third-party framework compatibility
*Compatible with onnx NonMaxSuppression operator.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(NonMaxSuppressionV6)
@@ -1729,3 +1723,4 @@ REG_OP(PSROIPoolingGradV2D)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_


third_party/fwkacllib/inc/ops/nn_norm_ops.h (+22 -22)

@@ -1073,26 +1073,6 @@ REG_OP(INInferV2D)
.OP_END_FACTORY_REG(INInferV2D)

/**
*@brief Performs instance normalization for inference of InHost part.

*@par Inputs:\n
* One input, including: (NC1HWC0 supported)
* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.

*@par Attributes:
* epsilon: An optional float32, specifying the small value added to
variance to avoid dividing by zero. Defaults to "0.00001" . \n

*@par Outputs:\n
* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
*/
REG_OP(InHost)
.INPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.00001)
.OP_END_FACTORY_REG(InHost)

/**
* @brief perform instance normalization to x. \n

* @par Inputs:
@@ -1124,6 +1104,26 @@ REG_OP(InstanceNorm)
.REQUIRED_ATTR(epsilon, Float)
.OP_END_FACTORY_REG(InstanceNorm)

/**
* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n

* @par Inputs:
* Three inputs, including:
* @li grad: A Tensor. Must be one of the following types: float16, float32.
* Required.
* @li input: A Tensor. Has the same type as "grad". Required.
* @li target: A Tensor. Has the same type as "grad". Required. \n

* @par Attributes:
* @li reduction: An optional attribute of type String. Defaults to "mean". \n
* @li log_target: An optional attribute of type Bool. Defaults to false. \n

* @par Outputs:
* @li y: A Tensor. Has the same type as "grad". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator KlDivLossGrad.
*/
REG_OP(KlDivLossGrad)
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT}))
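KlDivLossGrad above mirrors PyTorch's kl_div backward. A hedged scalar-grad sketch, assuming the PyTorch definition kl_div(input, target) = target * (log(target) - input); the helper is illustrative:

#include <cmath>
#include <cstddef>
#include <vector>

// Gradient w.r.t. "input": -target (or -exp(target) when log_target is
// true), times the incoming scalar grad, scaled by 1/N for "mean" reduction.
std::vector<float> KlDivLossGradRef(float grad, const std::vector<float> &target,
                                    bool log_target, bool mean_reduction) {
  std::vector<float> y(target.size());
  const float scale = mean_reduction ? 1.0f / static_cast<float>(target.size()) : 1.0f;
  for (std::size_t i = 0; i < target.size(); ++i) {
    const float t = log_target ? std::exp(target[i]) : target[i];
    y[i] = -t * grad * scale;
  }
  return y;
}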
@@ -1453,12 +1453,12 @@ REG_OP(PoissonNllLoss)
*
*
* @par Output:
* y: A mutable Tensor of type int32, with the shape of [num_step, batch_size, hidden_size]. \n
* y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
*
*/
REG_OP(RnnGenMask)
.INPUT(seq_length, TensorType({DT_INT32}))
.OUTPUT(seq_mask, TensorType({DT_INT32}))
.OUTPUT(seq_mask, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(num_step, Int)
.REQUIRED_ATTR(hidden_size, Int)
.OP_END_FACTORY_REG(RnnGenMask)


third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+50 -9)

@@ -237,18 +237,18 @@ REG_OP(AvgPool3DD)
* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
* @li count_include_pad: When true, will include the zero-padding in the averaging calculation.
* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
* @li data_format: A string, format of input data .
* @li data_format: A string, format of input data.

* @par Outputs:
* @output: A mutable tensor with the same shape and type as "orig_input".
* @output: A mutable tensor with the same shape and type as "orig_input_shape".

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator AvgPoolGrad.
*/

REG_OP(AvgPool3DGrad)
.INPUT(orig_input_shape, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.INPUT(grads, TensorType({DT_INT32}))
.INPUT(orig_input_shape, TensorType({DT_INT32}))
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
@@ -888,7 +888,7 @@ REG_OP(AvgPoolV2Grad)
* @brief Computes gradients of averagev2 pooling function.

* @par Inputs:
* @li input_grad: An NHWC tensor of type float16, float32, or double.
*input_grad: An NHWC tensor of type float16, float32, or double.

* @par Attributes:
* @li orig_input_shape: A required tuple or list of type int32.
@@ -906,10 +906,10 @@ REG_OP(AvgPoolV2Grad)
* @li data_format: An optional string. Defaults to "NHWC".

* @par Outputs:
* @out_grad: A mutable tensor with the same shape and type as "orig_input".
*out_grad: A mutable tensor with the same shape and type as "orig_input".

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator AvgPoolGrad.
*Compatible with the TensorFlow operator AvgPoolGrad.
*/
REG_OP(AvgPoolV2GradD)
.INPUT(input_grad, TensorType({DT_FLOAT16}))
@@ -1682,7 +1682,27 @@ REG_OP(MaxPoolWithArgmaxV1)
.ATTR(ceil_mode, Bool, false)
.OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)

// SubSample
/**
*@brief Randomly sample a subset of positive and negative examples, and overwrite
the label vector to the ignore value (-1) for all elements that are not
included in the sample.\n

* @par Inputs:
* One input:
* labels: a label vector with shape (N, ). \n

* @par Attributes:
* @li batch_size_per_images: A required attribute of type int.
* @li positive_fraction: A required attribute of type float.

*@par Outputs:
*y: The result of SubSample. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator SubSample.
*@par Restrictions:
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/
REG_OP(SubSample)
.INPUT(labels, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT32}))
@@ -1690,7 +1710,28 @@ REG_OP(SubSample)
.REQUIRED_ATTR(positive_fraction, Float)
.OP_END_FACTORY_REG(SubSample)
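A hedged sketch of the sampling rule described above (label convention and helper name are assumptions; the real op draws randomness from a shuffle matrix rather than an RNG, see SubSampleLabels below):

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

// Assumed convention: label > 0 positive, == 0 negative; positive_fraction
// in [0, 1]. Labels not kept are overwritten with the ignore value -1.
void SubSampleRef(std::vector<int32_t> &labels, int batch_size_per_images,
                  float positive_fraction, std::mt19937 &rng) {
  std::vector<size_t> pos, neg;
  for (size_t i = 0; i < labels.size(); ++i) {
    (labels[i] > 0 ? pos : neg).push_back(i);
  }
  std::shuffle(pos.begin(), pos.end(), rng);
  std::shuffle(neg.begin(), neg.end(), rng);
  const size_t want_pos = static_cast<size_t>(batch_size_per_images * positive_fraction);
  const size_t num_pos = std::min(pos.size(), want_pos);
  const size_t num_neg = std::min(neg.size(), static_cast<size_t>(batch_size_per_images) - num_pos);
  for (size_t i = num_pos; i < pos.size(); ++i) labels[pos[i]] = -1;  // drop extra positives
  for (size_t i = num_neg; i < neg.size(); ++i) labels[neg[i]] = -1;  // drop extra negatives
}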

// SubSampleLabels
/**
*@brief Randomly sample a subset of positive and negative examples, and overwrite
the label vector to the ignore value (-1) for all elements that are not
included in the sample.\n

* @par Inputs:
* Two inputs, including:
* @li labels: a label vector with shape (N, ).
* @li shuffle_matrix: random matrix with shape (N, ). \n

* @par Attributes:
* @li batch_size_per_images: A required attribute of type int.
* @li positive_fraction: A required attribute of type float.

*@par Outputs:
*y: The result of SubSampleLabels. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator SubSampleLabels.
*@par Restrictions:
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/
REG_OP(SubSampleLabels)
.INPUT(labels, TensorType({DT_INT32}))
.INPUT(shuffle_matrix, TensorType({DT_INT32}))


third_party/fwkacllib/inc/ops/nn_training_ops.h (+49 -0)

@@ -2102,6 +2102,55 @@ REG_OP(FusedMulApplyMomentumExtern)
.OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)

/**
*@brief Updates '*var' according to the momentum scheme.
* accum = accum * momentum - x1 * x2 * lr
* if use_nesterov is True:
* var += accum * momentum - x1 * x2 * lr
* else:
* var += accum
*
*@par Inputs:
*@li var: A mutable tensor. Must be one of the data types defined in
* TensorType::NumberType(). Should be from a Variable().
*@li accum: A mutable tensor. Has the same type as "var". Should be from a
* Variable().
*@li lr: A tensor for the learning rate. Has the same type as "var". Should be
* from a Variable().
*@li x1: A Tensor of type TensorType::NumberType().
*@li momentum: A scalar. Has the same type as "var".
*@li x2: A scalar with the same type as "var".
*
*@par Attributes:
*@li use_nesterov: An optional bool. Defaults to "False".
* If "True", var will be updated by using Nesterov momentum.
*@li use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" tensor is protected by a lock;
* otherwise the behavior is undefined, but may exhibit less contention.
*
*@par Outputs:
* var: A mutable tensor. Has the same type as input "var".
*
*@attention Constraints:
* The input tensors must have the same shape.
*
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
*
*/
REG_OP(FusedMulApplyKerasMomentum)
.INPUT(var, TensorType::NumberType())
.INPUT(accum, TensorType::NumberType())
.INPUT(lr, TensorType::NumberType())
.INPUT(x1, TensorType::NumberType())
.INPUT(momentum, TensorType::NumberType())
.INPUT(x2, TensorType::NumberType())
.OUTPUT(var, TensorType::NumberType())
.OUTPUT(accum, TensorType::NumberType())
.ATTR(use_locking, Bool, false)
.ATTR(use_nesterov, Bool, false)
.OP_END_FACTORY_REG(FusedMulApplyKerasMomentum)
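The update scheme quoted in the doc block translates directly; a scalar reference (the op applies it elementwise across the tensor inputs):

// x1 * x2 is the fused multiply that stands in for the plain gradient of
// ResourceApplyKerasMomentum.
void FusedMulApplyKerasMomentumRef(float &var, float &accum, float lr, float x1,
                                   float momentum, float x2, bool use_nesterov) {
  accum = accum * momentum - x1 * x2 * lr;
  if (use_nesterov) {
    var += accum * momentum - x1 * x2 * lr;
  } else {
    var += accum;
  }
}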

/**
*@brief Update "g" according to the LARS algorithm . \n

*@par Inputs:


third_party/fwkacllib/inc/ops/parsing_ops.h (+9 -9)

@@ -86,7 +86,7 @@ REG_OP(ParseSingleExample)
.ATTR(sparse_keys, ListString, {})
.ATTR(dense_keys, ListString, {})
.ATTR(sparse_types, ListType, {})
.ATTR(dense_types, ListType, {})
.ATTR(Tdense, ListType, {})
.ATTR(dense_shapes, ListListInt, {})
.OP_END_FACTORY_REG(ParseSingleExample)

@@ -173,9 +173,9 @@ REG_OP(ParseTensor)
REG_OP(DecodeCSV)
.INPUT(records, TensorType({DT_STRING}))
.DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
DT_INT64, DT_STRING, DT_RESOURCE}))
DT_INT64, DT_STRING}))
.DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
DT_INT64, DT_STRING, DT_RESOURCE}))
DT_INT64, DT_STRING}))
.ATTR(OUT_TYPE, ListType, {})
.ATTR(field_delim, String, ",")
.ATTR(use_quote_delim, Bool, true)
@@ -283,12 +283,12 @@ REG_OP(ParseSingleSequenceExample)
.ATTR(Ncontext_dense, Int, 0)
.ATTR(Nfeature_list_sparse, Int, 0)
.ATTR(Nfeature_list_dense, Int, 0)
.REQUIRED_ATTR(context_sparse_types, ListType)
.REQUIRED_ATTR(Tcontext_dense, ListType)
.REQUIRED_ATTR(feature_list_dense_types, ListType)
.REQUIRED_ATTR(context_dense_shapes, ListListInt)
.REQUIRED_ATTR(feature_list_sparse_types, ListType)
.REQUIRED_ATTR(feature_list_dense_shapes, ListListInt)
.ATTR(context_sparse_types, ListType, {})
.ATTR(Tcontext_dense, ListType, {})
.ATTR(feature_list_dense_types, ListType, {})
.ATTR(context_dense_shapes, ListListInt, {})
.ATTR(feature_list_sparse_types, ListType, {})
.ATTR(feature_list_dense_shapes, ListListInt, {})
.OP_END_FACTORY_REG(ParseSingleSequenceExample)

} // namespace ge


third_party/fwkacllib/inc/ops/random_ops.h (+2 -0)

@@ -548,6 +548,8 @@ REG_OP(ShuffleChannel)
* Each value along the axis zero represents the outcome of
* the corresponding sample in a batch.
*
* @par Restrictions:
* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(MultinomialFuss)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64}))


third_party/fwkacllib/inc/ops/rnn.h (+23 -12)

@@ -190,7 +190,7 @@ REG_OP(DynamicRNNGrad)
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li seq_length: An optional Tensor. Only supports float16 in FRACTAL_NZ format and int32 in ND format.
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
@@ -228,7 +228,7 @@ REG_OP(DynamicRNN)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -698,9 +698,6 @@ REG_OP(DynamicGRU)
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicGRUV2)
.INPUT(x, TensorType({DT_FLOAT16}))
@@ -989,6 +986,27 @@ REG_OP(CommonLSTM)
.OP_END_FACTORY_REG(CommonLSTM)

/**
* @brief Calculate the mask. According to hidden_size and num_step, convert seq_length to mask.
*
* @par Inputs:
* @li seq_length: A 1D Tensor of type int32. Records the current length of each batch. Shape: [batch_size].
* @li b: A 1D Tensor of type fp16/fp32. Records the hidden_size. Shape: [4 * hidden_size].
* @li x: A 3D Tensor of type fp16/fp32. Records num_step/batch_size/input_size. Shape: [num_step, batch_size, input_size].
*
* @par Outputs:
* seq_mask: A 3D Tensor of type fp16/fp32 with the shape [num_step, batch_size, hidden_size]. Has the same type as "b". \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(RnnGenMaskV2)
.INPUT(seq_length, TensorType({DT_INT32}))
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(RnnGenMaskV2)
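Both RnnGenMask variants implement the same mask rule: position (t, b, h) is 1 while t is below the recorded sequence length of batch b, else 0. A hedged reference (flat layout and helper name are illustrative):

#include <cstdint>
#include <vector>

// mask(t, b, h) = 1.0 while t < seq_length[b], else 0.0; flat layout is
// [num_step, batch_size, hidden_size] as in the docs above.
std::vector<float> RnnGenMaskRef(const std::vector<int32_t> &seq_length,
                                 int num_step, int hidden_size) {
  const int batch_size = static_cast<int>(seq_length.size());
  std::vector<float> mask(static_cast<size_t>(num_step) * batch_size * hidden_size, 0.0f);
  for (int t = 0; t < num_step; ++t) {
    for (int b = 0; b < batch_size; ++b) {
      if (t < seq_length[b]) {
        for (int h = 0; h < hidden_size; ++h) {
          mask[(static_cast<size_t>(t) * batch_size + b) * hidden_size + h] = 1.0f;
        }
      }
    }
  }
  return mask;
}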

/**
* @brief Common GRU calculation.

* @par Inputs:
@@ -1002,22 +1020,15 @@ REG_OP(CommonLSTM)

* @par Attributes:
* @li activation_alpha: Optional scaling values used by some activation functions. \n

* @li activation_beta: Optional scaling values used by some activation functions. \n

* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n

* @li clip: Cell clip threshold. \n

* @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n

* @li hidden_size: Number of neurons in the hidden layer. \n

* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n

* @par Outputs:
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ

* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ
*/
REG_OP(CommonGRU)


third_party/fwkacllib/inc/ops/selection_ops.h (+27 -0)

@@ -2254,6 +2254,33 @@ REG_OP(IndexFillD)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(dim, Int)
.OP_END_FACTORY_REG(IndexFillD)

/**
* @brief For each row r of x and for each column c, do x(r, c) += src(j, c), \n
* where j ranges from indices[r].first through indices[r].second - 1. \n
* In general, indices must be >= 0 and < src.NumRows(); \n
* but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n

* @par Inputs:
* Three inputs, including:
* @li x: A Tensor. Must be one of the following types:
* float16, float32.
* @li indices: A Tensor of the indices, type should be int32.
* @li src: A Tensor of the same type as "x". \n

* @par Outputs:
* @li x: A Tensor. Same as input "x".

* @par Third-party framework compatibility
* Compatible with the kaldi operator AddRowRanges.
*/
REG_OP(AddRowRanges)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(indices, TensorType({DT_INT32}))
.OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(AddRowRanges)
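The AddRowRanges doc above pins the semantics down; a hedged reference using nested vectors and int pairs for the row ranges (illustrative, not the kernel):

#include <utility>
#include <vector>

// x(r, c) += src(j, c) for j in [indices[r].first, indices[r].second);
// (-1, -1), or any pair with first >= second, denotes an empty range.
void AddRowRangesRef(std::vector<std::vector<float>> &x,
                     const std::vector<std::vector<float>> &src,
                     const std::vector<std::pair<int, int>> &indices) {
  for (size_t r = 0; r < x.size(); ++r) {
    for (int j = indices[r].first; j < indices[r].second; ++j) {
      for (size_t c = 0; c < x[r].size(); ++c) {
        x[r][c] += src[j][c];
      }
    }
  }
}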

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_

third_party/fwkacllib/inc/ops/string_ops.h (+2 -0)

@@ -114,6 +114,7 @@ REG_OP(UnicodeDecodeWithOffsets)
.ATTR(errors, String, "replace")
.ATTR(replacement_char, Int, 65533)
.ATTR(replace_control_characters, Bool, false)
.ATTR(Tsplits, Type, DT_INT64)
.OP_END_FACTORY_REG(UnicodeDecodeWithOffsets)

/**
@@ -161,6 +162,7 @@ REG_OP(UnicodeDecode)
.ATTR(errors, String, "replace")
.ATTR(replacement_char, Int, 65533)
.ATTR(replace_control_characters, Bool, false)
.ATTR(Tsplits, Type, DT_INT64)
.OP_END_FACTORY_REG(UnicodeDecode)

/**


third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h (+5 -25)

@@ -23,8 +23,8 @@
* @attention None
* @param option [IN] tuning options
* @param msg [OUT] message returned when tuning fails
* @retval #MSTUNE_SUCCESS executed successfully
* @retval #MSTUNE_FAILED execution failed
* @retval #AOE_SUCCESS executed successfully
* @retval #AOE_FAILURE execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.
@@ -35,33 +35,13 @@ AoeStatus AoeOfflineTuning(const std::map<std::string, std::string> &option, std

/**
* @ingroup aoe
* @par Description: gradient tuning
*
* @attention None
* @param tuningGraph [IN] graph to tune
* @param dependGraph [IN] dependency graphs for tuning
* @param session [IN] GE session
* @param option [IN] option set, containing tuning options and GE options
* @retval #MSTUNE_SUCCESS executed successfully
* @retval #MSTUNE_FAILED execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.
* @see None
* @since
*/
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);

/**
* @ingroup aoe
* @par Description: tuning initialization
*
* @attention None
* @param session [IN] GE session
* @param option [IN] option set, containing tuning options and GE options
* @retval #AOE_SUCCESS executed successfully
* @retval #AOE_FAILED execution failed
* @retval #AOE_FAILURE execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.
@@ -77,7 +57,7 @@ extern "C" AoeStatus AoeOnlineInitialize(ge::Session *session, const std::map<st
* @attention None
* @param None
* @retval #AOE_SUCCESS executed successfully
* @retval #AOE_FAILED execution failed
* @retval #AOE_FAILURE execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.
@@ -96,7 +76,7 @@ extern "C" AoeStatus AoeOnlineFinalize();
* @param session [IN] GE session
* @param option [IN] option set, containing tuning options and GE options
* @retval #AOE_SUCCESS executed successfully
* @retval #AOE_FAILED execution failed
* @retval #AOE_FAILURE execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.

