From 452ec62cb297fe61cab05c62a0ebd1176fe4e060 Mon Sep 17 00:00:00 2001 From: dingpeifei Date: Thu, 29 Apr 2021 16:10:37 +0800 Subject: [PATCH] code_sync_0428_inc --- inc/external/acl/error_codes/rt_error_codes.h | 71 +++++++++++----------- inc/external/runtime/rt_error_codes.h | 71 +++++++++++----------- third_party/fwkacllib/inc/ops/data_flow_ops.h | 16 +++++ .../fwkacllib/inc/ops/elewise_calculation_ops.h | 14 +++-- third_party/fwkacllib/inc/ops/image_ops.h | 13 ++-- .../fwkacllib/inc/ops/matrix_calculation_ops.h | 31 ++++++++++ third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h | 28 --------- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 3 + third_party/fwkacllib/inc/ops/nn_detect_ops.h | 53 ++++++++-------- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 44 +++++++------- third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 59 +++++++++++++++--- third_party/fwkacllib/inc/ops/nn_training_ops.h | 49 +++++++++++++++ third_party/fwkacllib/inc/ops/parsing_ops.h | 18 +++--- third_party/fwkacllib/inc/ops/random_ops.h | 2 + third_party/fwkacllib/inc/ops/rnn.h | 35 +++++++---- third_party/fwkacllib/inc/ops/selection_ops.h | 27 ++++++++ third_party/fwkacllib/inc/ops/string_ops.h | 2 + .../fwkacllib/inc/toolchain/tuning_tool/tune_api.h | 30 ++------- 18 files changed, 353 insertions(+), 213 deletions(-) diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 437a5544..1c196c48 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index 437a5544..1c196c48 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index 0043c027..05de1b0a 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -2331,6 +2331,22 @@ REG_OP(CacheAllIndexToLocal) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(CacheAllIndexToLocal) +/** +*@brief DynamicGetNext, dynamic get next data +*@par Inputs: +*x: the iterator, all types are available +*@par Outputs: +*y: the date in iterator, all types are available +*@par Attributes: +*output_types: types of all outputs +*output_shapes: shapes of all outputs +*_dynamic_graph_execute_mode: dynamic graph execution mode, +value is one of lazy_recompile and dynamic_execute +*_getnext_inputs_shape_range: shape ranges of outputs, +it works where _dynamic_graph_execute_mode is dynamic_execute +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ REG_OP(DynamicGetNext) .INPUT(x, TensorType::ALL()) .DYNAMIC_OUTPUT(y, TensorType::ALL()) diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 209967bd..8c87cfe8 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -28,7 +28,7 @@ namespace ge { *@par Inputs: *Dynamic inputs, including: -* @li x: A list of Tensor objects, each with same shape and type. The supported types are: +*x: A list of Tensor objects, each with same shape and type. The supported types are: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, * qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n @@ -330,8 +330,8 @@ REG_OP(Sub) *@brief computes the absolute value of a tensor. \n *@par Inputs: -*One inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n +*One input, including: \n +*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n *@par Outputs: *y: A Tensor. Has the same type as "x". \n @@ -3243,9 +3243,11 @@ REG_OP(Fills) *@brief Add tensor with scale. \n *@par Inputs: -*Five inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. \n +*One input, including: \n +*x: A Tensor. Must be one of the following types:int32,int16, float16, float32. \n + +*@par Attributes: +*value: A scale. Must be float. \n *@par Outputs: *@li y: A Tensor. Has the same type and shape as "x1". \n diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 3ef4d95e..e6802c1e 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1134,7 +1134,7 @@ REG_OP(DecodeBmp) .ATTR(channels, Int, 0) .OP_END_FACTORY_REG(DecodeBmp) -/* +/** *@brief Function parse image from string to int. \n *@par Inputs: @@ -1602,11 +1602,11 @@ REG_OP(DecodeJpeg) *@brief Image warping using per-pixel flow vectors. \n *@par Inputs: -*@li images: 4-D Tensor with shape `[batch, height, width, channels]`. +*@li image: 4-D Tensor with shape `[batch, height, width, channels]`. *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n *@par Outputs: -*y: Returns 4-D with the same shape and dtype as `images`. \n +*y: Returns 4-D with the same shape and dtype as `image`. \n */ REG_OP(DenseImageWarp) .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -1709,11 +1709,11 @@ REG_OP(ResizeGradD) *@par Inputs: *@li grad: gradients with respect to DenseImageWarp output. -*@li images: 4-D Tensor with shape `[batch, height, width, channels]`. +*@li image: 4-D Tensor with shape `[batch, height, width, channels]`. *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n *@par Outputs: -*grad_image: Returns 4-D with the same shape and dtype as `images`. +*grad_image: Returns 4-D with the same shape and dtype as `image`. *grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n */ REG_OP(DenseImageWarpGrad) @@ -1747,6 +1747,9 @@ REG_OP(DenseImageWarpGrad) *@par Third-party framework compatibility *Compatible with pytorch GridSampler2D operator. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(GridSampler2D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 3340007c..083d4f9c 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -444,6 +444,9 @@ REG_OP(ScatterNdUpdate) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterUpdate. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterUpdate) .INPUT(x, TensorType::BasicType()) @@ -565,6 +568,9 @@ REG_OP(ScatterNdAdd) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterAdd. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterAdd) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -623,6 +629,9 @@ REG_OP(ScatterNdSub) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterSub. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterSub) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -1045,6 +1054,28 @@ REG_OP(MatrixDiagV2) .OUTPUT(output, TensorType::BasicType()) .OP_END_FACTORY_REG(MatrixDiagV2) +/** +* @brief Add updates to var_out according to axis and indices. + +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8. +* @li indices: A Tensor of the indices, type should be int32. +* @li updates: A Tensor of the same type as "var". + +* @par Attributes: +* @li axis: An required int to specify the axis to perform indices add. + +* @par Outputs: +* @li var_out: A Tensor. Same as input "var". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator index_add. + +* @par Restrictions: +* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ REG_OP(IndexAdd) .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) .INPUT(indices, TensorType({DT_INT32})) diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index ddd70bc8..9629976e 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -419,35 +419,7 @@ REG_OP(BNInference) .ATTR(use_global_stats, Bool,true) .ATTR(mode, Int,1) .OP_END_FACTORY_REG(BNInference) -/** -*@brief aicpu batch normalization host . \n - -*@par Inputs: -*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. -*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. -*@li momentum: An optional float, mean and variance's Scale factor -*@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". -*@li use_global_stats: mean inference mode , only can be "True". -*@li mode: An optional attr, not use -*@par Outputs: -*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean -*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance -*/ -REG_OP(BnHost) - .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT})) - .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT})) - .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT})) - .ATTR(epsilon, Float, 0.00001) - .ATTR(mode, Int, 1) - .ATTR(use_global_stats, Bool, true) - .OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT})) - .OP_END_FACTORY_REG(BnHost) /** *@brief Performs batch normalization . \n diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 8cbdf9ff..0eeeb511 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -989,6 +989,8 @@ REG_OP(Conv2DCompress) *@li deformable_groups: Optional. An integer of type int32. The number of * deformable group partitions. In_channels must be divisible by * "deformable_groups". Defaults to 1. +*@li modulated: Optional. Specify version of DeformableConv2D, true means v2, +* false means v1, currently only support v2. *\n *\n * The following value range restrictions must be met: @@ -1037,6 +1039,7 @@ REG_OP(DeformableConv2D) .ATTR(groups, Int, 1) .ATTR(data_format, String, "NHWC") .ATTR(deformable_groups, Int, 1) + .ATTR(modulated, Bool, true) .OP_END_FACTORY_REG(DeformableConv2D) /** diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 33148e62..cc60f483 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -1202,35 +1202,6 @@ REG_OP(RpnProposalsD) .OUTPUT(sorted_box, TensorType({DT_FLOAT16})) .OP_END_FACTORY_REG(RpnProposalsD) -/** -*@brief Computes Score Filte Pre-Sort function. - -*@par Inputs: -*Inputs include: -* @li rois: A Tensor. Must be float16. N-D with shape [N, 4]. -* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1]. - -*@par Attributes: -* @li score_threshold: required, float, threahold of topk process. -* @li k: required, Int, threahold of topk process. -* @li score_filter: bool, mark of score_filter. Defaults to "true" -* @li core_max_num: int, max number of core. Defaults to "8" -*@par Outputs: -* @li sorted_proposal: A Tensor. Must be float16. -* N-D with shape [8*6002, 8]. -* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8]. -*/ - -REG_OP(ScoreFiltePreSort) - .INPUT(rois, TensorType({DT_FLOAT16})) - .INPUT(cls_bg_prob, TensorType({DT_FLOAT16})) - .OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16})) - .OUTPUT(proposal_num, TensorType({ DT_UINT32})) - .REQUIRED_ATTR(score_threshold, Float) - .REQUIRED_ATTR(k, Int) - .ATTR(score_filter, Bool, true) - .ATTR(core_max_num, Int, 8) - .OP_END_FACTORY_REG(ScoreFiltePreSort) /** *@brief Computes Score Filte Pre-Sort function. @@ -1500,6 +1471,26 @@ REG_OP(Sort) .ATTR(descending, Bool, false) .OP_END_FACTORY_REG(Sort) +/** +*@brief Computes iou for input bboxes and gtboxes. + +*@par Inputs: +* Two inputs, including: +*@li bboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1), +*@li gtboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1).\n + +*@par Attributes: +*@li mode: A optional attribute of type string, whether judge the mode of iou. \n + +*@par Outputs: +*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n + +*@attention Constraints: +* Only computation of float16 data is supported. + +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please use Iou instead. +*/ REG_OP(PtIou) .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1541,6 +1532,9 @@ selected indices from the boxes tensor, where M <= max_output_size. \n *@par Third-party framework compatibility *Compatible with onnx NonMaxSuppression operator. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NonMaxSuppressionV6) @@ -1729,3 +1723,4 @@ REG_OP(PSROIPoolingGradV2D) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ + diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index a0251d88..22023f46 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -1073,26 +1073,6 @@ REG_OP(INInferV2D) .OP_END_FACTORY_REG(INInferV2D) /** -*@brief Performs instance normalization for inference of InHost part. - -*@par Inputs:\n -* One input, including: (NC1HWC0 supported) -* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. - -*@par Attributes: -* epsilon: An optional float32, specifying the small value added to -variance to avoid dividing by zero. Defaults to "0.00001" . \n - -*@par Outputs:\n -* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. -*/ -REG_OP(InHost) - .INPUT(variance, TensorType({DT_FLOAT})) - .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) - .ATTR(epsilon, Float, 0.00001) - .OP_END_FACTORY_REG(InHost) - -/** * @brief perform instance normalization to x. \n * @par Inputs: @@ -1124,6 +1104,26 @@ REG_OP(InstanceNorm) .REQUIRED_ATTR(epsilon, Float) .OP_END_FACTORY_REG(InstanceNorm) +/** +* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n + +* @par Inputs: +* Three inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32. +* Required. +* @li input: A Tensor. Has the same type as "grad". Required. +* @li target: A Tensor. Has the same type as "grad". Required. \n + +* @par Attributes: +* @li reduction: An optional attribute of type String. Defaults to "mean". \n +* @li log_target: An optional attribute of type Bool. Defaults to false. \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "grad". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator KlDivLossGrad. +*/ REG_OP(KlDivLossGrad) .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1453,12 +1453,12 @@ REG_OP(PoissonNllLoss) * * * @par Output: - * y: A mutable Tensor of type int32, with the shape of [num_step, batch_size, hidden_size]. \n + * y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n * */ REG_OP(RnnGenMask) .INPUT(seq_length, TensorType({DT_INT32})) - .OUTPUT(seq_mask, TensorType({DT_INT32})) + .OUTPUT(seq_mask, TensorType({DT_FLOAT16})) .REQUIRED_ATTR(num_step, Int) .REQUIRED_ATTR(hidden_size, Int) .OP_END_FACTORY_REG(RnnGenMask) diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 743c28b7..ef9fabb8 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -237,18 +237,18 @@ REG_OP(AvgPool3DD) * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. * @li count_include_pad: When true, will include the zero-padding in the averaging calculation. * @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -* @li data_format: A string, format of input data . +* @li data_format: A string, format of input data. * @par Outputs: -* @output: A mutable tensor with the same shape and type as "orig_input". +* @output: A mutable tensor with the same shape and type as "orig_input_shape". * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. */ REG_OP(AvgPool3DGrad) - .INPUT(orig_input_shape, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .INPUT(grads, TensorType({DT_INT32})) + .INPUT(orig_input_shape, TensorType({DT_INT32})) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) @@ -888,7 +888,7 @@ REG_OP(AvgPoolV2Grad) * @brief Computes gradients of averagev2 pooling function. * @par Inputs: -* @li input_grad: An NHWC tensor of type float16, float32, or double. +*input_grad: An NHWC tensor of type float16, float32, or double. * @par Attributes: * @li orig_input_shape: A required tuple or list of type int32. @@ -906,10 +906,10 @@ REG_OP(AvgPoolV2Grad) * @li data_format: An optional string. Defaults to "NHWC". * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". +*out_grad: A mutable tensor with the same shape and type as "orig_input". * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator AvgPoolGrad. +*Compatible with the TensorFlow operator AvgPoolGrad. */ REG_OP(AvgPoolV2GradD) .INPUT(input_grad, TensorType({DT_FLOAT16})) @@ -1682,7 +1682,27 @@ REG_OP(MaxPoolWithArgmaxV1) .ATTR(ceil_mode, Bool, false) .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) -// SubSample +/** +*@brief Randomly sample a subset of positive and negative examples,and overwrite +the label vector to the ignore value (-1) for all elements that are not +included in the sample.\n + +* @par Inputs: +* One input: +* labels: shape of labels,(N, ) label vector with values. \n + +* @par Attributes: +* @li batch_size_per_images: A require attribute of type int. +* @li positive_fraction: A require attribute of type float. + +*@par Outputs: +*y: The result of subSample. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator SubSample. +*@par Restrictions: +*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. +*/ REG_OP(SubSample) .INPUT(labels, TensorType({DT_INT32})) .OUTPUT(y, TensorType({DT_INT32})) @@ -1690,7 +1710,28 @@ REG_OP(SubSample) .REQUIRED_ATTR(positive_fraction, Float) .OP_END_FACTORY_REG(SubSample) -// SubSampleLabels +/** +*@brief Randomly sample a subset of positive and negative examples,and overwrite +the label vector to the ignore value (-1) for all elements that are not +included in the sample.\n + +* @par Inputs: +* two inputs, including: +* @li labels: shape of labels,(N, ) label vector with values:. +* @li shuffle_matrix: random matrix with shape (N, ). \n + +* @par Attributes: +* @li batch_size_per_images: A require attribute of type int. +* @li positive_fraction: A require attribute of type float. + +*@par Outputs: +*y: The result of subSample. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator SubSampleLabels. +*@par Restrictions: +*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. +*/ REG_OP(SubSampleLabels) .INPUT(labels, TensorType({DT_INT32})) .INPUT(shuffle_matrix, TensorType({DT_INT32})) diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 92074872..75e91aee 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -2102,6 +2102,55 @@ REG_OP(FusedMulApplyMomentumExtern) .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern) /** +*@brief Updates '*var' according to the momentum scheme. +* accum = accum * momentum - x1 * x2 * lr +* if use_nesterov is True: +* var += accum * momentum - x1 * x2 * lr +* else: +* var += accum +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +*@li accum: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li lr: A tensor for the learning rate. Has the same type as "var". Should be +* from a Variable(). +*@li x1: A Tensor has type TensorType::NumberType(). +*@li momentum: A scalar. Has the same type as "var". +*@li x2: A scalar has the same type as "var". +* +*@par Attributes: +*@li use_nesterov: An optional bool. Defaults to "False". +* If "True", var will be updated by using Nesterov momentum. +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +*@attention Constraints: +* The input tensors must have the same shape. +* +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. +* +*/ +REG_OP(FusedMulApplyKerasMomentum) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(x1, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(x2, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .ATTR(use_nesterov, Bool, false) + .OP_END_FACTORY_REG(FusedMulApplyKerasMomentum) + +/** *@brief Update "g" according to the LARS algorithm . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index 452961a9..b625180a 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -86,7 +86,7 @@ REG_OP(ParseSingleExample) .ATTR(sparse_keys, ListString, {}) .ATTR(dense_keys, ListString, {}) .ATTR(sparse_types, ListType, {}) - .ATTR(dense_types, ListType, {}) + .ATTR(Tdense, ListType, {}) .ATTR(dense_shapes, ListListInt, {}) .OP_END_FACTORY_REG(ParseSingleExample) @@ -173,9 +173,9 @@ REG_OP(ParseTensor) REG_OP(DecodeCSV) .INPUT(records, TensorType({DT_STRING})) .DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, - DT_INT64, DT_STRING, DT_RESOURCE})) + DT_INT64, DT_STRING})) .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, - DT_INT64, DT_STRING, DT_RESOURCE})) + DT_INT64, DT_STRING})) .ATTR(OUT_TYPE, ListType, {}) .ATTR(field_delim, String, ",") .ATTR(use_quote_delim, Bool, true) @@ -283,12 +283,12 @@ REG_OP(ParseSingleSequenceExample) .ATTR(Ncontext_dense, Int, 0) .ATTR(Nfeature_list_sparse, Int, 0) .ATTR(Nfeature_list_dense, Int, 0) - .REQUIRED_ATTR(context_sparse_types, ListType) - .REQUIRED_ATTR(Tcontext_dense, ListType) - .REQUIRED_ATTR(feature_list_dense_types, ListType) - .REQUIRED_ATTR(context_dense_shapes, ListListInt) - .REQUIRED_ATTR(feature_list_sparse_types, ListType) - .REQUIRED_ATTR(feature_list_dense_shapes, ListListInt) + .ATTR(context_sparse_types, ListType, {}) + .ATTR(Tcontext_dense, ListType, {}) + .ATTR(feature_list_dense_types, ListType, {}) + .ATTR(context_dense_shapes, ListListInt, {}) + .ATTR(feature_list_sparse_types, ListType, {}) + .ATTR(feature_list_dense_shapes, ListListInt, {}) .OP_END_FACTORY_REG(ParseSingleSequenceExample) } // namespace ge diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index 8104cb01..b65a68f1 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -548,6 +548,8 @@ REG_OP(ShuffleChannel) * Each value along the axis zero represents the outcome of * the corresponding sample in a batch. * + * @par Restrictions: + * Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(MultinomialFuss) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64})) diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index d4b3b102..d671a531 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -190,7 +190,7 @@ REG_OP(DynamicRNNGrad) *@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. +*@li seq_length:A optional Tensor. Only Support float16 in FRACTAL_NZ and int32 in ND. *@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. @@ -228,7 +228,7 @@ REG_OP(DynamicRNN) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -698,9 +698,6 @@ REG_OP(DynamicGRU) *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. - -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DynamicGRUV2) .INPUT(x, TensorType({DT_FLOAT16})) @@ -989,6 +986,27 @@ REG_OP(CommonLSTM) .OP_END_FACTORY_REG(CommonLSTM) /** + * @brief Calculate the mask. According to hidden_size and num_step, convert seq_length to mask. + * + * @par Inputs: + * @li seq_length: A 1D Tensor. Must be one of the following types: int32. Record the current length of each batch. [batch_size]. + * @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Record the hidden_size. [4 * hidden_size]. + * @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Record the num_step/batch_size/input_size. [num_step, batch_size, input_size]. + * + * @par Outputs: + * seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32. with the shape of [num_step, batch_size, hidden_size]. And has the same type as "b" \n + * + * @par Restrictions: + * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. + */ +REG_OP(RnnGenMaskV2) + .INPUT(seq_length, TensorType({DT_INT32})) + .INPUT(b, TensorType({{DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(RnnGenMaskV2) + +/** * @brief Common GRU calculation. * @par Inputs: @@ -1002,22 +1020,15 @@ REG_OP(CommonLSTM) * @par Attributes: * @li activation_alpha: Optional scaling values used by some activation functions. \n - * @li activation_beta: Optional scaling values used by some activation functions. \n - * @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n - * @li clip: Cell clip threshold. \n - * @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n - * @li hidden_size: Number of neurons in the hidden layer. \n - * @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n * @par Outputs: * @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ - * @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ */ REG_OP(CommonGRU) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index da5bdb6a..f99493b7 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -2254,6 +2254,33 @@ REG_OP(IndexFillD) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .REQUIRED_ATTR(dim, Int) .OP_END_FACTORY_REG(IndexFillD) + +/** +* @brief For each row r of this and for each column c, do (*this)(r, c) += src(j, c), \n +* where j ranges from indexes[r].first through indexes[r].second - 1. \n +* In general indexes must be >= 0 and < src.NumRows(); \n +* but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor. Must be one of the following types: +* float16, float32. +* @li indices: A Tensor of the indices, type should be int32. +* @li src: A Tensor of the same type as "x". \n + +* @par Outputs: +* @li x: A Tensor. Same as input "x". + +* @par Third-party framework compatibility +* Compatible with the kaldi operator AddRowRanges. +*/ +REG_OP(AddRowRanges) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AddRowRanges) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index d7233906..080b9a3f 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -114,6 +114,7 @@ REG_OP(UnicodeDecodeWithOffsets) .ATTR(errors, String, "replace") .ATTR(replacement_char, Int, 65533) .ATTR(replace_control_characters, Bool, false) + .ATTR(Tsplits, Type, DT_INT64) .OP_END_FACTORY_REG(UnicodeDecodeWithOffsets) /** @@ -161,6 +162,7 @@ REG_OP(UnicodeDecode) .ATTR(errors, String, "replace") .ATTR(replacement_char, Int, 65533) .ATTR(replace_control_characters, Bool, false) + .ATTR(Tsplits, Type, DT_INT64) .OP_END_FACTORY_REG(UnicodeDecode) /** diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h index 71226d87..2cf6e0c4 100644 --- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h +++ b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h @@ -23,8 +23,8 @@ * @attention 无 * @param option [IN] 调优参数 * @param msg [OUT] 调优异常下返回信息 - * @retval #MSTUNE_SUCCESS 执行成功 - * @retval #MSTUNE_FAILED 执行失败 + * @retval #AOE_SUCCESS 执行成功 + * @retval #AOE_FAILURE 执行失败 * @par 依赖: * @li tune_api.cpp:该接口所属的开发包。 * @li tune_api.h:该接口声明所在的头文件。 @@ -35,33 +35,13 @@ AoeStatus AoeOfflineTuning(const std::map &option, std /** * @ingroup aoe - * @par 描述: 梯度调优 - * - * @attention 无 - * @param tuningGraph [IN] 调优图 - * @param dependGraph [IN] 调优依赖图 - * @param session [IN] ge连接会话 - * @param option [IN] 参数集. 包含调优参数及ge参数 - * @retval #MSTUNE_SUCCESS 执行成功 - * @retval #MSTUNE_FAILED 执行失败 - * @par 依赖: - * @li tune_api.cpp:该接口所属的开发包。 - * @li tune_api.h:该接口声明所在的头文件。 - * @see 无 - * @since - */ -extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector &dependGraph, - ge::Session *session, const std::map> &option); - -/** - * @ingroup aoe * @par 描述: 调优初始化 * * @attention 无 * @param session [IN] ge连接会话 * @param option [IN] 参数集. 包含调优参数及ge参数 * @retval #AOE_SUCCESS 执行成功 - * @retval #AOE_FAILED 执行失败 + * @retval #AOE_FAILURE 执行失败 * @par 依赖: * @li tune_api.cpp:该接口所属的开发包。 * @li tune_api.h:该接口声明所在的头文件。 @@ -77,7 +57,7 @@ extern "C" AoeStatus AoeOnlineInitialize(ge::Session *session, const std::map