From: @ding_fei_fei
Reviewed-by: @liucunwei, @ljl0711
Signed-off-by: @liucunwei
Tag: tags/v1.3.0
@@ -57,40 +57,43 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
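The only functional change in this hunk is the three vector-core codes (507034-507036) appended at the end of the block. A minimal sketch of how calling code might map the new values to readable text; the constant values are copied from the header above, while the DescribeVectorCoreError helper is a hypothetical name, not part of this patch:

```cpp
#include <cstdint>
#include <string>

// Values copied from the constants added above; any runtime call that fails
// is assumed to hand back a plain int32_t status code.
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT        = 507034;
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION      = 507035;
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036;

// Hypothetical helper: translate the new vector-core codes into text.
std::string DescribeVectorCoreError(int32_t code) {
  switch (code) {
    case ACL_ERROR_RT_VECTOR_CORE_TIMEOUT:        return "vector core timeout";
    case ACL_ERROR_RT_VECTOR_CORE_EXCEPTION:      return "vector core exception";
    case ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION: return "vector core trap exception";
    default:                                      return "not a vector core error";
  }
}
```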
@@ -2331,6 +2331,22 @@ REG_OP(CacheAllIndexToLocal)
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(CacheAllIndexToLocal)
/**
*@brief DynamicGetNext, dynamically get the next data.
*@par Inputs:
*x: the iterator; all types are available.
*@par Outputs:
*y: the data in the iterator; all types are available.
*@par Attributes:
*output_types: types of all outputs.
*output_shapes: shapes of all outputs.
*_dynamic_graph_execute_mode: dynamic graph execution mode,
value is one of lazy_recompile and dynamic_execute.
*_getnext_inputs_shape_range: shape ranges of outputs;
it takes effect only when _dynamic_graph_execute_mode is dynamic_execute.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicGetNext)
.INPUT(x, TensorType::ALL())
.DYNAMIC_OUTPUT(y, TensorType::ALL())
@@ -28,7 +28,7 @@ namespace ge {
*@par Inputs:
*Dynamic inputs, including:
* @li x: A list of Tensor objects, each with same shape and type. The supported types are:
*x: A list of Tensor objects, each with same shape and type. The supported types are:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n
@@ -330,8 +330,8 @@ REG_OP(Sub)
*@brief computes the absolute value of a tensor. \n
*@par Inputs:
*One inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
*One input, including: \n
*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
*@par Outputs:
*y: A Tensor. Has the same type as "x". \n
@@ -3243,9 +3243,11 @@ REG_OP(Fills)
*@brief Add tensor with scale. \n
*@par Inputs:
*Five inputs, including:
* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
* @li x2: A scale. Must be float. \n
*One input, including: \n
*x: A Tensor. Must be one of the following types:int32,int16, float16, float32. \n
*@par Attributes:
*value: A scale. Must be float. \n
*@par Outputs:
*@li y: A Tensor. Has the same type and shape as "x1". \n
@@ -1134,7 +1134,7 @@ REG_OP(DecodeBmp)
.ATTR(channels, Int, 0)
.OP_END_FACTORY_REG(DecodeBmp)
/*
/**
*@brief Function parse image from string to int. \n
*@par Inputs:
@@ -1602,11 +1602,11 @@ REG_OP(DecodeJpeg)
*@brief Image warping using per-pixel flow vectors. \n
*@par Inputs:
*@li images: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
*@par Outputs:
*y: Returns 4-D with the same shape and dtype as `images`. \n
*y: Returns 4-D with the same shape and dtype as `image`. \n
*/
REG_OP(DenseImageWarp)
.INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
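The rename from `images` to `image` above is documentation-only. For readers unfamiliar with the op, a hedged single-channel reference of what DenseImageWarp computes, assuming the common dense_image_warp convention of sampling the input at the pixel coordinate minus the flow vector; the sign convention and the DenseImageWarpRef name are assumptions, not taken from this header:

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// Illustrative only: bilinear warp of a single-channel h x w image by a
// per-pixel flow field (two values, dy then dx, per pixel), following the
// common dense_image_warp convention of sampling at (y - dy, x - dx).
// Whether this op uses the same sign convention is an assumption.
std::vector<float> DenseImageWarpRef(const std::vector<float>& image,
                                     const std::vector<float>& flow,
                                     int h, int w) {
  auto pixel = [&](int yi, int xi) {
    yi = std::clamp(yi, 0, h - 1);
    xi = std::clamp(xi, 0, w - 1);
    return image[yi * w + xi];
  };
  std::vector<float> out(static_cast<size_t>(h) * w);
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      float qy = y - flow[(y * w + x) * 2 + 0];
      float qx = x - flow[(y * w + x) * 2 + 1];
      int y0 = static_cast<int>(std::floor(qy));
      int x0 = static_cast<int>(std::floor(qx));
      float wy = qy - y0, wx = qx - x0;
      out[y * w + x] =
          (1 - wy) * ((1 - wx) * pixel(y0, x0) + wx * pixel(y0, x0 + 1)) +
          wy * ((1 - wx) * pixel(y0 + 1, x0) + wx * pixel(y0 + 1, x0 + 1));
    }
  }
  return out;
}
```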
@@ -1709,11 +1709,11 @@ REG_OP(ResizeGradD)
*@par Inputs:
*@li grad: gradients with respect to DenseImageWarp output.
*@li images: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
*@par Outputs:
*grad_image: Returns 4-D with the same shape and dtype as `images`.
*grad_image: Returns 4-D with the same shape and dtype as `image`.
*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
*/
REG_OP(DenseImageWarpGrad)
@@ -1747,6 +1747,9 @@ REG_OP(DenseImageWarpGrad)
*@par Third-party framework compatibility
*Compatible with pytorch GridSampler2D operator.
*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GridSampler2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -444,6 +444,9 @@ REG_OP(ScatterNdUpdate)
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator TensorScatterUpdate.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TensorScatterUpdate)
.INPUT(x, TensorType::BasicType())
@@ -565,6 +568,9 @@ REG_OP(ScatterNdAdd)
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator TensorScatterAdd.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TensorScatterAdd)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
@@ -623,6 +629,9 @@ REG_OP(ScatterNdSub)
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator TensorScatterSub.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TensorScatterSub)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
@@ -1045,6 +1054,28 @@ REG_OP(MatrixDiagV2)
.OUTPUT(output, TensorType::BasicType())
.OP_END_FACTORY_REG(MatrixDiagV2)
/**
* @brief Add updates to var_out according to axis and indices.
* @par Inputs:
* Three inputs, including:
* @li var: A Tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8.
* @li indices: A Tensor of the indices, type should be int32.
* @li updates: A Tensor of the same type as "var".
* @par Attributes:
* @li axis: A required int specifying the axis along which the indices add is performed.
* @par Outputs:
* @li var_out: A Tensor. Same as input "var".
* @par Third-party framework compatibility
* Compatible with the Pytorch operator index_add.
* @par Restrictions:
* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(IndexAdd)
.INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
.INPUT(indices, TensorType({DT_INT32}))
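A small reference of the documented IndexAdd semantics, restricted to axis = 0 on a row-major 2-D tensor; the function name and the flat memory layout are illustrative assumptions:

```cpp
#include <vector>

// Illustrative reference for IndexAdd with axis = 0 on a row-major matrix
// with `cols` columns: var_out starts as a copy of var, then row indices[i]
// of var_out accumulates row i of updates. Names and layout are assumptions.
std::vector<float> IndexAddAxis0(const std::vector<float>& var, int cols,
                                 const std::vector<int>& indices,
                                 const std::vector<float>& updates) {
  std::vector<float> var_out = var;
  for (size_t i = 0; i < indices.size(); ++i) {
    for (int c = 0; c < cols; ++c) {
      var_out[static_cast<size_t>(indices[i]) * cols + c] += updates[i * cols + c];
    }
  }
  return var_out;
}
```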
@@ -419,35 +419,7 @@ REG_OP(BNInference)
.ATTR(use_global_stats, Bool,true)
.ATTR(mode, Int,1)
.OP_END_FACTORY_REG(BNInference)
/**
*@brief aicpu batch normalization host . \n
*@par Inputs:
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
*@li momentum: An optional float, mean and variance's Scale factor
*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: mean inference mode , only can be "True".
*@li mode: An optional attr, not use
*@par Outputs:
*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean
*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance
*/
REG_OP(BnHost)
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(epsilon, Float, 0.00001)
.ATTR(mode, Int, 1)
.ATTR(use_global_stats, Bool, true)
.OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(BnHost)
/**
*@brief Performs batch normalization . \n
@@ -989,6 +989,8 @@ REG_OP(Conv2DCompress)
*@li deformable_groups: Optional. An integer of type int32. The number of
* deformable group partitions. In_channels must be divisible by
* "deformable_groups". Defaults to 1.
*@li modulated: Optional. Specifies the version of DeformableConv2D: true means v2,
* false means v1. Currently only v2 is supported.
*\n
*\n
* The following value range restrictions must be met:
@@ -1037,6 +1039,7 @@ REG_OP(DeformableConv2D)
.ATTR(groups, Int, 1)
.ATTR(data_format, String, "NHWC")
.ATTR(deformable_groups, Int, 1)
.ATTR(modulated, Bool, true)
.OP_END_FACTORY_REG(DeformableConv2D)
/**
@@ -1202,35 +1202,6 @@ REG_OP(RpnProposalsD)
.OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
.OP_END_FACTORY_REG(RpnProposalsD)
/**
*@brief Computes Score Filte Pre-Sort function.
*@par Inputs:
*Inputs include:
* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
*@par Attributes:
* @li score_threshold: required, float, threahold of topk process.
* @li k: required, Int, threahold of topk process.
* @li score_filter: bool, mark of score_filter. Defaults to "true"
* @li core_max_num: int, max number of core. Defaults to "8"
*@par Outputs:
* @li sorted_proposal: A Tensor. Must be float16.
* N-D with shape [8*6002, 8].
* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
*/
REG_OP(ScoreFiltePreSort)
.INPUT(rois, TensorType({DT_FLOAT16}))
.INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
.OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16}))
.OUTPUT(proposal_num, TensorType({ DT_UINT32}))
.REQUIRED_ATTR(score_threshold, Float)
.REQUIRED_ATTR(k, Int)
.ATTR(score_filter, Bool, true)
.ATTR(core_max_num, Int, 8)
.OP_END_FACTORY_REG(ScoreFiltePreSort)
/**
*@brief Computes Score Filte Pre-Sort function.
@@ -1500,6 +1471,26 @@ REG_OP(Sort)
.ATTR(descending, Bool, false)
.OP_END_FACTORY_REG(Sort)
/**
*@brief Computes the IoU of the input bboxes and gtboxes.
*@par Inputs:
* Two inputs, including:
*@li bboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1),
*@li gtboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1).\n
*@par Attributes:
*@li mode: An optional attribute of type string, specifying the IoU computation mode. \n
*@par Outputs:
*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n
*@attention Constraints:
* Only computation of float16 data is supported.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use Iou instead.
*/
REG_OP(PtIou)
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
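As a companion to the PtIou documentation above, a hedged reference for the IoU of a single pair of boxes, assuming the usual (x1, y1, x2, y2) corner encoding; the header itself does not spell out the coordinate order or the meaning of the mode attribute:

```cpp
#include <algorithm>

// Hedged reference for the IoU of one box pair, each box stored as
// {x1, y1, x2, y2}; the coordinate order and the usual "iou"/"iof" modes
// are assumptions based on common detection conventions, not this header.
float BoxIou(const float a[4], const float b[4]) {
  float iw = std::max(0.0f, std::min(a[2], b[2]) - std::max(a[0], b[0]));
  float ih = std::max(0.0f, std::min(a[3], b[3]) - std::max(a[1], b[1]));
  float inter = iw * ih;
  float area_a = (a[2] - a[0]) * (a[3] - a[1]);
  float area_b = (b[2] - b[0]) * (b[3] - b[1]);
  float uni = area_a + area_b - inter;
  return uni > 0.0f ? inter / uni : 0.0f;
}
```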
@@ -1541,6 +1532,9 @@ selected indices from the boxes tensor, where M <= max_output_size. \n
*@par Third-party framework compatibility
*Compatible with onnx NonMaxSuppression operator.
*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NonMaxSuppressionV6)
@@ -1729,3 +1723,4 @@ REG_OP(PSROIPoolingGradV2D)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
@@ -1073,26 +1073,6 @@ REG_OP(INInferV2D)
.OP_END_FACTORY_REG(INInferV2D)
/**
*@brief Performs instance normalization for inference of InHost part.
*@par Inputs:\n
* One input, including: (NC1HWC0 supported)
* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
*@par Attributes:
* epsilon: An optional float32, specifying the small value added to
variance to avoid dividing by zero. Defaults to "0.00001" . \n
*@par Outputs:\n
* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
*/
REG_OP(InHost)
.INPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.00001)
.OP_END_FACTORY_REG(InHost)
/**
* @brief perform instance normalization to x. \n
* @par Inputs:
@@ -1124,6 +1104,26 @@ REG_OP(InstanceNorm)
.REQUIRED_ATTR(epsilon, Float)
.OP_END_FACTORY_REG(InstanceNorm)
/**
* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n
* @par Inputs:
* Three inputs, including:
* @li grad: A Tensor. Must be one of the following types: float16, float32.
* Required.
* @li input: A Tensor. Has the same type as "grad". Required.
* @li target: A Tensor. Has the same type as "grad". Required. \n
* @par Attributes:
* @li reduction: An optional attribute of type String. Defaults to "mean". \n
* @li log_target: An optional attribute of type Bool. Defaults to false. \n
* @par Outputs:
* @li y: A Tensor. Has the same type as "grad". \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator KlDivLossGrad.
*/
REG_OP(KlDivLossGrad)
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT}))
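A hedged sketch of the backward rule this op documents compatibility with (torch.nn.functional.kl_div): the gradient with respect to input is -target (or -exp(target) when log_target is true), scaled by the incoming gradient and, for "mean" reduction, by 1/N. The reduction handling and the KlDivLossGradRef name are assumptions based on the PyTorch operator, not on this header:

```cpp
#include <cmath>
#include <string>
#include <vector>

// Hedged reference for the backward pass of torch.nn.functional.kl_div:
// d(loss)/d(input) = -target (or -exp(target) when log_target is true),
// scaled by the incoming gradient and, for reduction == "mean", by 1/N.
std::vector<float> KlDivLossGradRef(float grad, const std::vector<float>& input,
                                    const std::vector<float>& target,
                                    const std::string& reduction, bool log_target) {
  std::vector<float> grad_input(input.size());
  float scale = (reduction == "mean") ? grad / static_cast<float>(input.size()) : grad;
  for (size_t i = 0; i < input.size(); ++i) {
    float t = log_target ? std::exp(target[i]) : target[i];
    grad_input[i] = -t * scale;
  }
  return grad_input;
}
```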
@@ -1453,12 +1453,12 @@ REG_OP(PoissonNllLoss)
*
*
* @par Output:
* y: A mutable Tensor of type int32, with the shape of [num_step, batch_size, hidden_size]. \n
* y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
*
*/
REG_OP(RnnGenMask)
.INPUT(seq_length, TensorType({DT_INT32}))
.OUTPUT(seq_mask, TensorType({DT_INT32}))
.OUTPUT(seq_mask, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(num_step, Int)
.REQUIRED_ATTR(hidden_size, Int)
.OP_END_FACTORY_REG(RnnGenMask)
@@ -237,18 +237,18 @@ REG_OP(AvgPool3DD)
* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
* @li count_include_pad: When true, will include the zero-padding in the averaging calculation.
* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
* @li data_format: A string, format of input data .
* @li data_format: A string, format of input data.
* @par Outputs:
* @output: A mutable tensor with the same shape and type as "orig_input".
* @output: A mutable tensor with the same shape and type as "orig_input_shape".
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator AvgPoolGrad.
*/
REG_OP(AvgPool3DGrad)
.INPUT(orig_input_shape, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.INPUT(grads, TensorType({DT_INT32}))
.INPUT(orig_input_shape, TensorType({DT_INT32}))
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
@@ -888,7 +888,7 @@ REG_OP(AvgPoolV2Grad)
* @brief Computes gradients of averagev2 pooling function.
* @par Inputs:
* @li input_grad: An NHWC tensor of type float16, float32, or double.
*input_grad: An NHWC tensor of type float16, float32, or double.
* @par Attributes:
* @li orig_input_shape: A required tuple or list of type int32.
@@ -906,10 +906,10 @@ REG_OP(AvgPoolV2Grad)
* @li data_format: An optional string. Defaults to "NHWC".
* @par Outputs:
* @out_grad: A mutable tensor with the same shape and type as "orig_input".
*out_grad: A mutable tensor with the same shape and type as "orig_input".
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator AvgPoolGrad.
*Compatible with the TensorFlow operator AvgPoolGrad.
*/
REG_OP(AvgPoolV2GradD)
.INPUT(input_grad, TensorType({DT_FLOAT16}))
@@ -1682,7 +1682,27 @@ REG_OP(MaxPoolWithArgmaxV1)
.ATTR(ceil_mode, Bool, false)
.OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)
// SubSample
/**
*@brief Randomly sample a subset of positive and negative examples, and overwrite
the label vector with the ignore value (-1) for all elements that are not
included in the sample.\n
* @par Inputs:
* One input:
* labels: shape of labels, (N, ) label vector with values. \n
* @par Attributes:
* @li batch_size_per_images: A required attribute of type int.
* @li positive_fraction: A required attribute of type float.
*@par Outputs:
*y: The result of SubSample. \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator SubSample.
*@par Restrictions:
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/
REG_OP(SubSample)
.INPUT(labels, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT32}))
@@ -1690,7 +1710,28 @@ REG_OP(SubSample)
.REQUIRED_ATTR(positive_fraction, Float)
.OP_END_FACTORY_REG(SubSample)
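A host-side reference of the sampling behaviour documented above, assuming the torchvision MaskRcnn convention that 1 marks a positive label and 0 a negative one; those value conventions and the SubSampleRef name are assumptions, not stated in the header:

```cpp
#include <algorithm>
#include <random>
#include <vector>

// Illustrative: keep at most batch_size_per_images * positive_fraction
// positives, fill the remainder with negatives, and overwrite every label
// that was not sampled with the ignore value -1.
std::vector<int> SubSampleRef(const std::vector<int>& labels, int batch_size_per_images,
                              float positive_fraction, std::mt19937& rng) {
  std::vector<int> pos, neg;
  for (int i = 0; i < static_cast<int>(labels.size()); ++i) {
    if (labels[i] == 1) pos.push_back(i);
    else if (labels[i] == 0) neg.push_back(i);
  }
  std::shuffle(pos.begin(), pos.end(), rng);
  std::shuffle(neg.begin(), neg.end(), rng);
  int num_pos = std::min(static_cast<int>(pos.size()),
                         static_cast<int>(batch_size_per_images * positive_fraction));
  int num_neg = std::min(static_cast<int>(neg.size()), batch_size_per_images - num_pos);
  std::vector<int> out(labels.size(), -1);  // everything not sampled is ignored
  for (int k = 0; k < num_pos; ++k) out[pos[k]] = labels[pos[k]];
  for (int k = 0; k < num_neg; ++k) out[neg[k]] = labels[neg[k]];
  return out;
}
```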
// SubSampleLabels
/**
*@brief Randomly sample a subset of positive and negative examples, and overwrite
the label vector with the ignore value (-1) for all elements that are not
included in the sample.\n
* @par Inputs:
* Two inputs, including:
* @li labels: shape of labels, (N, ) label vector with values.
* @li shuffle_matrix: random matrix with shape (N, ). \n
* @par Attributes:
* @li batch_size_per_images: A required attribute of type int.
* @li positive_fraction: A required attribute of type float.
*@par Outputs:
*y: The result of SubSample. \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator SubSampleLabels.
*@par Restrictions:
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/
REG_OP(SubSampleLabels)
.INPUT(labels, TensorType({DT_INT32}))
.INPUT(shuffle_matrix, TensorType({DT_INT32}))
@@ -2102,6 +2102,55 @@ REG_OP(FusedMulApplyMomentumExtern)
.OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)
/**
*@brief Updates '*var' according to the momentum scheme.
* accum = accum * momentum - x1 * x2 * lr
* if use_nesterov is True:
* var += accum * momentum - x1 * x2 * lr
* else:
* var += accum
*
*@par Inputs:
*@li var: A mutable tensor. Must be one of the data types defined in
* TensorType::NumberType(). Should be from a Variable().
*@li accum: A mutable tensor. Has the same type as "var". Should be from a
* Variable().
*@li lr: A tensor for the learning rate. Has the same type as "var". Should be
* from a Variable().
*@li x1: A Tensor has type TensorType::NumberType().
*@li momentum: A scalar. Has the same type as "var".
*@li x2: A scalar has the same type as "var".
*
*@par Attributes:
*@li use_nesterov: An optional bool. Defaults to "False".
* If "True", var will be updated by using Nesterov momentum.
*@li use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" tensor is protected by a lock;
* otherwise the behavior is undefined, but may exhibit less contention.
*
*@par Outputs:
* var: A mutable tensor. Has the same type as input "var".
*
*@attention Constraints:
* The input tensors must have the same shape.
*
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
*
*/
REG_OP(FusedMulApplyKerasMomentum)
.INPUT(var, TensorType::NumberType())
.INPUT(accum, TensorType::NumberType())
.INPUT(lr, TensorType::NumberType())
.INPUT(x1, TensorType::NumberType())
.INPUT(momentum, TensorType::NumberType())
.INPUT(x2, TensorType::NumberType())
.OUTPUT(var, TensorType::NumberType())
.OUTPUT(accum, TensorType::NumberType())
.ATTR(use_locking, Bool, false)
.ATTR(use_nesterov, Bool, false)
.OP_END_FACTORY_REG(FusedMulApplyKerasMomentum)
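The update rule is fully spelled out in the comment above; a scalar transcription of it, with x1 * x2 standing in for the gradient:

```cpp
// Scalar transcription of the update rule documented above; in the real op
// every quantity is a tensor and x1 * x2 plays the role of the gradient.
struct KerasMomentumState { float var; float accum; };

void FusedMulApplyKerasMomentumStep(KerasMomentumState& s, float lr, float x1,
                                    float momentum, float x2, bool use_nesterov) {
  float grad = x1 * x2;                        // the fused multiply
  s.accum = s.accum * momentum - grad * lr;    // accum = accum * momentum - x1 * x2 * lr
  if (use_nesterov) {
    s.var += s.accum * momentum - grad * lr;   // Nesterov lookahead
  } else {
    s.var += s.accum;
  }
}
```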
/**
*@brief Update "g" according to the LARS algorithm . \n
*@par Inputs:
@@ -86,7 +86,7 @@ REG_OP(ParseSingleExample)
.ATTR(sparse_keys, ListString, {})
.ATTR(dense_keys, ListString, {})
.ATTR(sparse_types, ListType, {})
.ATTR(dense_types, ListType, {})
.ATTR(Tdense, ListType, {})
.ATTR(dense_shapes, ListListInt, {})
.OP_END_FACTORY_REG(ParseSingleExample)
@@ -173,9 +173,9 @@ REG_OP(ParseTensor)
REG_OP(DecodeCSV)
.INPUT(records, TensorType({DT_STRING}))
.DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
DT_INT64, DT_STRING, DT_RESOURCE}))
DT_INT64, DT_STRING}))
.DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
DT_INT64, DT_STRING, DT_RESOURCE}))
DT_INT64, DT_STRING}))
.ATTR(OUT_TYPE, ListType, {})
.ATTR(field_delim, String, ",")
.ATTR(use_quote_delim, Bool, true)
@@ -283,12 +283,12 @@ REG_OP(ParseSingleSequenceExample)
.ATTR(Ncontext_dense, Int, 0)
.ATTR(Nfeature_list_sparse, Int, 0)
.ATTR(Nfeature_list_dense, Int, 0)
.REQUIRED_ATTR(context_sparse_types, ListType)
.REQUIRED_ATTR(Tcontext_dense, ListType)
.REQUIRED_ATTR(feature_list_dense_types, ListType)
.REQUIRED_ATTR(context_dense_shapes, ListListInt)
.REQUIRED_ATTR(feature_list_sparse_types, ListType)
.REQUIRED_ATTR(feature_list_dense_shapes, ListListInt)
.ATTR(context_sparse_types, ListType, {})
.ATTR(Tcontext_dense, ListType, {})
.ATTR(feature_list_dense_types, ListType, {})
.ATTR(context_dense_shapes, ListListInt, {})
.ATTR(feature_list_sparse_types, ListType, {})
.ATTR(feature_list_dense_shapes, ListListInt, {})
.OP_END_FACTORY_REG(ParseSingleSequenceExample)
} // namespace ge
@@ -548,6 +548,8 @@ REG_OP(ShuffleChannel)
* Each value along the axis zero represents the outcome of
* the corresponding sample in a batch.
*
* @par Restrictions:
* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(MultinomialFuss)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64}))
@@ -190,7 +190,7 @@ REG_OP(DynamicRNNGrad)
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li seq_length:An optional Tensor. Only supports float16 in FRACTAL_NZ and int32 in ND.
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
@@ -228,7 +228,7 @@ REG_OP(DynamicRNN)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -698,9 +698,6 @@ REG_OP(DynamicGRU)
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicGRUV2)
.INPUT(x, TensorType({DT_FLOAT16}))
@@ -989,6 +986,27 @@ REG_OP(CommonLSTM)
.OP_END_FACTORY_REG(CommonLSTM)
/**
* @brief Calculate the mask. According to hidden_size and num_step, convert seq_length to mask.
*
* @par Inputs:
* @li seq_length: A 1D Tensor. Must be one of the following types: int32. Records the current length of each batch. [batch_size].
* @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Records the hidden_size. [4 * hidden_size].
* @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Records the num_step/batch_size/input_size. [num_step, batch_size, input_size].
*
* @par Outputs:
* seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32. With the shape of [num_step, batch_size, hidden_size]. Has the same type as "b". \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(RnnGenMaskV2)
.INPUT(seq_length, TensorType({DT_INT32}))
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(RnnGenMaskV2)
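A reference of the mask described above: element (t, b, h) is 1 while t is below seq_length[b] and 0 afterwards. The 1/0 fill values are an assumption; the header only fixes the shape [num_step, batch_size, hidden_size] and the float dtype:

```cpp
#include <vector>

// Reference for the documented mask: element (t, b, h) is 1.0 while
// t < seq_length[b] and 0.0 afterwards (fill values are an assumption).
std::vector<float> RnnGenMaskRef(const std::vector<int>& seq_length,
                                 int num_step, int hidden_size) {
  int batch_size = static_cast<int>(seq_length.size());
  std::vector<float> mask(static_cast<size_t>(num_step) * batch_size * hidden_size, 0.0f);
  for (int t = 0; t < num_step; ++t) {
    for (int b = 0; b < batch_size; ++b) {
      if (t >= seq_length[b]) continue;
      for (int h = 0; h < hidden_size; ++h) {
        mask[(static_cast<size_t>(t) * batch_size + b) * hidden_size + h] = 1.0f;
      }
    }
  }
  return mask;
}
```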
/**
* @brief Common GRU calculation.
* @par Inputs:
@@ -1002,22 +1020,15 @@ REG_OP(CommonLSTM)
* @par Attributes:
* @li activation_alpha: Optional scaling values used by some activation functions. \n
* @li activation_beta: Optional scaling values used by some activation functions. \n
* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n
* @li clip: Cell clip threshold. \n
* @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n
* @li hidden_size: Number of neurons in the hidden layer. \n
* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n
* @par Outputs:
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ
* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ
*/
REG_OP(CommonGRU)
@@ -2254,6 +2254,33 @@ REG_OP(IndexFillD)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(dim, Int)
.OP_END_FACTORY_REG(IndexFillD)
/**
* @brief For each row r of this and for each column c, do (*this)(r, c) += src(j, c), \n
* where j ranges from indexes[r].first through indexes[r].second - 1. \n
* In general indexes must be >= 0 and < src.NumRows(); \n
* but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n
* @par Inputs:
* Three inputs, including:
* @li x: A Tensor. Must be one of the following types:
* float16, float32.
* @li indices: A Tensor of the indices, type should be int32.
* @li src: A Tensor of the same type as "x". \n
* @par Outputs:
* @li x: A Tensor. Same as input "x".
* @par Third-party framework compatibility
* Compatible with the kaldi operator AddRowRanges.
*/
REG_OP(AddRowRanges)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(indices, TensorType({DT_INT32}))
.OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(AddRowRanges)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
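The row-range accumulation is defined precisely in the comment above; a direct transcription for a row-major matrix, with the (-1, -1) empty-range convention handled explicitly (the AddRowRangesRef name and flat layout are illustrative):

```cpp
#include <utility>
#include <vector>

// Direct transcription of the documented rule for a row-major matrix with
// `cols` columns: row r of x accumulates src rows [indexes[r].first,
// indexes[r].second); an empty range such as (-1, -1) contributes nothing.
void AddRowRangesRef(std::vector<float>& x, const std::vector<float>& src, int cols,
                     const std::vector<std::pair<int, int>>& indexes) {
  for (size_t r = 0; r < indexes.size(); ++r) {
    for (int j = indexes[r].first; j < indexes[r].second; ++j) {
      if (j < 0) continue;  // guards the (-1, -1) empty-range convention
      for (int c = 0; c < cols; ++c) {
        x[r * cols + c] += src[static_cast<size_t>(j) * cols + c];
      }
    }
  }
}
```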
@@ -114,6 +114,7 @@ REG_OP(UnicodeDecodeWithOffsets)
.ATTR(errors, String, "replace")
.ATTR(replacement_char, Int, 65533)
.ATTR(replace_control_characters, Bool, false)
.ATTR(Tsplits, Type, DT_INT64)
.OP_END_FACTORY_REG(UnicodeDecodeWithOffsets)
/**
@@ -161,6 +162,7 @@ REG_OP(UnicodeDecode)
.ATTR(errors, String, "replace")
.ATTR(replacement_char, Int, 65533)
.ATTR(replace_control_characters, Bool, false)
.ATTR(Tsplits, Type, DT_INT64)
.OP_END_FACTORY_REG(UnicodeDecode)
/**
@@ -23,8 +23,8 @@
* @attention None
* @param option [IN] tuning options
* @param msg [OUT] message returned when tuning fails
* @retval #MSTUNE_SUCCESS executed successfully
* @retval #MSTUNE_FAILED execution failed
* @retval #AOE_SUCCESS executed successfully
* @retval #AOE_FAILURE execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.
@@ -35,33 +35,13 @@ AoeStatus AoeOfflineTuning(const std::map<std::string, std::string> &option, std
/**
* @ingroup aoe
* @par Description: gradient tuning
*
* @attention None
* @param tuningGraph [IN] graph to tune
* @param dependGraph [IN] dependent graphs for tuning
* @param session [IN] GE session
* @param option [IN] option set, containing tuning options and GE options
* @retval #MSTUNE_SUCCESS executed successfully
* @retval #MSTUNE_FAILED execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.
* @see None
* @since
*/
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
/**
* @ingroup aoe
* @par Description: tuning initialization
*
* @attention None
* @param session [IN] GE session
* @param option [IN] option set, containing tuning options and GE options
* @retval #AOE_SUCCESS executed successfully
* @retval #AOE_FAILED execution failed
* @retval #AOE_FAILURE execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.
@@ -77,7 +57,7 @@ extern "C" AoeStatus AoeOnlineInitialize(ge::Session *session, const std::map<st
* @attention None
* @param None
* @retval #AOE_SUCCESS executed successfully
* @retval #AOE_FAILED execution failed
* @retval #AOE_FAILURE execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.
@@ -96,7 +76,7 @@ extern "C" AoeStatus AoeOnlineFinalize();
* @param session [IN] GE session
* @param option [IN] option set, containing tuning options and GE options
* @retval #AOE_SUCCESS executed successfully
* @retval #AOE_FAILED execution failed
* @retval #AOE_FAILURE execution failed
* @par Dependencies:
* @li tune_api.cpp: the development package this interface belongs to.
* @li tune_api.h: the header file in which this interface is declared.