hummingbird
/
graphengine

/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 * \file nn_detect_ops.h
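 * \brief Operator prototype registrations for detection-related neural network operators
 *        (for example bounding box encode/decode, ROIAlign, PriorBox and Yolo).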
 */
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief Generates bounding boxes based on "rois" and "deltas".
* It is a customized FasterRcnn operator . \n

*@par Inputs:
* Two inputs, including:
*@li rois: Region of interests (ROIs) generated by the region proposal
* network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4).
* "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1",
* "y0", and "y1".
*@li deltas: Absolute variation between the ROIs generated by the RPN and
* ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4).
* "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh" . \n

*@par Attributes:
*@li means: An optional list of floats. Defaults to [0.0, 0.0, 0.0, 0.0].
* "deltas" = "deltas" x "stds" + "means".
*@li stds: An optional list of floats. Defaults to [1.0, 1.0, 1.0, 1.0].
* "deltas" = "deltas" x "stds" + "means".
*@li max_shape: A required list of ints, [h, w], specifying the size of the
* image transferred to the network. Used to ensure that the bbox shape after
* conversion does not exceed "max_shape".
*@li wh_ratio_clip: An optional float. Defaults to "0.016" (16/1000). The
* values of "dw" and "dh" fall within (-wh_ratio_clip, wh_ratio_clip) . \n

*@par Outputs:
*bboxes: Bboxes generated based on "rois" and "deltas". A Tensor of type
* float16 or float32. Has the same format and type as "rois".
*/
REG_OP(BoundingBoxDecode)
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
    .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
    .REQUIRED_ATTR(max_shape, ListInt)
    .ATTR(wh_ratio_clip, Float, 0.016)
    .OP_END_FACTORY_REG(BoundingBoxDecode)

/**
*@brief Computes the coordinate variations between bboxes and ground truth
* boxes. It is a customized FasterRcnn operator . \n

*@par Inputs:
* Two inputs, including:
*@li anchor_box: Anchor boxes. A 2D Tensor of type float16 or float32 with
* shape (N, 4). "N" indicates the number of bounding boxes, and the value "4"
* refers to "x0", "x1", "y0", and "y1".
*@li ground_truth_box: Ground truth boxes. A 2D Tensor of type float16 or
* float32 with shape (N, 4). "N" indicates the number of bounding boxes, and
* the value "4" refers to "x0", "x1", "y0", and "y1" . \n

*@par Attributes:
*@li means: An optional list of floats. Defaults to [0.0, 0.0, 0.0, 0.0].
* "deltas" = "deltas" x "stds" + "means".
*@li stds: An optional list of floats. Defaults to [1.0, 1.0, 1.0, 1.0].
* "deltas" = "deltas" x "stds" + "means" . \n

*@par Outputs:
*delats: A 2D Tensor of type float16 or float32 with shape (N, 4), specifying
* the variations (deltas) between all anchor boxes and ground truth boxes.
* Note that the registered output name is spelled "delats"; it is kept as is
* because the name is part of the operator interface.
*/
REG_OP(BoundingBoxEncode)
    .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(ground_truth_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(delats, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
    .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
    .OP_END_FACTORY_REG(BoundingBoxEncode)

/**
*@brief Judges whether the bounding box is valid. It is a customized
* FasterRcnn operator . \n

*@par Inputs:
* Two inputs, including:
*@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4).
* "N" indicates the number of bounding boxes, the value "4" indicates "x0",
* "x1", "y0", and "y1".
*@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16
* with shape (16,) . \n

*@par Outputs:
*valid_tensor: A Tensor of type int8 with shape (N, 1), specifying whether an
* input anchor is in an image. "1" indicates valid, while "0" indicates
* invalid . \n

*@attention Constraints:
* 16 "img_metas" are input. The first three numbers (height, width, ratio) are
* valid, specifying the valid boundary (height x ratio, width x ratio).
*/
REG_OP(CheckValid)
    .INPUT(bbox_tensor, TensorType({DT_FLOAT16}))
    .INPUT(img_metas, TensorType({DT_FLOAT16}))
    .OUTPUT(valid_tensor, TensorType({DT_INT8}))
    .OP_END_FACTORY_REG(CheckValid)

/**
*@brief Computes the intersection over union (iou) or the intersection over
* foreground (iof) based on the ground-truth and predicted regions . \n

*@par Inputs:
* Two inputs, including:
*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (N, 4). "N" indicates the number of bounding boxes, and the value
* "4" refers to "x0", "x1", "y0", and "y1".
*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (M, 4). "M" indicates the number of ground truth boxes, and
* the value "4" refers to "x0", "x1", "y0", and "y1" . \n

*@par Attributes:
*mode: An optional string, specifying the computation mode. The value range
* is [iou, iof]. Defaults to "iou" . \n

*@par Outputs:
*overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
* the IoU or IoF ratio . \n

*@attention Constraints:
* Only computation of float16 data is supported. To avoid overflow, the input
* length and width are scaled by 0.2 internally.
* NOTE(review): the registration below also accepts float32 for every tensor;
* confirm whether the float16-only constraint still applies.
*/
REG_OP(Iou)
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(Iou)

/**
*@brief Performs the backpropagation of ROIAlign for training scenarios . \n

*@par Inputs:
* Three inputs, including:
*@li ydiff: A 5HD gradient input of type float32.
*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N"
* indicates the number of ROIs, the value "5" indicates the indexes of images
* where the ROIs are located, "x0", "x1", "y0", and "y1".
*@li rois_n: An optional input of type int32, specifying the number of valid
* ROIs. This parameter is reserved . \n

*@par Attributes:
*@li xdiff_shape: A required list of 4 ints, obtained based on the shape of
* "features" of ROIAlign.
*@li pooled_width: A required attribute of type int, specifying the W
* dimension.
*@li pooled_height: A required attribute of type int, specifying the H
* dimension.
*@li spatial_scale: A required attribute of type float, specifying the
* scaling ratio of "features" to the original image.
*@li sample_num: An optional attribute of type int, specifying the horizontal
* and vertical sampling frequency of each output. If this attribute is set to
* "0", the sampling frequency is equal to the rounded up value of "rois",
* which is a floating point number. Defaults to "2" . \n

*@par Outputs:
*xdiff: Gradient added to input "features". A Tensor of type float32. Has the
* same 5HD shape as input "features".
*/
REG_OP(ROIAlignGrad)
    .INPUT(ydiff, TensorType({DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
    .OUTPUT(xdiff, TensorType({DT_FLOAT}))
    .REQUIRED_ATTR(xdiff_shape, ListInt)
    .REQUIRED_ATTR(pooled_width, Int)
    .REQUIRED_ATTR(pooled_height, Int)
    .REQUIRED_ATTR(spatial_scale, Float)
    .ATTR(sample_num, Int, 2)
    .OP_END_FACTORY_REG(ROIAlignGrad)

/**
*@brief Obtains the ROI feature matrix from the feature map. It is a
* customized FasterRcnn operator . \n

*@par Inputs:
* Three inputs, including:
*@li features: A 5HD Tensor of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5).
* "N" indicates the number of ROIs, the value "5" indicates the indexes of
* images where the ROIs are located, "x0", "y0", "x1", and "y1".
*@li rois_n: An optional input of type int32, specifying the number of valid
* ROIs. This parameter is reserved . \n

*@par Attributes:
*@li spatial_scale: A required attribute of type float32, specifying the
* scaling ratio of "features" to the original image.
*@li pooled_height: A required attribute of type int32, specifying the H
* dimension.
*@li pooled_width: A required attribute of type int32, specifying the W
* dimension.
*@li sample_num: An optional attribute of type int32, specifying the
* horizontal and vertical sampling frequency of each output. If this attribute
* is set to "0", the sampling frequency is equal to the rounded up value of
* "rois", which is a floating point number. Defaults to "2".
*@li roi_end_mode: An optional attribute of type int32. Defaults to "1" . \n

*@par Outputs:
*y: Outputs the feature sample of each ROI position. The format is 5HD Tensor
* of type float32 or float16. The axis N is the number of input ROIs. Axes H,
* W, and C are consistent with the values of "pooled_height", "pooled_width",
* and "features", respectively.
*/
REG_OP(ROIAlign)
    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(spatial_scale, Float)
    .REQUIRED_ATTR(pooled_height, Int)
    .REQUIRED_ATTR(pooled_width, Int)
    .ATTR(sample_num, Int, 2)
    .ATTR(roi_end_mode, Int, 1)
    .OP_END_FACTORY_REG(ROIAlign)

/**
*@brief Performs SSD prior box detection . \n

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x" . \n

*@par Attributes:
*@li min_size: A required list of floats, specifying the minimum edge length
* of a square prior box.
*@li max_size: A required list of floats, specifying the maximum edge length
* of a square prior box: sqrt(min_size * max_size)
*@li aspect_ratio: A required list of floats, specifying the aspect ratio for
* generated rectangle boxes. The height is min_size/sqrt(aspect_ratio), the
* width is min_size*sqrt(aspect_ratio).
*@li img_h: An optional int32, specifying the source image height. Defaults
* to "0".
*@li img_w: An optional int32, specifying the source image width. Defaults
* to "0".
*@li step_h: An optional float32, specifying the height step for mapping the
* center point from the feature map to the source image. Defaults to "0.0".
*@li step_w: An optional float32, specifying the width step for mapping the
* center point from the feature map to the source image. Defaults to "0.0".
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped.
* Defaults to "True".
*@li clip: An optional bool. If "True", a prior box is clipped to within
* [0, 1]. Defaults to "False".
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
*@li variance: An optional list of floats, specifying the variance of a prior
* box, either one or four variances. Defaults to "0.1" (one value) . \n

*@par Outputs:
*y: An ND tensor of type float32 or float16, specifying the prior box
* information, including its coordinates and variance . \n

*@attention Constraints:
* This operator applies only to SSD networks.
*@see SSDDetectionOutput()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(PriorBox)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .REQUIRED_ATTR(aspect_ratio, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBox)

/**
*@brief Performs SSD prior box detection, with four additional matrices and
* the "aspect_ratio" attribute deleted compared to PriorBox . \n

*@par Inputs:
* Six inputs, including:
*@li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x".
*@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying
* the matrix for indexing the feature map height.
*@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying
* the matrix for indexing the feature map width.
*@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16,
* specifying the height of each prior box.
*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16,
* specifying the width of each prior box . \n

*@par Attributes:
*@li min_size: A required list of floats, specifying the minimum edge length
* of a square prior box.
*@li max_size: A required list of floats, specifying the maximum edge length
* of a square prior box: sqrt(min_size * max_size)
*@li img_h: An optional int32, specifying the height of the source image.
* Defaults to "0".
*@li img_w: An optional int32, specifying the width of the source image.
* Defaults to "0".
*@li step_h: An optional float32, specifying the height step for mapping the
* center point from the feature map to the source image. Defaults to "0.0".
*@li step_w: An optional float32, specifying the width step for mapping the
* center point from the feature map to the source image. Defaults to "0.0".
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped.
* Defaults to "True".
*@li clip: An optional bool. If "True", a prior box is clipped to within
* [0, 1]. Defaults to "False".
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
*@li variance: An optional list of floats, specifying the variance of a prior
* box, either one or four variances. Defaults to "0.1" (one value) . \n

*@par Outputs:
*y: An ND tensor of type float32 or float16, specifying the prior box
* information, including its coordinates and variance . \n

*@attention Constraints:
* This operator applies only to SSD networks.
*@see SSDDetectionOutput()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
*/
REG_OP(PriorBoxD)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBoxD)

/**
*@brief Performs SSD prior box detection, with an additional "boxes" input and
* the "aspect_ratio" attribute deleted compared to PriorBox . \n

*@par Inputs:
* Three inputs, including:
*@li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x".
*@li boxes: An ND tensor of type float32 or float16, specifying the prior box
* information. Same as output y . \n

*@par Attributes:
*@li min_size: A required list of floats, specifying the minimum edge length
* of a square prior box.
*@li max_size: A required list of floats, specifying the maximum edge length
* of a square prior box: sqrt(min_size * max_size)
*@li img_h: An optional int32, specifying the height of the source image.
* Defaults to "0".
*@li img_w: An optional int32, specifying the width of the source image.
* Defaults to "0".
*@li step_h: An optional float32, specifying the height step for mapping the
* center point from the feature map to the source image. Defaults to "0.0".
*@li step_w: An optional float32, specifying the width step for mapping the
* center point from the feature map to the source image. Defaults to "0.0".
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped.
* Defaults to "True".
*@li clip: An optional bool. If "True", a prior box is clipped to within
* [0, 1]. Defaults to "False".
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
*@li variance: An optional list of floats, specifying the variance of a prior
* box, either one or four variances. Defaults to "0.1" (one value) . \n

*@par Outputs:
*y: An ND tensor of type float32 or float16, specifying the prior box
* information, including its coordinates and variance . \n

*@attention Constraints:
* This operator applies only to SSD networks.
*@see SSDDetectionOutput()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
*/
REG_OP(PriorBoxDV2)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBoxDV2)

/**
*@brief Performs Position Sensitive ROI Pooling . \n

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
* map, dimension C1 must be equal to
* (int(output_dim+15)/C0))*group_size*group_size.
* NOTE(review): the parentheses in the formula above are unbalanced; confirm
* the intended expression.
*@li rois: A tensor of type float16 or float32, with shape
* [batch, 5, rois_num], describing the ROIs, each ROI consists of five
* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates
* the index of the input feature map, "x1", "y1", "x2", or "y2" must be
* greater than or equal to "0.0" . \n

*@par Attributes:
*@li output_dim: A required int32, specifying the number of output channels,
* must be greater than 0.
*@li group_size: A required int32, specifying the number of groups to encode
* position-sensitive score maps, must be within the range (0, 128).
*@li spatial_scale: A required float32, scaling factor for mapping the input
* coordinates to the ROI coordinates . \n

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
* feature map . \n

*@attention Constraints:
* NC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16
*/
REG_OP(PSROIPooling)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(output_dim, Int)
    .REQUIRED_ATTR(group_size, Int)
    .REQUIRED_ATTR(spatial_scale, Float)
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OP_END_FACTORY_REG(PSROIPooling)

/**
*@brief Returns detection result . \n

*@par Inputs:
* Five inputs, including:
*@li rois: An NCHW tensor of type float16 or float32, output from operator
* proposal_d at the preceding layer, used as the input of operator
* FSRDetectionOutput.
*@li bbox_delta: An NCHWC0 tensor of type float16 or float32, specifying the
* prediction offset, used to update the coordinates [x1, y1, x2, y2] of each
* ROI.
*@li score: An NCHWC0 tensor of type float16 or float32, specifying the
* probability of each class. Class 0 is the background class.
*@li im_info: An ND tensor of type float16 or float32, specifying the Image
* information.
*@li actual_rois_num: An optional NCHW tensor of type int32, specifying the
* number of valid boxes per batch . \n

*@par Attributes:
*@li batch_rois: An optional int32, specifying the number of images to be
* predicted. Defaults to "1".
*@li num_classes: A required int32, specifying the number of classes to be
* predicted. The value must be greater than 0.
*@li score_threshold: A required float32, specifying the threshold for box
* filtering. The value range is [0.0, 1.0].
*@li iou_threshold: A required float32, specifying the confidence threshold
* for box filtering, which is the output "obj" of operator Region. The value
* range is (0.0, 1.0) . \n

*@par Outputs:
* Two outputs, listed in registration order:
*@li actual_bbox_num: A tensor of type int32 with shape [batch, num_classes],
* specifying the number of output boxes.
*@li box: A tensor of type float16 or float32 for proposal of actual output,
* with output shape [batch, numBoxes,8]. 8 means [x1, y1, x2, y2, score,
* label, batchID, NULL], the maximum value of numBoxes is 1024. That is, take
* min (the maximum number of input boxes, 1024) . \n

*@attention Constraints:
*@li totalnum < max_rois_num * batch_rois.
*@li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16),
* where "total_num" indicates the number of valid input boxes of all images.
*@li "bbox_delta" must be with shape
* (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates
* the number of valid input boxes of all images.
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(FSRDetectionOutput)
    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(im_info, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OPTIONAL_INPUT(actual_rois_num, TensorType({DT_INT32}))
    .OUTPUT(actual_bbox_num, TensorType({DT_INT32}))
    .OUTPUT(box, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(batch_rois, Int, 1)
    .REQUIRED_ATTR(num_classes, Int)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(iou_threshold, Float)
    .OP_END_FACTORY_REG(FSRDetectionOutput)

/**
*@brief Returns detection result . \n

*@par Inputs:
* Three inputs, including:
*@li bbox_delta: An ND tensor of type float16 or float32, specifying the box
* loc predictions, used as the input of operator SSDDetectionOutput.
*@li score: An ND tensor of type float16 or float32, specifying the box
* confidences data, used as the input of operator SSDDetectionOutput.
*@li anchors: An ND tensor of type float16 or float32, output from operator
* PriorBoxD, used as the input of operator SSDDetectionOutput . \n

*@par Attributes:
*@li num_classes: An optional int32, specifying the number of classes to be
* predicted. Defaults to "2". The value must be greater than 1 and lesser
* than 1025.
*@li share_location: An optional bool, specify the shared location. Defaults
* to "True".
*@li background_label_id: An optional int32, specify the background label id.
* Must be 0. Defaults to "0".
*@li iou_threshold: An optional float32, specify the nms threshold. Defaults
* to "0.3".
*@li top_k: An optional int32, specify the topk value. Defaults to "200".
*@li eta: An optional float32, specify the eta value. Defaults to "1.0".
*@li variance_encoded_in_target: An optional bool, specify whether variance
* encoded in target or not. Defaults to "False".
*@li code_type: An optional int32, specify the code type. Defaults to "1"
* (only supports 2). The corner is 1, center_size is 2, corner_size is 3.
* NOTE(review): the registered default "1" differs from the only value stated
* as supported ("2"); confirm which is intended.
*@li keep_top_k: An optional int32, specify the topk value after nms. Defaults
* to "-1".
*@li confidence_threshold: An optional float32, specify the topk filter
* threshold. Only consider detections with confidence greater than the
* threshold. Defaults to "0.0" . \n

*@par Outputs:
*@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
*@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8],
* describing the information of each output box. In output shape, 8 means
* (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax,
* null) . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(SSDDetectionOutput)
    .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(out_boxnum, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(num_classes, Int, 2)
    .ATTR(share_location, Bool, true)
    .ATTR(background_label_id, Int, 0)
    .ATTR(iou_threshold, Float, 0.3)
    .ATTR(top_k, Int, 200)
    .ATTR(eta, Float, 1.0)
    .ATTR(variance_encoded_in_target, Bool, false)
    .ATTR(code_type, Int, 1)
    .ATTR(keep_top_k, Int, -1)
    .ATTR(confidence_threshold, Float, 0.0)
    .OP_END_FACTORY_REG(SSDDetectionOutput)

/**
*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n

*@par Inputs:
*x: An NCHW tensor of type float16 or float32. The data is with shape
* (N, boxes*(coords+obj+classes), H, W), where, "obj" indicates the confidence
* of an object, and only one confidence is supported. Boxes are arranged as
* xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n

*@par Attributes:
*@li boxes: An optional int32, specifying the number of anchor boxes. Defaults
* to "3" (the value for V3; the value for V2 is "5").
*@li coords: An optional int32, specifying the number of parameters required
* for locating an object. The value is fixed at "4", corresponding to
* (x,y,w,h). Defaults to "4".
*@li classes: An optional int32, specifying the number of prediction classes.
* Defaults to "80". The value range is [1, 1024].
*@li yolo_version: An optional string, specifying the YOLO version, either
* "V2" or "V3". Defaults to "V3".
*@li softmax: An optional bool, specifying whether to perform softmax, valid
* only when "yolo_version = V2". Defaults to "false".
*@li background: An optional bool, specifying the operation types of the obj
* and classes, used in conjunction with "softmax" and valid only when
* "yolo_version = V2". Defaults to "false".
*@li softmaxtree: An optional bool, fixed to False, defined in Lite, but not
* used. Defaults to "false" . \n

*@par Outputs:
*@li coord_data: A float16 or float32 with shape
* [N, boxes*coords, ceilx(height*width*2+32, 32)/2], where "ceilx" indicates
* that a detected box is aligned upwards with the second parameter. Specifies
* the coordinates of a detected box.
*@li obj_prob: A float16 or float32 with shape
* [N, ceilx(boxes*height*width *2+32, 32)/2], where "ceilx" indicates that a
* detected box is aligned upwards with the second parameter. Specifies the
* confidence.
*@li classes_prob: A float16 or float32 with shape
* [N, classes, ceilx(boxes*height*width *2+32, 32)/2], where "ceilx" indicates
* that a detected box is aligned upwards with the second parameter. Specifies
* the prediction classes . \n

*@attention Constraints:
*@li This operator applies to YOLO v2 and v3 networks.
*@li The succeeding layer of the Yolo operator must be operator
* Yolov3DetectionOutput.
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(Yolo)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(yolo_version, String, "V3")
    .ATTR(softmax, Bool, false)
    .ATTR(background, Bool, false)
    .ATTR(softmaxtree, Bool, false)
    .OP_END_FACTORY_REG(Yolo)

/**
*@brief Performs YOLO V2 detection . \n

*@par Inputs:
* Four inputs, including:
*@li coord_data, obj_prob, classes_prob: Tensors of type float16 or float32.
* The outputs of operator Yolo at the preceding layer (that is, one Yolo
* operator on YOLO v2) are used as the inputs of operator
* YoloV2DetectionOutput. Each Yolo operator has three outputs: "coord_data",
* "obj_prob", and "classes_prob". For details, see the description of
* operator Yolo.
*@li img_info: A float16 or float32, describing the image information
* including the required image height and width and the actual image height
* and width . \n

*@par Attributes:
*@li biases: A required list of floats. "biases = Number of Yolo operators at
* the preceding layer x 2 x boxes"
*@li boxes: An optional int32, specifying the number of anchor boxes predicted
* for each Yolo layer. Defaults to "5".
*@li coords: An optional int32, specifying the number of coordinate
* parameters. Must be 4. Defaults to "4".
*@li classes: An optional int32, specifying the number of classes to be
* predicted. The value range is [1, 20]. Defaults to "20".
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: An optional float, specifying the confidence threshold
* for box filtering, which is the output "obj" of operator Yolo. The value
* range is [0.0, 1.0]. Defaults to "0.5".
*@li post_nms_topn: An optional int32. This attribute is reserved. Defaults
* to "512".
*@li score_threshold: An optional float, specifying the class score threshold
* for box filtering, which is the output "class" of operator Yolo. The value
* range is [0.0, 1.0]. Defaults to "0.5".
*@li iou_threshold: An optional float, specifying the intersection-over-union
* (IOU) threshold for box filtering. The value range is [0.0, 1.0]. Defaults
* to "0.45".
*@li pre_nms_topn: An optional int, specifying the number of boxes for
* non-maximum suppression (NMS). Defaults to "512" . \n

*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape
* [batch,6,post_nms_topn], describing the information of each output box.
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the
* number of box_out_num.
*@li box_out_num: A tensor of type int32 with shape [batch,8,1,1], specifying
* the number of output boxes. It means only the first one of the 8 numbers is
* valid, the number of valid boxes in each batch, the maximum number of valid
* boxes in each batch is 1024 . \n

*@attention Constraints:
*@li This operator applies only to the YOLO v2 network.
*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo
* operator.
*
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV2DetectionOutput)
    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 5)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 20)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV2DetectionOutput)

/**
*@brief Performs YOLO V2 detection . \n

*@par Inputs:
*Six inputs, including:
*@li coord_data, obj_prob, classes_prob: Tensors of type float16 or float32. The outputs of operator Yolo at the
* preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of this operator.
* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
*@li img_info: A tensor of type float16 or float32, describing the image information including the required image
* height and width and the actual image height and width.
*@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]], consisting of "height" groups of
* [0,1,2...(width-1)].
*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs.
* [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] . \n

*@par Attributes:
*@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: An optional int, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "5".
*@li coords: An optional int, specifying the number of coordinate parameters. Must be 4. Defaults to "4".
*@li classes: An optional int, specifying the number of classes to be predicted. The value range is [1, 20].
* Defaults to "20".
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the
* output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li post_nms_topn: An optional int. This attribute is reserved. Defaults to "512".
*@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the
* output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering.
* The value range is [0.0, 1.0]. Defaults to "0.45".
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS).
* Defaults to "512" . \n

*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information
* of each output box. In the output shape, 6 means x1, y1, x2, y2, score, label(class).
* The number of valid boxes is given by "box_out_num".
*@li box_out_num: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes.
* Only the first one of the 8 numbers is valid; it is the number of valid boxes in each batch.
* The maximum number of valid boxes in each batch is 1024 . \n

*@attention Constraints:
*@li This operator applies only to the YOLO v2 network.
*@li The preceding layer of this operator must be one Yolo operator . \n

*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
*/
REG_OP(YoloV2DetectionOutputD)
    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 5)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 20)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV2DetectionOutputD)

/**
*@brief Performs YOLO V3 detection . \n

*@par Inputs:
*Ten inputs, including:
*@li coord_data_low, coord_data_mid, coord_data_high, obj_prob_low, obj_prob_mid, obj_prob_high,
* classes_prob_low, classes_prob_mid, classes_prob_high: Tensors of type float16 or float32.
* This operator takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs:
* "coords", "obj", and "class". There are three Yolo operators at this operator's preceding layer on Yolo v3.
* For details, see the description of operator Yolo.
*@li img_info: A tensor of type float16 or float32, describing the image information including the required image
* height and width and the actual image height and width . \n

*@par Attributes:
*@li biases_low, biases_mid, biases_high: Required lists of floats. Presumably one bias list for each of the
* three Yolo operators at the preceding layer, matching the "_low", "_mid" and "_high" inputs (to be confirmed).
*@li boxes: An optional int, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "3".
*@li coords: An optional int, specifying the number of coordinate parameters. Must be 4. Defaults to "4".
*@li classes: An optional int, specifying the number of classes to be predicted. The value range is [1, 80].
* Defaults to "80".
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the
* output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li post_nms_topn: An optional int. This attribute is reserved. Defaults to "512".
*@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the
* output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering.
* The value range is [0.0, 1.0]. Defaults to "0.45".
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS).
* Defaults to "512" . \n

*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information
* of each output box. In the output shape, 6 means x1, y1, x2, y2, score, label(class).
* The number of valid boxes is given by "box_out_num".
*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first one of the 8 numbers is valid; it is the number of valid boxes in each batch.
* The maximum number of valid boxes in each batch is 1024 . \n

*@attention Constraints:
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of this operator must be three Yolo operators . \n

*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV3DetectionOutput)
    .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases_low, ListFloat)
    .REQUIRED_ATTR(biases_mid, ListFloat)
    .REQUIRED_ATTR(biases_high, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutput)

/**
*@brief Performs YOLO V3 detection . \n

*@par Inputs:
*16 inputs, including:
*@li coord_data_low, coord_data_mid, coord_data_high, obj_prob_low, obj_prob_mid, obj_prob_high,
* classes_prob_low, classes_prob_mid, classes_prob_high: Tensors of type float16 or float32.
* The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the
* inputs of this operator. A Yolo operator has three outputs: "coords", "obj", and "class".
* For details, see the description of operator Yolo.
*@li img_info: A tensor of type float16 or float32, describing the image information including the required image
* height and width and the actual image height and width.
*@li windex1, windex2, windex3: windex tensors with shape [height,width]. Have the same type as the inputs.
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]], consisting of "height" groups of
* [0,1,2...(width-1)], is formed for the three Yolo outputs, respectively.
*@li hindex1, hindex2, hindex3: hindex tensors with shape [height,width]. Have the same type as the inputs.
* [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs,
* respectively . \n

*@par Attributes:
*@li biases_low, biases_mid, biases_high: Required lists of floats. Presumably one bias list for each of the
* three Yolo operators at the preceding layer, matching the "_low", "_mid" and "_high" inputs (to be confirmed).
*@li boxes: An optional int, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "3".
*@li coords: An optional int, specifying the number of coordinate parameters. Must be 4. Defaults to "4".
*@li classes: An optional int, specifying the number of classes to be predicted. The value range is [1, 80].
* Defaults to "80".
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the
* output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li post_nms_topn: An optional int. This attribute is reserved. Defaults to "512".
*@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the
* output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering.
* The value range is [0.0, 1.0]. Defaults to "0.45".
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS).
* Defaults to "512" . \n

*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information
* of each output box. In the output shape, 6 means x1, y1, x2, y2, score, label(class).
* The number of valid boxes is given by "box_out_num".
*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first one of the 8 numbers is valid; it is the number of valid boxes in each batch.
* The maximum number of valid boxes in each batch is 1024 . \n

*@attention Constraints:
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of this operator must be three Yolo operators.
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
*/
REG_OP(YoloV3DetectionOutputD)
    .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases_low, ListFloat)
    .REQUIRED_ATTR(biases_mid, ListFloat)
    .REQUIRED_ATTR(biases_high, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputD)

/**
*@brief Performs YOLO V3 detection . \n

*@par Inputs:
*One dynamic input:
*@li x: A dynamic input of tensors of type float16 or float32. This operator takes the outputs of operator Yolo
* as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". There are three Yolo operators
* at this operator's preceding layer on Yolo v3. For details, see the description of operator Yolo.
* The dynamic input also carries img_info, a float16 or float32 tensor describing the image information
* including the required image height and width and the actual image height and width
* (ten tensors in total according to the original description) . \n

*@par Attributes:
*@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: An optional int, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "3".
*@li coords: An optional int, specifying the number of coordinate parameters. Must be 4. Defaults to "4".
*@li classes: An optional int, specifying the number of classes to be predicted. The value range is [1, 80].
* Defaults to "80".
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the
* output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li post_nms_topn: An optional int. This attribute is reserved. Defaults to "512".
*@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the
* output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering.
* The value range is [0.0, 1.0]. Defaults to "0.45".
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS).
* Defaults to "512".
*@li N: An optional int. Defaults to "10". Presumably the number of tensors in dynamic input "x" (to be confirmed).
*@li resize_origin_img_to_net: An optional bool. Defaults to "false".
*@li out_box_dim: An optional int, selecting the number of dimensions of output "box_out" (2 or 3, see Outputs).
* Defaults to "3" . \n

*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn] (out_box_dim == 3) or
* [batch,6*post_nms_topn] (out_box_dim == 2), describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label(class).
* The number of valid boxes is given by "box_out_num".
*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first one of the 8 numbers is valid; it is the number of valid boxes in each batch.
* The maximum number of valid boxes in each batch is 1024 . \n

*@attention Constraints:
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of this operator must be three Yolo operators . \n

*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV3DetectionOutputV2)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .ATTR(N, Int, 10)
    .ATTR(resize_origin_img_to_net, Bool, false)
    .ATTR(out_box_dim, Int, 3)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputV2)

/**
*@brief Performs YOLO V3 detection . \n

*@par Inputs:
*Three dynamic inputs (16 tensors in total according to the original description), including:
*@li x: A dynamic input of tensors of type float16 or float32. The outputs of operator Yolo at the preceding
* layer (that is, three Yolo operators on YOLO v3) are used as the inputs of this operator.
* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of
* operator Yolo. The dynamic input also carries img_info, describing the image information including the
* required image height and width and the actual image height and width.
*@li windex: A dynamic input of windex tensors with shape [height,width]. Has the same type as the inputs.
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]], consisting of "height" groups of
* [0,1,2...(width-1)], is formed for the three Yolo outputs, respectively.
*@li hindex: A dynamic input of hindex tensors with shape [height,width]. Has the same type as the inputs.
* [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs,
* respectively . \n

*@par Attributes:
*@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: An optional int, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "3".
*@li coords: An optional int, specifying the number of coordinate parameters. Must be 4. Defaults to "4".
*@li classes: An optional int, specifying the number of classes to be predicted. The value range is [1, 80].
* Defaults to "80".
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the
* output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li post_nms_topn: An optional int. This attribute is reserved. Defaults to "512".
*@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the
* output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
*@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering.
* The value range is [0.0, 1.0]. Defaults to "0.45".
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS).
* Defaults to "512".
*@li N: An optional int. Defaults to "10". Presumably the number of tensors in dynamic input "x" (to be confirmed).
*@li resize_origin_img_to_net: An optional bool. Defaults to "false".
*@li out_box_dim: An optional int, selecting the number of dimensions of output "box_out" (2 or 3, see Outputs).
* Defaults to "3" . \n

*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn] (out_box_dim == 3) or
* [batch,6*post_nms_topn] (out_box_dim == 2), describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label(class).
* The number of valid boxes is given by "box_out_num".
*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first one of the 8 numbers is valid; it is the number of valid boxes in each batch.
* The maximum number of valid boxes in each batch is 1024 . \n

*@attention Constraints:
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of this operator must be three Yolo operators.
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.

*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
*/
REG_OP(YoloV3DetectionOutputV2D)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .ATTR(N, Int, 10)
    .ATTR(resize_origin_img_to_net, Bool, false)
    .ATTR(out_box_dim, Int, 3)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)

/**
*@brief Spatial Pyramid Pooling, multi-level pooling.
* Pooling out(n, sigma(c*2^i*2^i)) tensor, i in range[0,pyramid_height) . \n

*@par Inputs:
*x: An NCHW tensor of type float16 or float32 . \n

*@par Attributes:
* @li pyramid_height: A required int.
* Multi-level pooling out from 2^0 to 2^(pyramid_height-1).
* @li pool_method: An optional int, pooling method: 0-MAX, 1-AVE.
* Defaults to "0" . \n

*@par Outputs:
*y: An NCHW tensor of type float16 or float32 . \n

*@attention Constraints:
* @li pyramid_height: pyramid_height should be in range [0,7).
* Pooling parameters should satisfy the Caffe pooling constraint (pad<kernel).
* @li feature_size: input feature map h and w should be in range [1, 510] . \n

*@par Third-party framework compatibility
* Compatible with the Caffe operator SPP.
*/
REG_OP(SPP)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(pyramid_height, Int)
    .ATTR(pool_method, Int, 0)
    .OP_END_FACTORY_REG(SPP)

/**
*@brief Performs Region of Interest (ROI) Pooling . \n

*@par Inputs:
* Three inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
* map.
*@li rois: A tensor of type float16 or float32, with 3D shape
* [batch, 5, roi_max_num], describing the ROIs.
* roi_max_num must be less than or equal to 6000 and must be divisible by 16.
*@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying
* the number of ROIs per batch . \n

*@par Attributes:
*@li pooled_h: A required int, specifying the pooled H. Must be greater
* than 0.
*@li pooled_w: A required int, specifying the pooled W. Must be greater
* than 0.
*@li spatial_scale_h: A required float, the scaling factor for mapping the input
* coordinates of height to the ROI coordinates.
*@li spatial_scale_w: A required float, the scaling factor for mapping the input
* coordinates of width to the ROI coordinates . \n

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
* feature map . \n

*@attention Constraints:
* For the feature map input:
*@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
*@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
*@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
*@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
*@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
*@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40 . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(ROIPooling)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OPTIONAL_INPUT(roi_actual_num, TensorType({DT_INT32}))
    .REQUIRED_ATTR(pooled_h, Int)
    .REQUIRED_ATTR(pooled_w, Int)
    .REQUIRED_ATTR(spatial_scale_h, Float)
    .REQUIRED_ATTR(spatial_scale_w, Float)
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OP_END_FACTORY_REG(ROIPooling)

/**
*@brief Computes decode bbox function . \n

*@par Inputs:
*Two inputs, including:
* @li box_predictions: A Tensor. Must be float16.
* @li anchors: A Tensor. Must have the same type as box_predictions . \n

*@par Attributes:
*decode_clip: A required float, threshold of the decode process . \n

*@par Outputs:
*decoded_boxes: A Tensor. Must have the same type as box_predictions.
* N-D with shape [N, 4] . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeBbox)
    .INPUT(box_predictions, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(decoded_boxes, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(decode_clip, Float)
    .OP_END_FACTORY_REG(DecodeBbox)

/**
*@brief Computes ClipBoxes function . \n

*@par Inputs:
*Two inputs, including:
*@li boxes_input: A Tensor. Must be float16. N-D with shape [N, 4].
*@li img_size: A Tensor. Must be int32. shape [H, W] . \n

*@par Outputs:
*boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4] . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ClipBoxes)
    .INPUT(boxes_input, TensorType({DT_FLOAT16}))
    .INPUT(img_size, TensorType({DT_INT32}))
    .OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(ClipBoxes)

/**
*@brief Computes ClipBoxesD function . \n

*@par Attributes:
*img_size: A required list of ints, [H, W] . \n

*@par Inputs:
*boxes_input: A Tensor. Must be float16. N-D with shape [N, 4] . \n

*@par Outputs:
*boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4] . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ClipBoxesD)
    .INPUT(boxes_input, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(img_size, ListInt)
    .OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(ClipBoxesD)

/**
*@brief Computes Fastrcnn Predictions function . \n

*@par Inputs:
*Two inputs, including:
* @li rois: A Tensor. Must be float16. N-D with shape [N*C, 4].
* @li score: A Tensor. Must be float16. N-D with shape [N, C+1] . \n

*@par Attributes:
* @li nms_threshold: A required float, threshold of the nms process.
* @li score_threshold: A required float, threshold of the topk process.
* @li k: A required int, threshold of the topk process . \n

*@par Outputs:
* @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
* @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
* @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
*/
REG_OP(FastrcnnPredictions)
    .INPUT(rois, TensorType({DT_FLOAT16}))
    .INPUT(score, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(nms_threshold, Float)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(k, Int)
    .OUTPUT(sorted_rois, TensorType({DT_FLOAT16}))
    .OUTPUT(sorted_scores, TensorType({DT_FLOAT16}))
    .OUTPUT(sorted_classes, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(FastrcnnPredictions)

/**
*@brief Computes Fastrcnn RpnProposals function . \n

*@par Inputs:
*Three inputs, including:
* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
* @li img_size: A Tensor. Must be int32. shape [H, W] . \n

*@par Attributes:
* @li score_threshold: A required float, threshold of the topk process.
* @li k: A required int, threshold of the topk process.
* @li min_size: A required float, threshold of the nms process.
* @li nms_threshold: A required float, threshold of the nms process.
* @li post_nms_num: A required int, threshold of the nms process.
* @li score_filter: An optional bool, mark of score_filter. Defaults to "true".
* @li box_filter: An optional bool, mark of box_filter. Defaults to "true".
* @li score_sigmoid: An optional bool, mark of score_sigmoid. Defaults to "false" . \n

*@par Outputs:
*sorted_box: A Tensor of output. Must be float16 . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Unpack.
*/
REG_OP(RpnProposals)
    .INPUT(rois, TensorType({DT_FLOAT16}))
    .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
    .INPUT(img_size, TensorType({DT_INT32}))
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(k, Int)
    .REQUIRED_ATTR(min_size, Float)
    .REQUIRED_ATTR(nms_threshold, Float)
    .REQUIRED_ATTR(post_nms_num, Int)
    .ATTR(score_filter, Bool, true)
    .ATTR(box_filter, Bool, true)
    .ATTR(score_sigmoid, Bool, false)
    .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(RpnProposals)

/**
*@brief Computes Fastrcnn RpnProposalsD function . \n

*@par Inputs:
*Two inputs, including:
*@li rois: A Tensor. Must be float16. N-D with shape [N, 4].
*@li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1] . \n

*@par Attributes:
*@li img_size: A required list of ints, the size of the image, [H, W].
*@li score_threshold: A required float, threshold of the topk process.
*@li k: A required int, threshold of the topk process.
*@li min_size: A required float, threshold of the nms process.
*@li nms_threshold: A required float, threshold of the nms process.
*@li post_nms_num: A required int, threshold of the nms process.
*@li score_filter: An optional bool, mark of score_filter. Defaults to "true".
*@li box_filter: An optional bool, mark of box_filter. Defaults to "true".
*@li score_sigmoid: An optional bool, mark of score_sigmoid. Defaults to "false" . \n

*@par Outputs:
*sorted_box: A Tensor of output. Must be float16. N-D with shape [N, 1] . \n

* @par Third-party framework compatibility
* Compatible with the pytorch operator RPNProposals . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*Warning: THIS FUNCTION IS DEPRECATED. Please use RpnProposals instead.
*/
REG_OP(RpnProposalsD)
    .INPUT(rois, TensorType({DT_FLOAT16}))
    .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(img_size, ListInt)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(k, Int)
    .REQUIRED_ATTR(min_size, Float)
    .REQUIRED_ATTR(nms_threshold, Float)
    .REQUIRED_ATTR(post_nms_num, Int)
    .ATTR(score_filter, Bool, true)
    .ATTR(box_filter, Bool, true)
    .ATTR(score_sigmoid, Bool, false)
    .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(RpnProposalsD)


/**
*@brief Computes Score Filter Pre-Sort function . \n

*@par Inputs:
*Two inputs, including:
* @li sorted_proposal: A Tensor. Must be float16.
*                      N-D with shape [8*6002, 8].
* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8] . \n

*@par Attributes:
* @li img_size: A required list of ints. Presumably the image size [H, W] (to be confirmed).
* @li score_threshold: A required float, threshold of the topk process.
* @li k: A required int, threshold of the topk process.
* @li min_size: A required float, threshold of the nms process.
* @li nms_threshold: A required float, threshold of the nms process.
* @li post_nms_num: A required int, threshold of the nms process.
* @li box_filter: An optional bool, mark of box_filter. Defaults to "true".
* @li core_max_num: An optional int, max number of cores. Defaults to "8" . \n

*@par Outputs:
*sorted_box: A Tensor of output. Must be float16.
*/
REG_OP(RpnProposalPostProcessing)
    .INPUT(sorted_proposal, TensorType({DT_FLOAT16}))
    .INPUT(proposal_num, TensorType({DT_UINT32}))
    .OUTPUT(sorted_box, TensorType({ DT_FLOAT16}))
    .REQUIRED_ATTR(img_size, ListInt)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(k, Int)
    .REQUIRED_ATTR(min_size, Float)
    .REQUIRED_ATTR(nms_threshold, Float)
    .REQUIRED_ATTR(post_nms_num, Int)
    .ATTR(box_filter, Bool, true)
    .ATTR(core_max_num, Int, 8)
    .OP_END_FACTORY_REG(RpnProposalPostProcessing)
/**
*@brief Computes DecodeBoundariesTarget function . \n

*@par Inputs:
*Two inputs, including:
* @li boundary_predictions: A Tensor. Must be float16.
* @li anchors: A Tensor. Must be float16 . \n

*@par Outputs:
*boundary_encoded: A Tensor. Must be float16 . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeBoundariesTarget)
    .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(DecodeBoundariesTarget)

/**
*@brief Computes DecodeCornerpointsTargetBG function . \n

*@par Inputs:
*Two inputs, including:
* @li keypoints_prediction: A Tensor. Must be float16.
* @li anchors: A Tensor. Must be float16 . \n

*@par Outputs:
*keypoints_decoded: A Tensor. Must be float16 . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeCornerpointsTargetBG)
    .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(DecodeCornerpointsTargetBG)

/**
*@brief Computes DecodeCornerpointsTargetWrtCenterV1 function . \n

*@par Inputs:
*Two inputs, including:
* @li keypoints_prediction: A Tensor. Must be float16.
* @li anchors: A Tensor. Must be float16 . \n

*@par Outputs:
*keypoints_decoded: A Tensor. Must be float16 . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeCornerpointsTargetWrtCenterV1)
    .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(DecodeCornerpointsTargetWrtCenterV1)

/**
*@brief Computes DecodeWheelsTarget function . \n

*@par Inputs:
*Two inputs, including:
* @li boundary_predictions: A Tensor. Must be float16.
* @li anchors: A Tensor. Must be float16 . \n

*@par Outputs:
*boundary_encoded: A Tensor. Must be float16 . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeWheelsTarget)
    .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(DecodeWheelsTarget)

/**
*@brief Computes nms for input boxes and scores. Supports multiple batches and classes.
* Performs clip to window, score filtering, top_k, and nms.

*@par Inputs:
* Four inputs, including:
*@li boxes: boxes, a 4D Tensor of type float16 with
* shape (batch, num_anchors, num_classes, 4). "batch" indicates the batch size of image,
* "num_anchors" indicates the number of boxes, "num_classes" indicates the classes of detection,
* and the value "4" refers to "x0", "x1", "y0", and "y1".
*@li scores: scores, a 3D Tensor of type float16 with
* shape (batch, num_anchors, num_classes).
*@li clip_window: An optional input. Window size, a 2D Tensor of type float16 with
* shape (batch, 4). "4" refers to "anchor_x0", "anchor_x1", "anchor_y0", and "anchor_y1".
*@li num_valid_boxes: An optional input. Valid boxes number for each batch, a 1D Tensor of type int32 with
* shape (batch,) . \n

*@par Attributes:
*@li score_threshold: A required attribute of type float32, specifying the score filter threshold.
*@li iou_threshold: A required attribute of type float32, specifying the nms iou threshold.
*@li max_size_per_class: A required attribute of type int, specifying the nms output num per class.
*@li max_total_size: A required attribute of type int, specifying the nms output num per batch.
*@li change_coordinate_frame: An optional attribute of type bool, whether to normalize coordinates after clipping.
* Defaults to "false".
*@li transpose_box: An optional attribute of type bool, whether a transpose is inserted before this op.
* Must be "false". Defaults to "false" . \n

*@par Outputs:
*@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4),
* specifying the output nms boxes per batch.
*@li nmsed_scores: A 2D Tensor of type float16 with shape (batch, max_total_size),
* specifying the output nms score per batch.
*@li nmsed_classes: A 2D Tensor of type float16 with shape (batch, max_total_size),
* specifying the output nms class per batch.
*@li nmsed_num: A 1D Tensor of type int32 with shape (batch), specifying the valid num of nmsed_boxes . \n

*@attention Constraints:
* Only computation of float16 data is supported.
* Note: when the class num per image * max_size_per_class is too big, compilation fails with
* ERROR-insufficient memory.
*/
REG_OP(BatchMultiClassNonMaxSuppression)
    .INPUT(boxes, TensorType({DT_FLOAT16}))
    .INPUT(scores, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(clip_window, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(num_valid_boxes, TensorType({DT_INT32}))
    .OUTPUT(nmsed_boxes, TensorType({DT_FLOAT16}))
    .OUTPUT(nmsed_scores, TensorType({DT_FLOAT16}))
    .OUTPUT(nmsed_classes, TensorType({DT_FLOAT16}))
    .OUTPUT(nmsed_num, TensorType({DT_INT32}))
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(iou_threshold, Float)
    .REQUIRED_ATTR(max_size_per_class, Int)
    .REQUIRED_ATTR(max_total_size, Int)
    .ATTR(change_coordinate_frame, Bool, false)
    .ATTR(transpose_box, Bool, false)
    .OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression)

/**
* @brief Converts the normalized bounding box to absolute form . \n

* @par Inputs:
* Two inputs, including:
* @li normalized_boxes: A 3D Tensor of type float16 or float32.
* @li shape_hw: A 1D Tensor of type int32 . \n

* @par Attributes:
* reversed_box: An optional bool, specifying whether the last two dims are "4,num" or
* "num,4": "true" for "4,num", "false" for "num,4". Defaults to "false" . \n

* @par Outputs:
* y: A Tensor. Has the same type and shape as "normalized_boxes" . \n

* @attention Constraints:
* "normalized_boxes"'s shape must be (batch,num,4) or (batch,4,num).
* "shape_hw"'s shape must be (4,)
*/
REG_OP(ToAbsoluteBBox)
    .INPUT(normalized_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(shape_hw, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reversed_box, Bool, false)
    .OP_END_FACTORY_REG(ToAbsoluteBBox)

/**
*@brief Computes Normalize bbox function.
*
*@par Inputs:
*Inputs include:
* @li boxes: A Tensor. Must be float16 or float32.
* @li shape_hw: A Tensor. Must be int32.
*
*@par Attributes:
* reversed_box: An optional attribute of type bool. Defaults to "false".
*
*@par Outputs:
* y: A Tensor. Must have the same type and shape as "boxes".
*/
REG_OP(NormalizeBBox)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(shape_hw, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reversed_box, Bool, false)
    .OP_END_FACTORY_REG(NormalizeBBox)

/**
*@brief Computes decode bboxv2 function.
*
*@par Inputs:
*Inputs include:
* @li boxes: A Tensor. Must be float16 or float32.
* @li anchors: A Tensor. Must be float16 or float32.
*
*@par Attributes:
* @li scales: An optional attribute of type listfloat. Defaults to [1.0, 1.0, 1.0, 1.0].
* @li decode_clip: An optional attribute of type float, threshold of the decode process.
* Defaults to 0.0.
* @li reversed_box: An optional attribute of type bool. Defaults to "false".
*
*@par Outputs:
* y: A Tensor. Must have the same type as "boxes".
*/
REG_OP(DecodeBboxV2)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0})
    .ATTR(decode_clip, Float, 0.0)
    .ATTR(reversed_box, Bool, false)
    .OP_END_FACTORY_REG(DecodeBboxV2)

/**
*@brief Sorts the input tensor and returns the sorted values and their indices.
*
*@par Inputs:
*Inputs include:
* x: A Tensor. Dtype support: float16, float, int16, int8,
* uint8, int32, int64.
*
*@par Attributes:
* @li axis: An optional attribute of type int, indicating the sorting axis. Defaults to -1.
* @li descending: An optional attribute of type bool, indicating whether to sort in
* descending order. Defaults to "false".
*
*@par Outputs:
* @li y1: A Tensor. Must have the same type as x.
* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
*
*/
REG_OP(Sort)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
                          DT_UINT8, DT_INT32, DT_INT64}))
    .OUTPUT(y1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
                            DT_UINT8, DT_INT32, DT_INT64}))
    .OUTPUT(y2, TensorType({DT_INT32}))
    .ATTR(axis, Int, -1)
    .ATTR(descending, Bool, false)
    .OP_END_FACTORY_REG(Sort)

/**
*@brief Computes iou for input bboxes and gtboxes.

*@par Inputs:
* Two inputs, including:
*@li bboxes: boxes, a Tensor of type float16 or float32 holding boxes as (x0, x1, y0, y1).
*@li gtboxes: boxes, a Tensor of type float16 or float32 holding boxes as (x0, x1, y0, y1). \n

*@par Attributes:
* mode: An optional attribute of type string, selecting the iou computation mode.
* Defaults to "iou". \n

*@par Outputs:
* overlap: A 2D Tensor of type float16 or float32 with shape [n, m]. \n

*@attention Constraints:
* NOTE(review): this comment previously stated that only float16 computation is supported,
* but the registration below also declares float32 for all tensors. Confirm which is correct.

*@par Restrictions:
*Warning:THIS FUNCTION IS DEPRECATED. Please use Iou instead.
*/
REG_OP(PtIou)
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(PtIou)

/**
*@brief Greedily selects a subset of bounding boxes in descending order of
* score . \n

*@par Inputs:
* Five inputs, including:
*@li boxes: An input tensor of type float16 or float32 with shape
* [num_batches,spatial_dimension,4].
* The single box data format is indicated by center_point_box.
*@li scores: An input tensor of type float16 or float32 with shape
* [num_batches,num_classes,spatial_dimension].
*@li max_output_size: An optional scalar int32 tensor representing the maximum number
* of boxes to be selected by non max suppression.
*@li iou_threshold: An optional 0-D float32 tensor representing the threshold for deciding
* whether boxes overlap too much with respect to IOU.
*@li score_threshold: An optional 0-D float32 tensor representing the threshold for
* deciding when to remove boxes based on score . \n

*@par Attributes:
*@li center_point_box: An optional integer indicating the format of the box data.
* Defaults to 0. 0 - the box data is supplied as [y1, x1, y2, x2]
* where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
* of box corners and the coordinates can be provided as normalized
* (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
* 1 - the box data is supplied as [x_center, y_center, width, height].
* Mostly used for Pytorch models.
*@li max_boxes_size: An optional integer. Defaults to 0.
* NOTE(review): presumably the real maximum number of boxes to be selected by
* non max suppression - confirm against the operator implementation. \n

*@par Outputs:
* selected_indices: An int32 tensor representing the
* selected indices from the boxes tensor, where M <= max_output_size.
* NOTE(review): this comment previously said "2-D ... of shape [M]", which is
* self-contradictory; confirm the actual rank. \n

*@attention Constraints:
* NOTE(review): this comment previously stated that boxes and scores must be float16,
* but the registration below also declares float32. Confirm which is correct. \n

*@par Third-party framework compatibility
*Compatible with onnx NonMaxSuppression operator.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(NonMaxSuppressionV6)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
    .OUTPUT(selected_indices, TensorType({DT_INT32}))
    .ATTR(center_point_box, Int, 0)
    .ATTR(max_boxes_size, Int, 0)
    .OP_END_FACTORY_REG(NonMaxSuppressionV6)

/**
*@brief Greedily selects a subset of bounding boxes in descending order of
* score . \n

*@par Inputs:
* Six inputs, including:
*@li boxes: An input tensor of type float16 or float32 with shape
* [num_batches,spatial_dimension,4].
* The single box data format is indicated by center_point_box.
*@li scores: An input tensor of type float16 or float32 with shape
* [num_batches,num_classes,spatial_dimension].
*@li max_output_size: An optional scalar int32 tensor representing the maximum number
* of boxes to be selected by non max suppression.
*@li iou_threshold: An optional 0-D float32 tensor representing the threshold for deciding
* whether boxes overlap too much with respect to IOU.
*@li score_threshold: An optional 0-D float32 tensor representing the threshold for
* deciding when to remove boxes based on score.
*@li index_id: An optional input tensor of type float16 with shape
* [num_batches,num_classes,spatial_dimension,3],
* the last dim representing (batch_id,class_id,index_id) . \n

*@par Attributes:
*@li center_point_box: An optional integer indicating the format of the box data.
* Defaults to 0. 0 - the box data is supplied as [y1, x1, y2, x2]
* where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
* of box corners and the coordinates can be provided as normalized
* (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
* 1 - the box data is supplied as [x_center, y_center, width, height].
* Mostly used for Pytorch models.
*@li max_boxes_size: An optional attribute integer representing the real maximum
* number of boxes to be selected by non max suppression. Defaults to 0 . \n

*@par Outputs:
* selected_indices: An int32 tensor representing the
* selected indices from the boxes tensor, where M <= max_output_size.
* NOTE(review): this comment previously said "2-D ... of shape [M]", which is
* self-contradictory; confirm the actual rank. \n

*@attention Constraints:
* NOTE(review): this comment previously stated that boxes and scores must be float16,
* but the registration below also declares float32. Confirm which is correct. \n

*@par Third-party framework compatibility
*Compatible with onnx NonMaxSuppression operator.
*/

REG_OP(NonMaxSuppressionV7)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16}))
    .OUTPUT(selected_indices, TensorType({DT_INT32}))
    .ATTR(center_point_box, Int, 0)
    .ATTR(max_boxes_size, Int, 0)
    .OP_END_FACTORY_REG(NonMaxSuppressionV7)

/**
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n

*@par Inputs:
* Two inputs, including:
*@li features: A dynamic input. A 5HD Tensor list of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". \n

*@par Attributes:
*@li finest_scale: An optional attribute of type int, specifying the scale of calculate levels of "rois".
* Defaults to "56".
*@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of "rois" coordinates.
* Defaults to "0".
*@li spatial_scale: An optional attribute of type list float32, specifying the scaling ratio of "features"
* to the original image. Defaults to [1/4, 1/8, 1/16, 1/32].
*@li pooled_height: An optional attribute of type int32, specifying the H dimension. Defaults to "7".
*@li pooled_width: An optional attribute of type int32, specifying the W dimension. Defaults to "7".
*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency
* of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois",
* which is a floating point number. Defaults to "0".
*@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg".
*@li aligned: An optional attribute of type bool, specifying the align to corner. Defaults to true . \n

*@par Outputs:
* y: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16.
* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height",
* "pooled_width", and "features", respectively. \n

*@par Third-party framework compatibility
*Compatible with mmdetection SingleRoIExtractor operator.
*/
REG_OP(RoiExtractor)
    .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(finest_scale, Int, 56)
    .ATTR(roi_scale_factor, Float, 0)
    .ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32})
    .ATTR(pooled_height, Int, 7)
    .ATTR(pooled_width, Int, 7)
    .ATTR(sample_num, Int, 0)
    .ATTR(pool_mode, String, "avg")
    .ATTR(aligned, Bool, true)
    .OP_END_FACTORY_REG(RoiExtractor)

/**
*@brief Performs Position Sensitive PS ROI Pooling . \n

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
* map, dimension C1 must be equal to
* (int(output_dim+15)/C0))*group_size*group_size.
*@li rois: A tensor of type float16 or float32, with shape
* [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
* elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
* the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
* greater than or equal to "0.0" . \n

*@par Attributes:
*@li output_dim: A required int32, specifying the number of output channels,
* must be greater than 0.
*@li group_size: A required int32, specifying the number of groups to encode
* position-sensitive score maps, must be within the range (0, 128).
*@li spatial_scale: A required float32, scaling factor for mapping the input
* coordinates to the ROI coordinates . \n

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
* feature map . \n

*@attention Constraints:
* NC1HWC0 (written "HC1HWC0" in the earlier text - presumably a typo, confirm):
* channel must be Group_size squared, rois_num is a multiple of 16
*/
REG_OP(PSROIPoolingV2)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(spatial_scale, Float)
    .REQUIRED_ATTR(output_dim, Int)
    .REQUIRED_ATTR(group_size, Int)
    .OP_END_FACTORY_REG(PSROIPoolingV2)

/**
*@brief Performs Position Sensitive PS ROI Pooling Grad . \n

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the result
* feature map.
*@li rois: A tensor of type float16 or float32, with shape
* [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
* elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
* the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
* greater than or equal to "0.0" . \n

*@par Attributes:
*@li output_dim: A required int32, specifying the number of output channels,
* must be greater than 0.
*@li group_size: A required int32, specifying the number of groups to encode
* position-sensitive score maps, must be within the range (0, 128).
*@li spatial_scale: A required float32, scaling factor for mapping the input
* coordinates to the ROI coordinates.
*@li input_size: A required listInt, specifying the grad input size: (H, W) . \n

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the feature
* map, dimension C1 must be equal to
* (int(output_dim+15)/C0))*group_size*group_size. \n

*@attention Constraints:
* NC1HWC0 (written "HC1HWC0" in the earlier text - presumably a typo, confirm):
* channel must be Group_size squared, rois_num is a multiple of 16
*/
REG_OP(PSROIPoolingGradV2D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(spatial_scale, Float)
    .REQUIRED_ATTR(output_dim, Int)
    .REQUIRED_ATTR(group_size, Int)
    .REQUIRED_ATTR(input_size, ListInt)
    .OP_END_FACTORY_REG(PSROIPoolingGradV2D)

/**
*@brief Generates the responsible flags of anchor in a single feature map. \n

*@par Inputs:
* gt_bboxes: Ground truth box, 2-D Tensor of type float32 with shape `[batch, 4]`. \n

*@par Attributes:
*@li featmap_size: A required attribute of type listint. The size of feature maps.
*@li strides: A required attribute of type listint. Stride of current level.
*@li num_base_anchors: A required attribute of type int. The number of base anchors. \n

*@par Outputs:
* flags: A Tensor of type uint8. The valid flags of each anchor in a single level.
*/
REG_OP(AnchorResponseFlags)
    .INPUT(gt_bboxes, TensorType({DT_FLOAT}))
    .OUTPUT(flags, TensorType({DT_UINT8}))
    .REQUIRED_ATTR(featmap_size, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(num_base_anchors, Int)
    .OP_END_FACTORY_REG(AnchorResponseFlags)

/**
*@brief Generates bounding boxes based on yolo's "anchor" and "ground-truth" boxes.
* It is a customized mmdetection operator . \n

*@par Inputs:
* Three inputs, including:
*@li anchor_boxes: anchor boxes generated by the yolo training set.
*  A 2D Tensor of type float32 or float16 with shape (N, 4). "N" indicates the number
* of ROIs, and the value "4" refers to (tx, ty, tw, th).
*@li gt_bboxes: target of the transformation, e.g, ground-truth boxes.
*  A 2D Tensor of type float32 or float16 with shape (N, 4).
* "N" indicates the number of ROIs, and 4 indicates "dx", "dy", "dw", and "dh" .
*@li stride: Scale for each box.
*  A 1D Tensor of type int32 shape (N,).
* "N" indicates the number of ROIs. \n

*@par Attributes:
* performance_mode: An optional attribute of type string, selecting the performance mode,
* "high_precision" or "high_performance". Defaults to "high_precision".
* With "high_precision" and float32 input, the output tensor precision
* will be smaller than 0.0001. With "high_performance" and float32 input,
* the op gives the best performance, but precision will only be smaller than 0.005. \n

*@par Outputs:
*encoded_bboxes: Bboxes generated based on "anchor_boxes" and "gt_bboxes". Have the
* same format and type as "anchor_boxes".
*
*@attention Constraints:
* input anchor boxes only support maximum N=20480. \n
*/
REG_OP(YoloBoxesEncode)
    .INPUT(anchor_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gt_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(stride, TensorType({DT_INT32}))
    .ATTR(performance_mode, String, "high_precision")
    .OUTPUT(encoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OP_END_FACTORY_REG(YoloBoxesEncode)

/**
*@brief Computes the GridAssignPositive function.
* NOTE(review): the earlier brief ("Performs Position Sensitive PS ROI Pooling Grad")
* and the attribute names were copied from another operator. Presumably this op
* performs the positive-sample assignment step of a grid assigner - confirm. \n

*@par Inputs:
* Eight inputs, including:
*@li assigned_gt_inds: Tensor of type float16 or float32, shape (n, )
*@li overlaps: A Tensor. Datatype is same as assigned_gt_inds. IOU between gt_bboxes and bboxes. shape(k, n)
*@li box_responsible_flags: A Tensor. Support uint8. Flag to indicate whether box is responsible.
*@li max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=0).
*@li argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=0).
*@li gt_max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=1).
*@li gt_argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=1).
*@li num_gts: A Tensor. Support int32. real k. shape (1, ) \n

*@par Attributes:
*@li pos_iou_thr: A required float. IOU threshold for positive bboxes.
*@li min_pos_iou: A required float. Minimum iou for a bbox to be considered as a positive bbox.
*@li gt_max_assign_all: A required bool. Whether to assign all bboxes with the same highest
* overlap with some gt to that gt. \n

*@par Outputs:
* assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
*/
REG_OP(GridAssignPositive)
    .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(box_responsible_flags, TensorType({ DT_UINT8 }))
    .INPUT(max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(argmax_overlaps, TensorType({ DT_INT32 }))
    .INPUT(gt_max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(gt_argmax_overlaps, TensorType({ DT_INT32 }))
    .INPUT(num_gts, TensorType({ DT_INT32 }))
    .OUTPUT(assigned_gt_inds_pos, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(pos_iou_thr, Float)
    .REQUIRED_ATTR(min_pos_iou, Float)
    .REQUIRED_ATTR(gt_max_assign_all, Bool)
    .OP_END_FACTORY_REG(GridAssignPositive)
}  // namespace ge

#endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_