You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

nn_detect_ops.h 85 kB

5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*!
  17. * \file nn_detect_ops.h
  18. * \brief
  19. */
  20. #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
  21. #define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
  22. #include "graph/operator_reg.h"
  23. #include "graph/operator.h"
  24. namespace ge {
  25. /**
  26. *@brief Generates bounding boxes based on "rois" and "deltas".
  27. * It is a customized FasterRcnn operator . \n
  28. *@par Inputs:
  29. * Two inputs, including:
  30. *@li rois: Region of interests (ROIs) generated by the region proposal
  31. * network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4).
  32. * "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1",
  33. * "y0", and "y1".
  34. *@li deltas: Absolute variation between the ROIs generated by the RPN and
  35. * ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4).
  36. * "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh" . \n
  37. *@par Attributes:
  38. *@li means: An optional list of 4 floats. Defaults to [0.0, 0.0, 0.0, 0.0].
  39. * "deltas" = "deltas" x "stds" + "means".
  40. *@li stds: An optional list of 4 floats. Defaults to [1.0, 1.0, 1.0, 1.0].
  41. * "deltas" = "deltas" x "stds" + "means".
  42. *@li max_shape: A required list of ints in the form [h, w], specifying the size of the image transferred to
  43. * the network. Used to ensure that the bbox shape after conversion does not
  44. * exceed "max_shape".
  45. *@li wh_ratio_clip: An optional float. Defaults to 0.016 (that is, 16/1000). The values of "dw" and "dh" fall
  46. * within (-wh_ratio_clip, wh_ratio_clip) . \n
  47. *@par Outputs:
  48. *bboxes: Bboxes generated based on "rois" and "deltas". Have the same format
  49. * and type as "rois".
  50. */
  51. REG_OP(BoundingBoxDecode)
  52. .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
  53. .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
  54. .OUTPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  55. .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
  56. .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
  57. .REQUIRED_ATTR(max_shape, ListInt)
  58. .ATTR(wh_ratio_clip, Float, 0.016)
  59. .OP_END_FACTORY_REG(BoundingBoxDecode)
  60. /**
  61. *@brief Computes the coordinate variations between bboxes and ground truth
  62. * boxes. It is a customized FasterRcnn operator . \n
  63. *@par Inputs:
  64. * Two inputs, including:
  65. *@li anchor_box: Anchor boxes. A 2D Tensor of type float32 or float16 with shape (N, 4).
  66. * "N" indicates the number of bounding boxes, and the value "4" refers to
  67. * "x0", "x1", "y0", and "y1".
  68. *@li ground_truth_box: Ground truth boxes. A 2D Tensor of type float32 or float16 with
  69. * shape (N, 4). "N" indicates the number of bounding boxes, and the value "4"
  70. * refers to "x0", "x1", "y0", and "y1" . \n
  71. *@par Attributes:
  72. *@li means: An optional list of 4 floats. Defaults to [0.0, 0.0, 0.0, 0.0].
  73. * "deltas" = "deltas" x "stds" + "means".
  74. *@li stds: An optional list of 4 floats. Defaults to [1.0, 1.0, 1.0, 1.0].
  75. * "deltas" = "deltas" x "stds" + "means" . \n
  76. *@par Outputs:
  77. *delats: A 2D Tensor of type float32 or float16 with shape (N, 4), specifying the variations between all anchor boxes and ground truth boxes. Note that the output is registered under the name "delats" (sic), not "deltas".
  78. */
  79. REG_OP(BoundingBoxEncode)
  80. .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
  81. .INPUT(ground_truth_box, TensorType({DT_FLOAT16, DT_FLOAT}))
  82. .OUTPUT(delats, TensorType({DT_FLOAT16, DT_FLOAT}))
  83. .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
  84. .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
  85. .OP_END_FACTORY_REG(BoundingBoxEncode)
  86. /**
  87. *@brief Judges whether the bounding box is valid. It is a customized
  88. * FasterRcnn operator . \n
  89. *@par Inputs:
  90. * Two inputs, including:
  91. *@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4).
  92. * "N" indicates the number of bounding boxes, the value "4" indicates "x0",
  93. * "x1", "y0", and "y1".
  94. *@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16
  95. * with shape (16,) . \n
  96. *@par Outputs:
  97. *valid_tensor: A Tensor of type int8 with shape (N, 1), specifying whether an input anchor is
  98. * in an image. "1" indicates valid, while "0" indicates invalid . \n
  99. *@attention Constraints:
  100. * 16 "img_metas" are input. The first three numbers (height, width, ratio) are
  101. * valid, specifying the valid boundary (height x ratio, width x ratio).
  102. */
  103. REG_OP(CheckValid)
  104. .INPUT(bbox_tensor, TensorType({DT_FLOAT16}))
  105. .INPUT(img_metas, TensorType({DT_FLOAT16}))
  106. .OUTPUT(valid_tensor, TensorType({DT_INT8}))
  107. .OP_END_FACTORY_REG(CheckValid)
  108. /**
  109. *@brief Computes the intersection over union (iou) or the intersection over
  110. * foreground (iof) based on the ground-truth and predicted regions . \n
  111. *@par Inputs:
  112. * Two inputs, including:
  113. *@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
  114. * shape (N, 4). "N" indicates the number of bounding boxes, and the value
  115. * "4" refers to "x0", "x1", "y0", and "y1".
  116. *@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
  117. * with shape (M, 4). "M" indicates the number of ground truth boxes, and
  118. * the value "4" refers to "x0", "x1", "y0", and "y1" . \n
  119. *@par Attributes:
  120. *mode: An optional string specifying the computation mode. The value is either "iou" or "iof". Defaults to "iou" . \n
  121. *@par Outputs:
  122. *overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
  123. * the IoU or IoF ratio . \n
  124. *@attention Constraints:
  125. * Only computation of float16 data is supported. To avoid overflow, the input
  126. * length and width are scaled by 0.2 internally. NOTE(review): float32 is also registered for all tensors; confirm whether the float16-only constraint still holds.
  127. */
  128. REG_OP(Iou)
  129. .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  130. .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  131. .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
  132. .ATTR(mode, String, "iou")
  133. .OP_END_FACTORY_REG(Iou)
  134. /**
  135. *@brief Performs the backpropagation of ROIAlign for training scenarios . \n
  136. *@par Inputs:
  137. * Three inputs, including:
  138. *@li ydiff: A 5HD gradient input of type float32.
  139. *@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs,
  140. * the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1".
  141. *@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved . \n
  142. *@par Attributes:
  143. *@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign.
  144. *@li pooled_width: A required attribute of type int, specifying the W dimension.
  145. *@li pooled_height: A required attribute of type int, specifying the H dimension.
  146. *@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
  147. *@li sample_num: An optional attribute of type int, specifying the horizontal and vertical
  148. * sampling frequency of each output. If this attribute is set to "0", the sampling frequency is
  149. * equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n
  150. *@par Outputs:
  151. *xdiff: Gradient added to input "features". A Tensor of type float32. Has the same 5HD shape as input "features".
  152. */
  153. REG_OP(ROIAlignGrad)
  154. .INPUT(ydiff, TensorType({DT_FLOAT}))
  155. .INPUT(rois, TensorType({DT_FLOAT}))
  156. .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
  157. .OUTPUT(xdiff, TensorType({DT_FLOAT}))
  158. .REQUIRED_ATTR(xdiff_shape, ListInt)
  159. .REQUIRED_ATTR(pooled_width, Int)
  160. .REQUIRED_ATTR(pooled_height, Int)
  161. .REQUIRED_ATTR(spatial_scale, Float)
  162. .ATTR(sample_num, Int, 2)
  163. .OP_END_FACTORY_REG(ROIAlignGrad)
  164. /**
  165. *@brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator . \n
  166. *@par Inputs:
  167. * Three inputs, including:
  168. *@li features: A 5HD Tensor of type float32 or float16.
  169. *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
  170. * the value "5" indicates the indexes of images where the ROIs are located,
  171. * "x0", "y0", "x1", and "y1".
  172. *@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved . \n
  173. *@par Attributes:
  174. *@li spatial_scale: A required attribute of type float32, specifying the scaling ratio of "features" to the original image.
  175. *@li pooled_height: A required attribute of type int32, specifying the H dimension.
  176. *@li pooled_width: A required attribute of type int32, specifying the W dimension.
  177. *@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0",
  178. * the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2".
  179. *@li roi_end_mode: An optional attribute of type int32. Defaults to "1" . \n
  180. *@par Outputs:
  181. * y: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16.
  182. * The axis N is the number of input ROIs. Axes H, W, and C are consistent
  183. * with the values of "pooled_height",
  184. * "pooled_width", and "features", respectively.
  185. */
  186. REG_OP(ROIAlign)
  187. .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
  188. .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
  189. .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
  190. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  191. .REQUIRED_ATTR(spatial_scale, Float)
  192. .REQUIRED_ATTR(pooled_height, Int)
  193. .REQUIRED_ATTR(pooled_width, Int)
  194. .ATTR(sample_num, Int, 2)
  195. .ATTR(roi_end_mode, Int, 1)
  196. .OP_END_FACTORY_REG(ROIAlign)
  197. /**
  198. *@brief Performs SSD prior box detection . \n
  199. *@par Inputs:
  200. * Two inputs, including:
  201. *@li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
  202. *@li img: source image. Has the same type and format as "x" . \n
  203. *@par Attributes:
  204. *@li min_size: A required list of float32 values, specifying the minimum edge length of a square prior box.
  205. *@li max_size: A required list of float32 values, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
  206. *@li aspect_ratio: A required list of float32 values, specifying the aspect ratio for generated rectangle boxes. The height
  207. * is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). This attribute is required, so no default is registered.
  208. *@li img_h: An optional int32, specifying the source image height. Defaults to "0".
  209. *@li img_w: An optional int32, specifying the source image width. Defaults to "0".
  210. *@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0".
  211. *@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0".
  212. *@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
  213. *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
  214. *@li offset: An optional float32, specifying the offset. Defaults to "0.5".
  215. *@li variance: An optional list of float32 values, specifying the variance of a prior box, either one or four variances. Defaults to [0.1] (one value) . \n
  216. *@par Outputs:
  217. *y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
  218. *@attention Constraints:
  219. * This operator applies only to SSD networks.
  220. *@see SSDDetectionOutput()
  221. *@par Third-party framework compatibility
  222. * It is a custom operator. It has no corresponding operator in Caffe.
  223. */
  224. REG_OP(PriorBox)
  225. .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
  226. .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
  227. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  228. .REQUIRED_ATTR(min_size, ListFloat)
  229. .REQUIRED_ATTR(max_size, ListFloat)
  230. .REQUIRED_ATTR(aspect_ratio, ListFloat)
  231. .ATTR(img_h, Int, 0)
  232. .ATTR(img_w, Int, 0)
  233. .ATTR(step_h, Float, 0.0)
  234. .ATTR(step_w, Float, 0.0)
  235. .ATTR(flip, Bool, true)
  236. .ATTR(clip, Bool, false)
  237. .ATTR(offset, Float, 0.5)
  238. .ATTR(variance, ListFloat, {0.1})
  239. .OP_END_FACTORY_REG(PriorBox);
  240. /**
  241. *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
  242. *@par Inputs:
  243. * Six inputs, including:
  244. *@li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
  245. *@li img: source image. Has the same type and format as "x".
  246. *@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
  247. *@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
  248. *@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box.
  249. *@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box . \n
  250. *@par Attributes:
  251. *@li min_size: A required list of float32 values, specifying the minimum edge length of a square prior box.
  252. *@li max_size: A required list of float32 values, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
  253. *@li img_h: An optional int32, specifying the height of the source image. Defaults to "0".
  254. *@li img_w: An optional int32, specifying the width of the source image. Defaults to "0".
  255. *@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0".
  256. *@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0".
  257. *@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
  258. *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
  259. *@li offset: An optional float32, specifying the offset. Defaults to "0.5".
  260. *@li variance: An optional list of float32 values, specifying the variance of a prior box, either one or four variances. Defaults to [0.1] (one value) . \n
  261. *@par Outputs:
  262. *y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
  263. *@attention Constraints:
  264. * This operator applies only to SSD networks.
  265. *@see SSDDetectionOutput()
  266. *@par Third-party framework compatibility
  267. * It is a custom operator. It has no corresponding operator in Caffe.
  268. *@par Restrictions:
  269. *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
  270. */
  271. REG_OP(PriorBoxD)
  272. .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
  273. .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
  274. .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
  275. .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
  276. .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
  277. .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
  278. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  279. .REQUIRED_ATTR(min_size, ListFloat)
  280. .REQUIRED_ATTR(max_size, ListFloat)
  281. .ATTR(img_h, Int, 0)
  282. .ATTR(img_w, Int, 0)
  283. .ATTR(step_h, Float, 0.0)
  284. .ATTR(step_w, Float, 0.0)
  285. .ATTR(flip, Bool, true)
  286. .ATTR(clip, Bool, false)
  287. .ATTR(offset, Float, 0.5)
  288. .ATTR(variance, ListFloat, {0.1})
  289. .OP_END_FACTORY_REG(PriorBoxD);
  290. /**
  291. *@brief Performs SSD prior box detection, with an additional "boxes" input and the "aspect_ratio" attribute deleted compared to PriorBox . \n
  292. *@par Inputs:
  293. * Three inputs, including:
  294. *@li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
  295. *@li img: source image. Has the same type and format as "x".
  296. *@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y . \n
  297. *@par Attributes:
  298. *@li min_size: A required list of float32 values, specifying the minimum edge length of a square prior box.
  299. *@li max_size: A required list of float32 values, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
  300. *@li img_h: An optional int32, specifying the height of the source image. Defaults to "0".
  301. *@li img_w: An optional int32, specifying the width of the source image. Defaults to "0".
  302. *@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0".
  303. *@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0".
  304. *@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
  305. *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
  306. *@li offset: An optional float32, specifying the offset. Defaults to "0.5".
  307. *@li variance: An optional list of float32 values, specifying the variance of a prior box, either one or four variances. Defaults to [0.1] (one value) . \n
  308. *@par Outputs:
  309. *y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
  310. *@attention Constraints:
  311. * This operator applies only to SSD networks.
  312. *@see SSDDetectionOutput()
  313. *@par Third-party framework compatibility
  314. * It is a custom operator. It has no corresponding operator in Caffe.
  315. *@par Restrictions:
  316. *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
  317. */
  318. REG_OP(PriorBoxDV2)
  319. .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
  320. .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
  321. .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  322. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  323. .REQUIRED_ATTR(min_size, ListFloat)
  324. .REQUIRED_ATTR(max_size, ListFloat)
  325. .ATTR(img_h, Int, 0)
  326. .ATTR(img_w, Int, 0)
  327. .ATTR(step_h, Float, 0.0)
  328. .ATTR(step_w, Float, 0.0)
  329. .ATTR(flip, Bool, true)
  330. .ATTR(clip, Bool, false)
  331. .ATTR(offset, Float, 0.5)
  332. .ATTR(variance, ListFloat, {0.1})
  333. .OP_END_FACTORY_REG(PriorBoxDV2);
  334. /**
  335. *@brief Performs Position Sensitive ROI Pooling . \n
  336. *@par Inputs:
  337. * Two inputs, including:
  338. *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
  339. * map, dimension C1 must be equal to
  340. * (int((output_dim+15)/C0))*group_size*group_size.
  341. *@li rois: A tensor of type float16 or float32, with shape
  342. * [batch, 5, rois_num], describing the ROIs, each ROI consists of five
  343. * elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
  344. * the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
  345. * greater than or equal to "0.0" . \n
  346. *@par Attributes:
  347. *@li output_dim: A required int32, specifying the number of output channels,
  348. * must be greater than 0.
  349. *@li group_size: A required int32, specifying the number of groups to encode
  350. * position-sensitive score maps, must be within the range (0, 128).
  351. *@li spatial_scale: A required float32, scaling factor for mapping the input
  352. * coordinates to the ROI coordinates . \n
  353. *@par Outputs:
  354. *y: An NC1HWC0 tensor of type float16 or float32, describing the result
  355. * feature map . \n
  356. *@attention Constraints:
  357. * NC1HWC0: channel must be group_size squared, rois_num is a multiple of 16
  358. */
  359. REG_OP(PSROIPooling)
  360. .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
  361. .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
  362. .REQUIRED_ATTR(output_dim, Int)
  363. .REQUIRED_ATTR(group_size, Int)
  364. .REQUIRED_ATTR(spatial_scale, Float)
  365. .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
  366. .OP_END_FACTORY_REG(PSROIPooling)
  367. /**
  368. *@brief Returns detection result . \n
  369. *@par Inputs:
  370. * Five inputs, including:
  371. *@li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
  372. *@li bbox_delta: An NC1HWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
  373. *@li score: An NC1HWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
  374. *@li im_info: An ND tensor of type float16 or float32, specifying the Image information.
  375. *@li actual_rois_num: An optional NCHW tensor of type int32, specifying the number of valid boxes per batch . \n
  376. *@par Attributes:
  377. *@li batch_rois: An optional int32, specifying the number of images to be predicted. Defaults to "1".
  378. *@li num_classes: A required int32, specifying the number of classes to be predicted. The value must be greater than 0.
  379. *@li score_threshold: A required float32, specifying the threshold for box filtering. The value range is [0.0, 1.0].
  380. *@li iou_threshold: A required float32, specifying the confidence threshold for box filtering, which is the output "obj" of operator Region. The value range is (0.0, 1.0) . \n
  381. *@par Outputs:
  382. *@li box: A tensor of type float16 or float32 for proposal of actual output, with output shape [batch, numBoxes,8].
  383. * 8 means [x1, y1, x2, y2, score, label, batchID, NULL], the maximum value of numBoxes is 1024.
  384. * That is, take min (the maximum number of input boxes, 1024)
  385. *@li actual_bbox_num: A tensor of type int32 with shape [batch, num_classes], specifying the number of output boxes . \n
  386. *@attention Constraints:
  387. *@li totalnum < max_rois_num * batch_rois.
  388. *@li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
  389. *@li "bbox_delta" must be with shape (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
  390. *@par Third-party framework compatibility
  391. * It is a custom operator. It has no corresponding operator in Caffe.
  392. */
  393. REG_OP(FSRDetectionOutput)
  394. .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
  395. .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
  396. .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
  397. .INPUT(im_info, TensorType({DT_FLOAT, DT_FLOAT16}))
  398. .OPTIONAL_INPUT(actual_rois_num, TensorType({DT_INT32}))
  399. .OUTPUT(actual_bbox_num, TensorType({DT_INT32}))
  400. .OUTPUT(box, TensorType({DT_FLOAT, DT_FLOAT16}))
  401. .ATTR(batch_rois, Int, 1)
  402. .REQUIRED_ATTR(num_classes, Int)
  403. .REQUIRED_ATTR(score_threshold, Float)
  404. .REQUIRED_ATTR(iou_threshold, Float)
  405. .OP_END_FACTORY_REG(FSRDetectionOutput)
  406. /**
  407. *@brief Returns detection result . \n
  408. *@par Inputs:
  409. * Four inputs, including:
  410. *@li bbox_delta: An ND tensor of type floa16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
  411. *@li score: An ND tensor of type floa16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput.
  412. *@li anchors: An ND tensor of type floa16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
  413. *@par Attributes:
  414. *@li num_classes: An optional int32, specifying the number of classes to be predicted. Defaults to "2". The value must be greater than 1 and lesser than 1025.
  415. *@li share_location: An optional bool, specify the shared location. Defaults to True
  416. *@li background_label_id: An optional int32, specify the background label id. Must be 0
  417. *@li iou_threshold: An optional float32, specify the nms threshold
  418. *@li top_k: An optional int32, specify the topk value. Defaults to 200
  419. *@li eta: An optional float32, specify the eta value. Defaults to 1.0
  420. *@li variance_encoded_in_target: An optional bool, specify whether variance encoded in target or not. Defaults to False
  421. *@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3
  422. *@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1
  423. *@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold
  424. *@par Outputs:
  425. *@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
  426. *@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box.
  427. * In output shape, 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax, null)
  428. * It is a custom operator. It has no corresponding operator in Caffe.
  429. */
  430. REG_OP(SSDDetectionOutput)  // Begin registration of operator "SSDDetectionOutput": three inputs, two outputs.
  431. .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
  432. .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
  433. .INPUT(anchors, TensorType({DT_FLOAT, DT_FLOAT16}))
  434. .OUTPUT(out_boxnum, TensorType({DT_INT32}))
  435. .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
  436. .ATTR(num_classes, Int, 2)  // Every attribute of this op is registered with a default (third argument).
  437. .ATTR(share_location, Bool, true)
  438. .ATTR(background_label_id, Int, 0)
  439. .ATTR(iou_threshold, Float, 0.3)
  440. .ATTR(top_k, Int, 200)
  441. .ATTR(eta, Float, 1.0)
  442. .ATTR(variance_encoded_in_target, Bool, false)
  443. .ATTR(code_type, Int, 1)  // NOTE(review): default is 1, but the doc comment says only code_type 2 is supported - confirm.
  444. .ATTR(keep_top_k, Int, -1)
  445. .ATTR(confidence_threshold, Float, 0.0)
  446. .OP_END_FACTORY_REG(SSDDetectionOutput)  // Closes the registration chain opened by REG_OP.
  447. /**
  448. *@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n
  449. *@par Inputs:
  450. *x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),
  451. where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged
  452. as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n
  453. *@par Attributes:
  454. *@li boxes: An optional int32, specifying the number of anchor boxes. Defaults to "3" (the V3 value); set it to "5" for V2.
  455. *@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h).
  456. *@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
  457. *@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3". Defaults to "V3".
  458. *@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
  459. *@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
  460. *@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n
  461. *@par Outputs:
  462. *@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2],
  463. * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
  464. *@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2],
  465. * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence.
  466. *@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2],
  467. * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n
  468. *@attention Constraints:
  469. *@li This operator applies to YOLO v2 and v3 networks.
  470. *@li The succeeding layer of the Yolo operator must be operator Yolov3DetectionOutput.
  471. *@par Third-party framework compatibility
  472. * It is a custom operator. It has no corresponding operator in Caffe.
  473. */
  474. REG_OP(Yolo)  // Begin registration of operator "Yolo": one input, three outputs.
  475. .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
  476. .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
  477. .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
  478. .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
  479. .ATTR(boxes, Int, 3)  // One registered default (3), whatever yolo_version is set to.
  480. .ATTR(coords, Int, 4)
  481. .ATTR(classes, Int, 80)
  482. .ATTR(yolo_version, String, "V3")
  483. .ATTR(softmax, Bool, false)  // Documented as valid only when yolo_version is "V2".
  484. .ATTR(background, Bool, false)  // Documented as valid only when yolo_version is "V2".
  485. .ATTR(softmaxtree, Bool, false)  // Documented as fixed to false and not used.
  486. .OP_END_FACTORY_REG(Yolo)  // Closes the registration chain opened by REG_OP.
  487. /**
  488. *@brief Performs YOLO V2 detection . \n
  489. *@par Inputs:
  490. * Four inputs, including:
  491. *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput.
  492. * Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
  493. *@li img_info: A float16 or float32, describing the image information including the required image height and width
  494. * and the actual image height and width.
  495. *
  496. *@par Attributes:
  497. *@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
  498. *@li boxes: An optional int32, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "5".
  499. *@li coords: Specifies the number of coordinate parameters. Must be 4.
  500. *@li classes: An optional int32, specifying the number of classes to be predicted. The value range is [1, 20]. Defaults to "20".
  501. *@li relative: An optional bool. Defaults to and must be "true".
  502. *@li obj_threshold: An optional float, specifying the confidence threshold for box filtering
  503. * (applied to the output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5" . \n
  504. *@li post_nms_topn: An optional int32. This attribute is reserved.
  505. *@li score_threshold: An optional float, specifying the class score threshold for box filtering
  506. * (applied to the output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
  507. *@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. Defaults to "0.45".
  508. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
  509. *
  510. *@par Outputs:
  511. *@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box.
  512. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
  513. *@li box_out_num: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. Only the first of the 8 numbers is valid:
  514. * it is the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
  515. *
  516. *@attention Constraints:
  517. *@li This operator applies only to the YOLO v2 network.
  518. *@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator.
  519. *
  520. *@see Yolo()
  521. *@par Third-party framework compatibility
  522. * It is a custom operator. It has no corresponding operator in Caffe.
  523. */
  524. REG_OP(YoloV2DetectionOutput)  // Begin registration of operator "YoloV2DetectionOutput": four inputs, two outputs.
  525. .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))  // The first three inputs carry the same names as the outputs of operator Yolo.
  526. .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
  527. .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
  528. .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
  529. .REQUIRED_ATTR(biases, ListFloat)  // The only attribute registered without a default.
  530. .ATTR(boxes, Int, 5)
  531. .ATTR(coords, Int, 4)
  532. .ATTR(classes, Int, 20)  // Default equals the upper bound of the documented range [1, 20].
  533. .ATTR(relative, Bool, true)
  534. .ATTR(obj_threshold, Float, 0.5)
  535. .ATTR(post_nms_topn, Int, 512)  // Documented as reserved.
  536. .ATTR(score_threshold, Float, 0.5)
  537. .ATTR(iou_threshold, Float, 0.45)
  538. .ATTR(pre_nms_topn, Int, 512)
  539. .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
  540. .OUTPUT(box_out_num, TensorType({DT_INT32}))
  541. .OP_END_FACTORY_REG(YoloV2DetectionOutput)  // Closes the registration chain opened by REG_OP.
  542. /**
  543. *@brief Performs YOLO V2 detection . \n
  544. *@par Inputs:
  545. *Six inputs, including:
  546. *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput.
  547. * Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
  548. *@li img_info: A float16 or float32, describing the image information including the required image height and width
  549. * and the actual image height and width.
  550. *@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
  551. * [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of height groups of [0, 1, 2...(width-1)] is formed.
  552. *@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]].
  553. *
  554. *@par Attributes:
  555. *@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
  556. *@li boxes: An optional int32, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "5".
  557. *@li coords: Specifies the number of coordinate parameters. Must be 4.
  558. *@li classes: An optional int32, specifying the number of classes to be predicted. The value range is [1, 20]. Defaults to "20".
  559. *@li relative: An optional bool. Defaults to and must be "true".
  560. *@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
  561. *@li post_nms_topn: An optional int32. This attribute is reserved.
  562. *@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5" . \n
  563. *@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. Defaults to "0.45".
  564. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
  565. *
  566. *@par Outputs:
  567. *@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box.
  568. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
  569. *@li box_out_num: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. Only the first of the 8 numbers is valid:
  570. * it is the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
  571. *
  572. *@attention Constraints:
  573. *@li This operator applies only to the YOLO v2 network.
  574. *@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator . \n
  575. *@see Yolo()
  576. *@par Third-party framework compatibility
  577. * It is a custom operator. It has no corresponding operator in Caffe.
  578. *@par Restrictions:
  579. *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
  580. */
  581. REG_OP(YoloV2DetectionOutputD)  // Begin registration of "YoloV2DetectionOutputD"; documented as deprecated in favour of YoloV2DetectionOutput.
  582. .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
  583. .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
  584. .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
  585. .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
  586. .INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))  // windex/hindex are the two inputs this op adds on top of YoloV2DetectionOutput.
  587. .INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
  588. .REQUIRED_ATTR(biases, ListFloat)  // The only attribute registered without a default.
  589. .ATTR(boxes, Int, 5)  // Attribute names, types and defaults below match YoloV2DetectionOutput.
  590. .ATTR(coords, Int, 4)
  591. .ATTR(classes, Int, 20)
  592. .ATTR(relative, Bool, true)
  593. .ATTR(obj_threshold, Float, 0.5)
  594. .ATTR(post_nms_topn, Int, 512)  // Documented as reserved.
  595. .ATTR(score_threshold, Float, 0.5)
  596. .ATTR(iou_threshold, Float, 0.45)
  597. .ATTR(pre_nms_topn, Int, 512)
  598. .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
  599. .OUTPUT(box_out_num, TensorType({DT_INT32}))
  600. .OP_END_FACTORY_REG(YoloV2DetectionOutputD)  // Closes the registration chain opened by REG_OP.
  601. /**
  602. *@brief Performs YOLO V3 detection . \n
  603. *@par Inputs:
  604. *Ten inputs, including:
  605. *@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class".
  606. * There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
  607. *@li img_info: A float16 or float32, describing the image information including the required image height and width
  608. * and the actual image height and width.
  609. *@par Attributes:
  610. *@li biases: Required lists of floats, registered as "biases_low", "biases_mid" and "biases_high". "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
  611. *@li boxes: An optional int32, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "3".
  612. *@li coords: Specifies the number of coordinate parameters. Must be 4.
  613. *@li classes: An optional int32, specifying the number of classes to be predicted. The value range is [1, 80]. Defaults to "80".
  614. *@li relative: An optional bool. Defaults to and must be "true".
  615. *@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5" . \n
  616. *@li post_nms_topn: An optional int32. This attribute is reserved.
  617. *@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5" . \n
  618. *@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. Defaults to "0.45".
  619. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
  620. *@par Outputs:
  621. *@li box_out: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box.
  622. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
  623. *@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
  624. * Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
  625. *@attention Constraints:
  626. *@li This operator applies only to the YOLO v3 network.
  627. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators . \n
  628. *@see Yolo()
  629. *@par Third-party framework compatibility
  630. * It is a custom operator. It has no corresponding operator in Caffe.
  631. */
  632. REG_OP(YoloV3DetectionOutput)  // Begin registration of operator "YoloV3DetectionOutput": ten inputs, two outputs.
  633. .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))  // coord_data, obj_prob and classes_prob each come in _low/_mid/_high variants;
  634. .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))  // presumably one per preceding Yolo operator (the doc comment says there are three) - confirm the mapping.
  635. .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
  636. .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
  637. .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
  638. .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
  639. .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
  640. .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
  641. .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
  642. .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
  643. .REQUIRED_ATTR(biases_low, ListFloat)  // The three biases_* lists are the only attributes registered without a default.
  644. .REQUIRED_ATTR(biases_mid, ListFloat)
  645. .REQUIRED_ATTR(biases_high, ListFloat)
  646. .ATTR(boxes, Int, 3)
  647. .ATTR(coords, Int, 4)
  648. .ATTR(classes, Int, 80)  // Default equals the upper bound of the documented range [1, 80].
  649. .ATTR(relative, Bool, true)
  650. .ATTR(obj_threshold, Float, 0.5)
  651. .ATTR(post_nms_topn, Int, 512)  // Documented as reserved.
  652. .ATTR(score_threshold, Float, 0.5)
  653. .ATTR(iou_threshold, Float, 0.45)
  654. .ATTR(pre_nms_topn, Int, 512)
  655. .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
  656. .OUTPUT(box_out_num, TensorType({DT_INT32}))
  657. .OP_END_FACTORY_REG(YoloV3DetectionOutput)  // Closes the registration chain opened by REG_OP.
  658. /**
  659. *@brief Performs YOLO V3 detection . \n
  660. *@par Inputs:
  661. *16 inputs, including:
  662. *@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
  663. * A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
  664. *@li img_info: A float16 or float32, describing the image information including the required image height and width
  665. * and the actual image height and width.
  666. *@li windex: A windex tensor with shape [height,width]. Has the same type as the inputs. Registered as "windex1", "windex2" and "windex3".
  667. * [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of height groups of [0, 1, 2...(width-1)] is formed for the three Yolo outputs, respectively . \n
  668. *@li hindex: A hindex tensor with shape [height,width]. Has the same type as the inputs. Registered as "hindex1", "hindex2" and "hindex3".
  669. * [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
  670. *
  671. *@par Attributes:
  672. *@li biases: Required lists of float32, registered as "biases_low", "biases_mid" and "biases_high". "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
  673. *@li boxes: An optional int32, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "3".
  674. *@li coords: Specifies the number of coordinate parameters. Must be 4.
  675. *@li classes: An optional int32, specifying the number of classes to be predicted. The value range is [1, 80]. Defaults to "80".
  676. *@li relative: An optional bool. Defaults to and must be "true".
  677. *@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
  678. *@li post_nms_topn: An optional int32. This attribute is reserved.
  679. *@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
  680. *@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. Defaults to "0.45".
  681. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
  682. *@par Outputs:
  683. *@li box_out: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box.
  684. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
  685. *@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
  686. * Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
  687. *@attention Constraints:
  688. *@li This operator applies only to the YOLO v3 network.
  689. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
  690. *@see Yolo()
  691. *@par Third-party framework compatibility
  692. * It is a custom operator. It has no corresponding operator in Caffe.
  693. *@par Restrictions:
  694. *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
  695. */
  696. REG_OP(YoloV3DetectionOutputD)  // Begin registration of "YoloV3DetectionOutputD"; documented as deprecated in favour of YoloV3DetectionOutput.
  697. .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))  // The first ten inputs have the same names and types as those of YoloV3DetectionOutput.
  698. .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
  699. .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
  700. .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
  701. .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
  702. .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
  703. .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
  704. .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
  705. .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
  706. .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
  707. .INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT}))  // Three windex and three hindex grids: per the doc comment, one per Yolo output.
  708. .INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT}))
  709. .INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT}))
  710. .INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT}))
  711. .INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT}))
  712. .INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT}))
  713. .REQUIRED_ATTR(biases_low, ListFloat)  // The three biases_* lists are the only attributes registered without a default.
  714. .REQUIRED_ATTR(biases_mid, ListFloat)
  715. .REQUIRED_ATTR(biases_high, ListFloat)
  716. .ATTR(boxes, Int, 3)
  717. .ATTR(coords, Int, 4)
  718. .ATTR(classes, Int, 80)
  719. .ATTR(relative, Bool, true)
  720. .ATTR(obj_threshold, Float, 0.5)
  721. .ATTR(post_nms_topn, Int, 512)  // Documented as reserved.
  722. .ATTR(score_threshold, Float, 0.5)
  723. .ATTR(iou_threshold, Float, 0.45)
  724. .ATTR(pre_nms_topn, Int, 512)
  725. .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
  726. .OUTPUT(box_out_num, TensorType({DT_INT32}))
  727. .OP_END_FACTORY_REG(YoloV3DetectionOutputD)  // Closes the registration chain opened by REG_OP.
  728. /**
  729. *@brief Performs YOLO V3 detection . \n
  730. *@par Inputs:
  731. *Ten inputs, including:
  732. *@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n
  733. * There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
  734. *@li img_info: A float16 or float32, describing the image information including the required image height and width \n
  735. * and the actual image height and width.
  736. *@par Attributes:
  737. *@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
  738. *@li boxes: An optional int32, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "3".
  739. *@li coords: Specifies the number of coordinate parameters. Must be 4.
  740. *@li classes: An optional int32, specifying the number of classes to be predicted. The value range is [1, 80]. Defaults to "80".
  741. *@li relative: An optional bool. Defaults to and must be "true".
  742. *@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
  743. *@li post_nms_topn: An optional int32. This attribute is reserved.
  744. *@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
  745. *@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. Defaults to "0.45".\n
  746. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
  747. *@par Outputs:
  748. *@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
  749. * describing the information of each output box. In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
  750. *@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
  751. * Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
  752. *@attention Constraints:\n
  753. *@li This operator applies only to the YOLO v3 network.
  754. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
  755. *@see Yolo()
  756. *@par Third-party framework compatibility
  757. * It is a custom operator. It has no corresponding operator in Caffe.
  758. */
  759. REG_OP(YoloV3DetectionOutputV2)  // Begin registration of operator "YoloV3DetectionOutputV2".
  760. .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))  // The only registered input; NOTE(review): presumably carries the "ten inputs" described in the doc comment - confirm.
  761. .REQUIRED_ATTR(biases, ListFloat)  // The only attribute registered without a default.
  762. .ATTR(boxes, Int, 3)
  763. .ATTR(coords, Int, 4)
  764. .ATTR(classes, Int, 80)
  765. .ATTR(relative, Bool, true)
  766. .ATTR(obj_threshold, Float, 0.5)
  767. .ATTR(post_nms_topn, Int, 512)  // Documented as reserved.
  768. .ATTR(score_threshold, Float, 0.5)
  769. .ATTR(iou_threshold, Float, 0.45)
  770. .ATTR(pre_nms_topn, Int, 512)
  771. .ATTR(N, Int, 10)  // Not listed under "Attributes" in the doc comment; NOTE(review): presumably the number of tensors passed through "x" - confirm.
  772. .ATTR(resize_origin_img_to_net, Bool, false)  // Not listed under "Attributes" in the doc comment; default false.
  773. .ATTR(out_box_dim, Int, 3)  // Per the "Outputs" doc: 3 gives box_out shape [batch,6,post_nms_topn], 2 gives [batch,6*post_nms_topn].
  774. .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
  775. .OUTPUT(box_out_num, TensorType({DT_INT32}))
  776. .OP_END_FACTORY_REG(YoloV3DetectionOutputV2)  // Closes the registration chain opened by REG_OP.
  777. /**
  778. *@brief Performs YOLO V3 detection.
  779. *@par Inputs:
  780. *16 inputs, including:
  781. *@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
  782. * A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
  783. *@li imginfo: A float16, describing the image information including the required image height and width
  784. * and the actual image height and width.
  785. *@li windex: A windex tensor with shape [height,width]. Has the same type as the inputs.
  786. * [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of height groups of [0, 1, 2...(width-1)]
  787. * is formed for the three Yolo outputs, respectively. It's a dynamic input. \n
  788. *@li hindex: A hindex tensor with shape [height,width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
  789. *@par Attributes:
  790. *@li biases: A required list of float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
  791. *@li boxes: An optional int32, specifying the number of anchor boxes predicted for each Yolo layer. Defaults to "3".
  792. *@li coords: Specifies the number of coordinate parameters. Must be 4.
  793. *@li classes: An optional int32, specifying the number of classes to be predicted. The value range is [1, 80]. Defaults to "80".
  794. *@li relative: An optional bool. Defaults to and must be "true".
  795. *@li obj_threshold: An optional float, specifying the confidence threshold for box filtering (applied to the output "obj" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
  796. *@li post_nms_topn: An optional int32. This attribute is reserved.
  797. *@li score_threshold: An optional float, specifying the class score threshold for box filtering (applied to the output "class" of operator Yolo). The value range is [0.0, 1.0]. Defaults to "0.5".
  798. *@li iou_threshold: An optional float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. Defaults to "0.45".
  799. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
  800. *
  801. *@par Outputs:
  802. *@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
  803. * describing the information of each output box.
  804. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
  805. *@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
  806. * Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
  807. *
  808. *@attention Constraints:
  809. *@li This operator applies only to the YOLO v3 network.
  810. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
  811. *@see Yolo()
  812. *@par Third-party framework compatibility
  813. * It is a custom operator. It has no corresponding operator in Caffe.
  814. * @par Restrictions:
  815. * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
  816. */
  817. REG_OP(YoloV3DetectionOutputV2D)  // Begin registration of "YoloV3DetectionOutputV2D"; documented as deprecated in favour of YoloV3DetectionOutputV2.
  818. .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))  // All three inputs of this op are registered as dynamic inputs.
  819. .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
  820. .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
  821. .REQUIRED_ATTR(biases, ListFloat)  // The only attribute registered without a default.
  822. .ATTR(boxes, Int, 3)
  823. .ATTR(coords, Int, 4)
  824. .ATTR(classes, Int, 80)
  825. .ATTR(relative, Bool, true)
  826. .ATTR(obj_threshold, Float, 0.5)
  827. .ATTR(post_nms_topn, Int, 512)  // Documented as reserved.
  828. .ATTR(score_threshold, Float, 0.5)
  829. .ATTR(iou_threshold, Float, 0.45)
  830. .ATTR(pre_nms_topn, Int, 512)
  831. .ATTR(N, Int, 10)  // Not listed under "Attributes" in the doc comment; NOTE(review): presumably the number of tensors passed through "x" - confirm.
  832. .ATTR(resize_origin_img_to_net, Bool, false)  // Not listed under "Attributes" in the doc comment; default false.
  833. .ATTR(out_box_dim, Int, 3)  // Per the "Outputs" doc: 3 gives box_out shape [batch,6,post_nms_topn], 2 gives [batch,6*post_nms_topn].
  834. .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
  835. .OUTPUT(box_out_num, TensorType({DT_INT32}))
  836. .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)  // Closes the registration chain opened by REG_OP.
  837. /**
  838. *@brief Spatial Pyramid Pooling, multi-level pooling.
  839. * Pooling out(n, sigma(c*2^i*2^i)) tensor, i in range[0,pyramid_height) . \n
  840. *@par Inputs:
  841. *x: An NCHW tensor, support float16 or float32 type . \n
  842. *@par Attributes:
  843. * @li pyramid_height: A required int32.
  844. * Multi-level pooling out from 2^0 to 2^(pyramid_height-1).
  845. * @li pool_method: An optional int32, pooling method: 0-MAX, 1-AVE.
  846. * Defaults to "0" . \n
  847. *@par Outputs:
  848. *y: A NCHW tensor, support float16 or float32 type . \n
  849. *@attention Constraints:
  850. * @li pyramid_height: pyramid_height should be in range [0,7).
  851. * Pooling parameters should satisfy the Caffe pooling param constraint (pad<kernel).
  852. * @li feature_size: input feature map h and w should be in range [1, 510] . \n
  853. *@par Third-party framework compatibility
  854. * Compatible with the Caffe operator SPP.
  855. */
  856. REG_OP(SPP)  // Begin registration of operator "SPP": one input, one output.
  857. .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
  858. .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
  859. .REQUIRED_ATTR(pyramid_height, Int)  // Registered without a default.
  860. .ATTR(pool_method, Int, 0)  // Default 0; the doc comment maps 0 to MAX and 1 to AVE.
  861. .OP_END_FACTORY_REG(SPP)  // Closes the registration chain opened by REG_OP.
  862. /**
  863. *@brief Performs Region of Interest (ROI) Pooling . \n
  864. *@par Inputs:
  865. * Three inputs, including:
  866. *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
  867. * map.
  868. *@li rois: A tensor of type float16 or float32, with 3D shape
  869. * [batch, 5, roi_max_num], describing the ROIs.
  870. * roi_max_num must be less than or equal to 6000 and must be divisible by 16.
  871. *@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying
  872. * the number of ROIs per batch . \n
  873. *@par Attributes:
  874. *@li pooled_h: A required int32, specifying the pooled H. Must be greater
  875. * than 0.
  876. *@li pooled_w: A required int32, specifying the pooled W. Must be greater
  877. * than 0.
  878. *@li spatial_scale_h: A required float, the scaling factor for mapping the input
  879. * coordinates of height to the ROI coordinates.
  880. *@li spatial_scale_w: A required float, the scaling factor for mapping the input
  881. * coordinates of width to the ROI coordinates . \n
  882. *@par Outputs:
  883. *y: An NC1HWC0 tensor of type float16 or float32, describing the result
  884. * feature map . \n
  885. *@attention Constraints:
  886. * For the feature map input:
  887. *@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
  888. *@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
  889. *@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
  890. *@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
  891. *@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
  892. *@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
  893. *@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
  894. *@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
  895. *@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
  896. *@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
  897. *@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
  898. *@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
  899. *@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
  900. *@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
  901. *@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
  902. *@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
  903. *@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
  904. *@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
  905. *@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
  906. *@par Third-party framework compatibility
  907. * It is a custom operator. It has no corresponding operator in Caffe.
  908. */
  909. REG_OP(ROIPooling)
  910. .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
  911. .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
  912. .OPTIONAL_INPUT(roi_actual_num, TensorType({DT_INT32}))
  913. .REQUIRED_ATTR(pooled_h, Int)
  914. .REQUIRED_ATTR(pooled_w, Int)
  915. .REQUIRED_ATTR(spatial_scale_h, Float)
  916. .REQUIRED_ATTR(spatial_scale_w, Float)
  917. .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
  918. .OP_END_FACTORY_REG(ROIPooling)
  919. /**
  920. *@brief Computes decode bbox function.
  921. *@par Inputs:
  922. *Inputs include:
  923. * @li box_predictions: A Tensor. Must be float16.
  924. * @li anchors: A Tensor. Must have the same type as box_predictions.
  925. *@par Attributes:
  926. * @li decode_clip: required, float, threshold of decode process.
  927. *@par Outputs:
  928. * @li decoded_boxes: A Tensor. Must have the same type as box_predictions.
  929. * N-D with shape [N, 4].
  930. *@par Restrictions:
  931. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  932. */
  933. REG_OP(DecodeBbox)
  934. .INPUT(box_predictions, TensorType{DT_FLOAT16})
  935. .INPUT(anchors, TensorType{DT_FLOAT16})
  936. .OUTPUT(decoded_boxes, TensorType{DT_FLOAT16})
  937. .REQUIRED_ATTR(decode_clip, Float)
  938. .OP_END_FACTORY_REG(DecodeBbox)
  939. /**
  940. *@brief Computes ClipBoxes function . \n
  941. *@par Inputs:
  942. *@li boxes_input: A Tensor. Must be float16. N-D with shape [N, 4].
  943. *@li img_size: A Tensor. Must be int32. shape [H, W] . \n
  944. *@par Outputs:
  945. *boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4].
  946. *@par Restrictions:
  947. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  948. */
  949. REG_OP(ClipBoxes)
  950. .INPUT(boxes_input, TensorType({DT_FLOAT16}))
  951. .INPUT(img_size, TensorType({DT_INT32}))
  952. .OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
  953. .OP_END_FACTORY_REG(ClipBoxes)
  954. /**
  955. *@brief Computes ClipBoxesD function . \n
  956. *@par Attributes:
  957. *img_size: A required list of ints, given as [H, W] . \n
  958. *@par Inputs:
  959. *boxes_input: A Tensor. Must be float16. N-D with shape [N, 4] . \n
  960. *@par Outputs:
  961. *boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4] . \n
  962. *@par Restrictions:
  963. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  964. */
  965. REG_OP(ClipBoxesD)
  966. .INPUT(boxes_input, TensorType({DT_FLOAT16}))
  967. .REQUIRED_ATTR(img_size, ListInt)
  968. .OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
  969. .OP_END_FACTORY_REG(ClipBoxesD)
  970. /**
  971. *@brief Computes Fastrcnn Predictions function.
  972. *
  973. *@par Inputs:
  974. *Inputs include:
  975. * @li rois: A Tensor. Must be float16. N-D with shape [N*C, 4].
  976. * @li score: A Tensor. Must be float16. N-D with shape [N, C+1].
  977. *
  978. *@par Attributes:
  979. * @li nms_threshold: required, float, threshold of nms process.
  980. * @li score_threshold: required, float, threshold of topk process.
  981. * @li k: required, Int, threshold of topk process.
  982. *@par Outputs:
  983. * @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
  984. * @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
  985. * @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
  986. */
  987. REG_OP(FastrcnnPredictions)
  988. .INPUT(rois, TensorType({DT_FLOAT16}))
  989. .INPUT(score, TensorType({DT_FLOAT16}))
  990. .REQUIRED_ATTR(nms_threshold, Float)
  991. .REQUIRED_ATTR(score_threshold, Float)
  992. .REQUIRED_ATTR(k, Int)
  993. .OUTPUT(sorted_rois, TensorType({DT_FLOAT16}))
  994. .OUTPUT(sorted_scores, TensorType({DT_FLOAT16}))
  995. .OUTPUT(sorted_classes, TensorType({DT_FLOAT16}))
  996. .OP_END_FACTORY_REG(FastrcnnPredictions)
  997. /**
  998. *@brief Computes Fastrcnn RpnProposals function . \n
  999. *@par Inputs:
  1000. *Inputs include:
  1001. * @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
  1002. * @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
  1003. * @li img_size: A Tensor. Must be int32. shape [H, W] . \n
  1004. *@par Attributes:
  1005. * @li score_threshold: required, float, threshold of topk process.
  1006. * @li k: required, Int, threshold of topk process.
  1007. * @li min_size: required, float, threshold of nms process.
  1008. * @li nms_threshold: required, float, threshold of nms process.
  1009. * @li post_nms_num: required, Int, threshold of nms process.
  1010. * @li score_filter: bool, mark of score_filter. Defaults to "true"
  1011. * @li box_filter: bool, mark of box_filter. Defaults to "true"
  1012. * @li score_sigmoid: bool, mark of score_sigmoid. Defaults to "false"
  1013. *@par Outputs:
  1014. * sorted_box: A Tensor. Must be float16. This is the only output registered
  1015. * below; its shape is not stated here (RpnProposalsD documents the same
  1016. * output name as N-D with shape [N, 1]) . \n
  1017. * @par Third-party framework compatibility
  1018. * Documented as compatible with the TensorFlow operator Unpack (NOTE(review): looks copy-pasted - confirm).
  1019. */
  1020. REG_OP(RpnProposals)
  1021. .INPUT(rois, TensorType({DT_FLOAT16}))
  1022. .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
  1023. .INPUT(img_size, TensorType({DT_INT32}))
  1024. .REQUIRED_ATTR(score_threshold, Float)
  1025. .REQUIRED_ATTR(k, Int)
  1026. .REQUIRED_ATTR(min_size, Float)
  1027. .REQUIRED_ATTR(nms_threshold, Float)
  1028. .REQUIRED_ATTR(post_nms_num, Int)
  1029. .ATTR(score_filter, Bool, true)
  1030. .ATTR(box_filter, Bool, true)
  1031. .ATTR(score_sigmoid, Bool, false)
  1032. .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
  1033. .OP_END_FACTORY_REG(RpnProposals)
  1034. /**
  1035. *@brief Computes Fastrcnn RpnProposalsD function . \n
  1036. *@par Inputs:
  1037. *@li rois: A Tensor. Must be float16. N-D with shape [N, 4].
  1038. *@li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1] . \n
  1039. *@par Attributes:
  1040. *@li img_size: required, listint, size of image given as [H, W].
  1041. *@li score_threshold: required, float, threshold of topk process.
  1042. *@li k: required, Int, threshold of topk process.
  1043. *@li min_size: required, float, threshold of nms process.
  1044. *@li nms_threshold: required, float, threshold of nms process.
  1045. *@li post_nms_num: required, Int, threshold of nms process.
  1046. *@li score_filter: bool, mark of score_filter. Defaults to "true"
  1047. *@li box_filter: bool, mark of box_filter. Defaults to "true"
  1048. *@li score_sigmoid: bool, mark of score_sigmoid. Defaults to "false"
  1049. *@par Outputs:
  1050. *sorted_box: A Tensor of output. Must be float16. N-D with shape [N, 1] . \n
  1051. * @par Third-party framework compatibility
  1052. * Compatible with the pytorch operator RPNProposals . \n
  1053. *@par Restrictions:
  1054. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  1055. *@par Restrictions:
  1056. *Warning: THIS FUNCTION IS DEPRECATED. Please use RpnProposals instead.
  1057. */
  1058. REG_OP(RpnProposalsD)
  1059. .INPUT(rois, TensorType({DT_FLOAT16}))
  1060. .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
  1061. .REQUIRED_ATTR(img_size, ListInt)
  1062. .REQUIRED_ATTR(score_threshold, Float)
  1063. .REQUIRED_ATTR(k, Int)
  1064. .REQUIRED_ATTR(min_size, Float)
  1065. .REQUIRED_ATTR(nms_threshold, Float)
  1066. .REQUIRED_ATTR(post_nms_num, Int)
  1067. .ATTR(score_filter, Bool, true)
  1068. .ATTR(box_filter, Bool, true)
  1069. .ATTR(score_sigmoid, Bool, false)
  1070. .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
  1071. .OP_END_FACTORY_REG(RpnProposalsD)
  1072. /**
  1073. *@brief Computes Score Filter Pre-Sort function.
  1074. *
  1075. *@par Inputs:
  1076. *Inputs include:
  1077. * @li sorted_proposal: A Tensor. Must be float16.
  1078. * N-D with shape [8*6002, 8].
  1079. * @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
  1080. *
  1081. *@par Attributes:
  1082. * @li img_size: required, listint (presumably the image size [H, W] - TODO confirm).
  1083. * @li score_threshold: required, float, threshold of topk process.
  1084. * @li k: required, Int, threshold of topk process.
  1085. * @li min_size: required, float, threshold of nms process.
  1086. * @li nms_threshold: required, float, threshold of nms process.
  1087. * @li post_nms_num: required, Int, threshold of nms process.
  1088. * @li box_filter: bool, mark of box_filter. Defaults to "true"
  1089. * @li core_max_num: int, max number of core. Defaults to "8"
  1090. *@par Outputs:
  1091. * sorted_box: A Tensor. Must be float16. This is the only output registered
  1092. * below; its shape is not stated by this declaration
  1093. * (NOTE(review): earlier text listed sorted_rois/sorted_scores/sorted_classes - confirm).
  1094. */
  1095. REG_OP(RpnProposalPostProcessing)
  1096. .INPUT(sorted_proposal, TensorType({DT_FLOAT16}))
  1097. .INPUT(proposal_num, TensorType({DT_UINT32}))
  1098. .OUTPUT(sorted_box, TensorType({ DT_FLOAT16}))
  1099. .REQUIRED_ATTR(img_size, ListInt)
  1100. .REQUIRED_ATTR(score_threshold, Float)
  1101. .REQUIRED_ATTR(k, Int)
  1102. .REQUIRED_ATTR(min_size, Float)
  1103. .REQUIRED_ATTR(nms_threshold, Float)
  1104. .REQUIRED_ATTR(post_nms_num, Int)
  1105. .ATTR(box_filter, Bool, true)
  1106. .ATTR(core_max_num, Int, 8)
  1107. .OP_END_FACTORY_REG(RpnProposalPostProcessing)
  1108. /**
  1109. *@brief Computes DecodeBoundariesTarget function.
  1110. *@par Inputs:
  1111. *Inputs include:
  1112. * @li boundary_predictions: A Tensor. Must be float16.
  1113. * @li anchors: A Tensor. Must be float16.
  1114. *@par Outputs:
  1115. * @li boundary_encoded: A Tensor. Must be float16.
  1116. *@par Restrictions:
  1117. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  1118. */
  1119. REG_OP(DecodeBoundariesTarget)
  1120. .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
  1121. .INPUT(anchors, TensorType({DT_FLOAT16}))
  1122. .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
  1123. .OP_END_FACTORY_REG(DecodeBoundariesTarget)
  1124. /**
  1125. *@brief Computes DecodeCornerpointsTargetBG function.
  1126. *
  1127. *@par Inputs:
  1128. *Inputs include:
  1129. * @li keypoints_prediction: A Tensor. Must be float16.
  1130. * @li anchors: A Tensor. Must be float16.
  1131. *
  1132. *@par Outputs:
  1133. * @li keypoints_decoded: A Tensor. Must be float16.
  1134. *@par Restrictions:
  1135. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  1136. */
  1137. REG_OP(DecodeCornerpointsTargetBG)
  1138. .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
  1139. .INPUT(anchors, TensorType({DT_FLOAT16}))
  1140. .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
  1141. .OP_END_FACTORY_REG(DecodeCornerpointsTargetBG);
  1142. /**
  1143. *@brief Computes DecodeCornerpointsTargetWrtCenterV1 function.
  1144. *
  1145. *@par Inputs:
  1146. *Inputs include:
  1147. * @li keypoints_prediction: A Tensor. Must be float16.
  1148. * @li anchors: A Tensor. Must be float16.
  1149. *
  1150. *@par Outputs:
  1151. * @li keypoints_decoded: A Tensor. Must be float16.
  1152. *@par Restrictions:
  1153. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  1154. */
  1155. REG_OP(DecodeCornerpointsTargetWrtCenterV1)
  1156. .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
  1157. .INPUT(anchors, TensorType({DT_FLOAT16}))
  1158. .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
  1159. .OP_END_FACTORY_REG(DecodeCornerpointsTargetWrtCenterV1)
  1160. /**
  1161. *@brief Computes DecodeWheelsTarget function.
  1162. *
  1163. *@par Inputs:
  1164. *Inputs include:
  1165. * @li boundary_predictions: A Tensor. Must be float16.
  1166. * @li anchors: A Tensor. Must be float16.
  1167. *
  1168. *@par Outputs:
  1169. * @li boundary_encoded: A Tensor. Must be float16.
  1170. *@par Restrictions:
  1171. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  1172. */
  1173. REG_OP(DecodeWheelsTarget)
  1174. .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
  1175. .INPUT(anchors, TensorType({DT_FLOAT16}))
  1176. .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
  1177. .OP_END_FACTORY_REG(DecodeWheelsTarget)
  1178. /**
  1179. *@brief Computes nms for input boxes and score, support multiple batch and classes.
  1180. * will do clip to window, score filter, top_k, and nms
  1181. *@par Inputs:
  1182. * Four inputs, including:
  1183. *@li boxes: boxes, a 4D Tensor of type float16 with
  1184. * shape (batch, num_anchors, num_classes, 4). "batch" indicates the batch size of image,
  1185. * and "num_anchors" indicates num of boxes, and "num_classes" indicates classes of detect.
  1186. * and the value "4" refers to "x0", "x1", "y0", and "y1".
  1187. *@li scores: scores, a 3D Tensor of type float16 with
  1188. * shape (batch, num_anchors, num_classes).
  1189. *@li clip_window: optional window size, a 2D Tensor of type float16 with
  1190. * shape (batch, 4). "4" refers to "anchor_x0", "anchor_x1", "anchor_y0", and "anchor_y1".
  1191. *@li num_valid_boxes: optional valid boxes number for each batch, a 1D Tensor of type int32 with
  1192. * shape (batch,) . \n
  1193. *@par Attributes:
  1194. *@li score_threshold: A required attribute of type float32, specifying the score filter threshold.
  1195. *@li iou_threshold: A required attribute of type float32, specifying the nms iou threshold.
  1196. *@li max_size_per_class: A required attribute of type int, specifying the nms output num per class.
  1197. *@li max_total_size: A required attribute of type int, specifying the nms output num per batch.
  1198. *@li change_coordinate_frame: An optional attribute of type bool, whether to normalize coordinates after clipping. Defaults to "false".
  1199. *@li transpose_box: An optional attribute of type bool, whether inserted transpose before this op. must be "false" . \n
  1200. *@par Outputs:
  1201. *@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4),
  1202. * specifying the output nms boxes per batch.
  1203. *@li nmsed_scores: A 2D Tensor of type float16 with shape (batch, max_total_size),
  1204. * specifying the output nms score per batch.
  1205. *@li nmsed_classes: A 2D Tensor of type float16 with shape (batch, max_total_size),
  1206. * specifying the output nms class per batch.
  1207. *@li nmsed_num: A 1D Tensor of type int32 with shape (batch), specifying the valid num of nmsed_boxes . \n
  1208. *@attention Constraints:
  1209. * Only computation of float16 data is supported.
  1210. * Note: when the class num per image * max_size_per_class is too big, will compile fail with ERROR-insufficient memory
  1211. */
  1212. REG_OP(BatchMultiClassNonMaxSuppression)
  1213. .INPUT(boxes, TensorType({DT_FLOAT16}))
  1214. .INPUT(scores, TensorType({DT_FLOAT16}))
  1215. .OPTIONAL_INPUT(clip_window, TensorType({DT_FLOAT16}))
  1216. .OPTIONAL_INPUT(num_valid_boxes, TensorType({DT_INT32}))
  1217. .OUTPUT(nmsed_boxes, TensorType({DT_FLOAT16}))
  1218. .OUTPUT(nmsed_scores, TensorType({DT_FLOAT16}))
  1219. .OUTPUT(nmsed_classes, TensorType({DT_FLOAT16}))
  1220. .OUTPUT(nmsed_num, TensorType({DT_INT32}))
  1221. .REQUIRED_ATTR(score_threshold, Float)
  1222. .REQUIRED_ATTR(iou_threshold, Float)
  1223. .REQUIRED_ATTR(max_size_per_class, Int)
  1224. .REQUIRED_ATTR(max_total_size, Int)
  1225. .ATTR(change_coordinate_frame, Bool, false)
  1226. .ATTR(transpose_box, Bool, false)
  1227. .OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression)
  1228. /**
  1229. * @brief Converts the normalized bounding boxes to absolute ones . \n
  1230. * @par Inputs:
  1231. * @li normalized_boxes: A 3D Tensor of type float16 or float32.
  1232. * @li shape_hw: A 1D Tensor of type int32 . \n
  1233. * @par Attributes:
  1234. * reversed_box: An optional bool, specifying the last two dims is "4,num" or
  1235. * "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false" . \n
  1236. * @par Outputs:
  1237. * y: A Tensor. Has the same type and shape as "normalized_boxes" . \n
  1238. * @attention Constraints:
  1239. * "normalized_boxes"'s shape must be (batch,num,4) or (batch,4,num).
  1240. * "shape_hw"'s shape must be (4,)
  1241. */
  1242. REG_OP(ToAbsoluteBBox)
  1243. .INPUT(normalized_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1244. .INPUT(shape_hw, TensorType({DT_INT32}))
  1245. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  1246. .ATTR(reversed_box, Bool, false)
  1247. .OP_END_FACTORY_REG(ToAbsoluteBBox)
  1248. /**
  1249. *@brief Computes Normalize bbox function.
  1250. *
  1251. *@par Inputs:
  1252. *Inputs include:
  1253. * @li boxes: A Tensor. Must be float16 or float32.
  1254. * @li shape_hw: A Tensor. Must be int32.
  1255. *
  1256. *@par Attributes:
  1257. * reversed_box: An optional bool. Defaults to "false".
  1258. *
  1259. *@par Outputs:
  1260. * y: A Tensor. Must have the same type and shape as "boxes".
  1261. */
  1262. REG_OP(NormalizeBBox)
  1263. .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1264. .INPUT(shape_hw, TensorType({DT_INT32}))
  1265. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  1266. .ATTR(reversed_box, Bool, false)
  1267. .OP_END_FACTORY_REG(NormalizeBBox)
  1268. /**
  1269. *@brief Computes decode bboxv2 function.
  1270. *
  1271. *@par Inputs:
  1272. *Inputs include:
  1273. * @li boxes: A Tensor. Must be float16 or float32.
  1274. * @li anchors: A Tensor. Must be float16 or float32.
  1275. *
  1276. *@par Attributes:
  1277. * @li scales: optional, listfloat. Defaults to {1.0, 1.0, 1.0, 1.0}.
  1278. * @li decode_clip: optional, float, threshold of decode process. Defaults to 0.0.
  1279. * @li reversed_box: optional, bool. Defaults to "false".
  1280. *
  1281. *@par Outputs:
  1282. * y: A Tensor. Must have the same type as boxes.
  1283. */
  1284. REG_OP(DecodeBboxV2)
  1285. .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1286. .INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT}))
  1287. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  1288. .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0})
  1289. .ATTR(decode_clip, Float, 0.0)
  1290. .ATTR(reversed_box, Bool, false)
  1291. .OP_END_FACTORY_REG(DecodeBboxV2)
  1292. /**
  1293. *@brief Sorts the input tensor and returns the sorted values and their indices.
  1294. *
  1295. *@par Inputs:
  1296. *Inputs include:
  1297. * x: A Tensor. Dtype support: float16, float, int16, int8,
  1298. * uint8, int32, int64.
  1299. *
  1300. *@par Attributes:
  1301. * @li axis: An optional attribute indicates the sorting axis. Defaults to -1.
  1302. * @li descending: An optional attribute indicates descending sort or not. Defaults to "false".
  1303. *
  1304. *@par Outputs:
  1305. * @li y1: A Tensor. Must have the same type as x.
  1306. * @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
  1307. *
  1308. */
  1309. REG_OP(Sort)
  1310. .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
  1311. DT_UINT8, DT_INT32, DT_INT64}))
  1312. .OUTPUT(y1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
  1313. DT_UINT8, DT_INT32, DT_INT64}))
  1314. .OUTPUT(y2, TensorType({DT_INT32}))
  1315. .ATTR(axis, Int, -1)
  1316. .ATTR(descending, Bool, false)
  1317. .OP_END_FACTORY_REG(Sort)
  1318. /**
  1319. *@brief Computes iou for input bboxes and gtboxes.
  1320. *@par Inputs:
  1321. * Two inputs, including:
  1322. *@li bboxes: boxes, a 4D Tensor of type float16 or float32 with the shape (x0, x1, y0, y1),
  1323. *@li gtboxes: boxes, a 4D Tensor of type float16 or float32 with the shape (x0, x1, y0, y1).\n
  1324. *@par Attributes:
  1325. *@li mode: An optional attribute of type string, whether judge the mode of iou. Defaults to "iou". \n
  1326. *@par Outputs:
  1327. *@li overlap: A 2D Tensor of type float16 or float32 with shape [n, m]. \n
  1328. *@attention Constraints:
  1329. * Only computation of float16 data is supported (NOTE(review): the registration also lists float32 - confirm).
  1330. *@par Restrictions:
  1331. *Warning:THIS FUNCTION IS DEPRECATED. Please use Iou instead.
  1332. */
  1333. REG_OP(PtIou)
  1334. .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1335. .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1336. .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
  1337. .ATTR(mode, String, "iou")
  1338. .OP_END_FACTORY_REG(PtIou)
  1339. /**
  1340. *@brief Greedily selects a subset of bounding boxes in descending order of
  1341. score . \n
  1342. *@par Inputs:
  1343. *Input boxes and scores must be float16 type. Inputs include:
  1344. *@li boxes: A input tensor with shape [num_batches,spatial_dimension,4].
  1345. The single box data format is indicated by center_point_box.
  1346. *@li scores: A input tensor with shape [num_batches,num_classes,spatial_dimension]
  1347. *@li max_output_size: An optional scalar integer tensor representing the maximum number
  1348. of boxes to be selected by non max suppression.
  1349. *@li iou_threshold: An optional 0-D float tensor representing the threshold for deciding
  1350. whether boxes overlap too much with respect to IOU.
  1351. *@li score_threshold: An optional 0-D float tensor representing the threshold for
  1352. deciding when to remove boxes based on score . \n
  1353. *@par Attributes:
  1354. *@li center_point_box: Integer indicating the format of the box data.
  1355. The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
  1356. where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
  1357. of box corners and the coordinates can be provided as normalized
  1358. (i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models.
  1359. 1 - the box data is supplied as [x_center, y_center, width, height]. Mostly used for Pytorch models.
  1360. *@li max_boxes_size: An optional integer attribute. Defaults to 0. \n
  1361. *@par Outputs:
  1362. *@li selected_indices: A 2-D integer tensor of shape [M] representing the
  1363. selected indices from the boxes tensor, where M <= max_output_size. \n
  1364. *@attention Constraints:
  1365. *Input boxes and scores must be float16 type . \n
  1366. *@par Third-party framework compatibility
  1367. *Compatible with onnx NonMaxSuppression operator.
  1368. *@par Restrictions:
  1369. *Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
  1370. */
  1371. REG_OP(NonMaxSuppressionV6)
  1372. .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1373. .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
  1374. .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
  1375. .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
  1376. .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
  1377. .OUTPUT(selected_indices, TensorType({DT_INT32}))
  1378. .ATTR(center_point_box, Int, 0)
  1379. .ATTR(max_boxes_size, Int, 0)
  1380. .OP_END_FACTORY_REG(NonMaxSuppressionV6)
  1381. /**
  1382. *@brief Greedily selects a subset of bounding boxes in descending order of
  1383. score . \n
  1384. *@par Inputs:
  1385. *Input boxes and scores must be float16 type. Inputs include:
  1386. *@li boxes: A input tensor with shape [num_batches,spatial_dimension,4].
  1387. The single box data format is indicated by center_point_box.
  1388. *@li scores: A input tensor with shape [num_batches,num_classes,spatial_dimension]
  1389. *@li max_output_size: An optional scalar integer tensor representing the maximum number
  1390. of boxes to be selected by non max suppression.
  1391. *@li iou_threshold: An optional 0-D float tensor representing the threshold for deciding
  1392. whether boxes overlap too much with respect to IOU.
  1393. *@li score_threshold: An optional 0-D float tensor representing the threshold for
  1394. deciding when to remove boxes based on score . \n
  1395. *@li index_id: An optional float16 input tensor with shape [num_batches,num_classes,spatial_dimension,3]
  1396. the last dim representing (batch_id,class_id,index_id) . \n
  1397. *@par Attributes:
  1398. *@li center_point_box: Integer indicating the format of the box data.
  1399. The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
  1400. where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
  1401. of box corners and the coordinates can be provided as normalized
  1402. (i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models.
  1403. 1 - the box data is supplied as [x_center, y_center, width, height].
  1404. Mostly used for Pytorch models.
  1405. *@li max_boxes_size: An optional attribute integer representing the real maximum
  1406. *number of boxes to be selected by non max suppression. Defaults to 0 . \n
  1407. *@par Outputs:
  1408. *selected_indices: A 2-D integer tensor of shape [M] representing the
  1409. selected indices from the boxes tensor, where M <= max_output_size. \n
  1410. *@attention Constraints:
  1411. *Input boxes and scores must be float16 type . \n
  1412. *@par Third-party framework compatibility
  1413. *Compatible with onnx NonMaxSuppression operator.
  1414. */
  1415. REG_OP(NonMaxSuppressionV7)
  1416. .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1417. .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
  1418. .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
  1419. .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
  1420. .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
  1421. .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16}))
  1422. .OUTPUT(selected_indices, TensorType({DT_INT32}))
  1423. .ATTR(center_point_box, Int, 0)
  1424. .ATTR(max_boxes_size, Int, 0)
  1425. .OP_END_FACTORY_REG(NonMaxSuppressionV7)
  1426. /**
  1427. *@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n
  1428. *@par Inputs:
  1429. * Two inputs, including:
  1430. *@li features: A 5HD Tensor list of type float32 or float16.
  1431. *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
  1432. * the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1".
  1433. *@par Attributes:
  1434. *@li finest_scale: An optional attribute of type int, specifying the scale of calculate levels of "rois". Defaults to "56".
  1435. *@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of "rois" coordinates. Defaults to "0".
  1436. *@li spatial_scale: An optional attribute of type list float32, specifying the scaling ratio of "features"
  1437. * to the original image. Defaults to {1/4, 1/8, 1/16, 1/32}.
  1438. *@li pooled_height: An optional attribute of type int32, specifying the H dimension. Defaults to "7".
  1439. *@li pooled_width: An optional attribute of type int32, specifying the W dimension. Defaults to "7".
  1440. *@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency
  1441. * of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois",
  1442. * which is a floating point number. Defaults to "0".
  1443. *@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg" . \n
  1444. *@li aligned: An optional attribute of type bool, specifying the align to corner. Defaults to true . \n
  1445. *@par Outputs:
  1446. * y: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16.
  1447. * The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height",
  1448. * "pooled_width", and "features", respectively.
  1449. *@par Third-party framework compatibility
  1450. *Compatible with mmdetection SingleRoIExtractor operator.
  1451. */
  1452. REG_OP(RoiExtractor)
  1453. .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
  1454. .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
  1455. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  1456. .ATTR(finest_scale, Int, 56)
  1457. .ATTR(roi_scale_factor, Float, 0)
  1458. .ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32})
  1459. .ATTR(pooled_height, Int, 7)
  1460. .ATTR(pooled_width, Int, 7)
  1461. .ATTR(sample_num, Int, 0)
  1462. .ATTR(pool_mode, String, "avg")
  1463. .ATTR(aligned, Bool, true)
  1464. .OP_END_FACTORY_REG(RoiExtractor)
  1465. /**
  1466. *@brief Performs Position Sensitive PS ROI Pooling . \n
  1467. *@par Inputs:
  1468. * Two inputs, including:
  1469. *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
  1470. * map, dimension C1 must be equal to
  1471. * (int(output_dim+15)/C0))*group_size*group_size.
  1472. *@li rois: A tensor of type float16 or float32, with shape
  1473. * [batch, 5, rois_num], describing the ROIs, each ROI consists of five
  1474. * elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
  1475. * the index of the input feature map, "x1", "y1", "x2", or "y2" must be
  1476. * greater than or equal to "0.0" . \n
  1477. *@par Attributes:
  1478. *@li output_dim: A required int32, specifying the number of output channels,
  1479. * must be greater than 0.
  1480. *@li group_size: A required int32, specifying the number of groups to encode
  1481. * position-sensitive score maps, must be within the range (0, 128).
  1482. *@li spatial_scale: A required float32, scaling factor for mapping the input
  1483. * coordinates to the ROI coordinates . \n
  1484. *@par Outputs:
  1485. *y: An NC1HWC0 tensor of type float16 or float32, describing the result
  1486. * feature map . \n
  1487. *@attention Constraints:
  1488. * NC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16
  1489. */
  1490. REG_OP(PSROIPoolingV2)
  1491. .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
  1492. .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
  1493. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  1494. .REQUIRED_ATTR(spatial_scale, Float)
  1495. .REQUIRED_ATTR(output_dim, Int)
  1496. .REQUIRED_ATTR(group_size, Int)
  1497. .OP_END_FACTORY_REG(PSROIPoolingV2)
  1498. /**
  1499. *@brief Performs Position Sensitive PS ROI Pooling Grad . \n
  1500. *@par Inputs:
  1501. * Two inputs, including:
  1502. *@li x: An NC1HWC0 tensor of type float16 or float32, describing the result
  1503. * feature map . \n
  1504. *@li rois: A tensor of type float16 or float32, with shape
  1505. * [batch, 5, rois_num], describing the ROIs, each ROI consists of five
  1506. * elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
  1507. * the index of the input feature map, "x1", "y1", "x2", or "y2" must be
  1508. * greater than or equal to "0.0" . \n
  1509. *@par Attributes:
  1510. *@li output_dim: A required int32, specifying the number of output channels,
  1511. * must be greater than 0.
  1512. *@li group_size: A required int32, specifying the number of groups to encode
  1513. * position-sensitive score maps, must be within the range (0, 128).
  1514. *@li spatial_scale: A required float32, scaling factor for mapping the input
  1515. * coordinates to the ROI coordinates . \n
  1516. *@li input_size: A required listInt, mapping the grad input size: (H, W)
  1517. *@par Outputs:
  1518. *y: An NC1HWC0 tensor of type float16 or float32, describing the feature
  1519. * map, dimension C1 must be equal to
  1520. * (int(output_dim+15)/C0))*group_size*group_size.
  1521. *@attention Constraints:
  1522. * NC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16
  1523. */
  1524. REG_OP(PSROIPoolingGradV2D)
  1525. .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
  1526. .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
  1527. .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
  1528. .REQUIRED_ATTR(spatial_scale, Float)
  1529. .REQUIRED_ATTR(output_dim, Int)
  1530. .REQUIRED_ATTR(group_size, Int)
  1531. .REQUIRED_ATTR(input_size, ListInt)
  1532. .OP_END_FACTORY_REG(PSROIPoolingGradV2D)
  1533. /**
  1534. *@brief Generates the responsible flags of anchors in a single feature map.
  1535. *@par Inputs:
  1536. *@li gt_bboxes: Ground truth boxes, a 2-D Tensor of type float32 with shape `[batch, 4]`.
  1537. *@par Attributes:
  1538. *@li featmap_size: A required listInt, the size of the feature map.
  1539. *@li strides: A required listInt, the stride of the current level.
  1540. *@li num_base_anchors: A required int, the number of base anchors.
  1541. *@par Outputs:
  1542. *flags: A Tensor of type uint8, the valid flags of each anchor in a single level.
  1543. */
  1544. REG_OP(AnchorResponseFlags)
  1545. .INPUT(gt_bboxes, TensorType({DT_FLOAT}))
  1546. .OUTPUT(flags, TensorType({DT_UINT8}))
  1547. .REQUIRED_ATTR(featmap_size, ListInt)
  1548. .REQUIRED_ATTR(strides, ListInt)
  1549. .REQUIRED_ATTR(num_base_anchors, Int)
  1550. .OP_END_FACTORY_REG(AnchorResponseFlags)
  1551. /**
  1552. *@brief Generates bounding boxes based on yolo's "anchor" and "ground-truth" boxes.
  1553. * It is a customized mmdetection operator . \n
  1554. *@par Inputs:
  1555. * Three inputs, including:
  1556. *@li anchor_boxes: anchor boxes generated by the yolo training set.
  1557. * A 2D Tensor of type float32 or float16 with shape (N, 4). "N" indicates the number
  1558. * of ROIs, and the value "4" refers to (tx, ty, tw, th).
  1559. *@li gt_bboxes: target of the transformation, e.g., ground-truth boxes.
  1560. * A 2D Tensor of type float32 or float16 with shape (N, 4).
  1561. * "N" indicates the number of ROIs, and 4 indicates "dx", "dy", "dw", and "dh".
  1562. *@li stride: Scale for each box.
  1563. * A 1D Tensor of type int32 with shape (N,).
  1564. * "N" indicates the number of ROIs. \n
  1565. *@par Attributes:
  1566. *@li performance_mode: An optional string, "high_precision" or "high_performance".
  1567. * Defaults to "high_precision". With float32 input, "high_precision" keeps the output
  1568. * tensor precision smaller than 0.0001, while "high_performance" gives the best
  1569. * performance, but the precision will only be smaller than 0.005.
  1570. *@par Outputs:
  1571. *encoded_bboxes: Bboxes generated based on "anchor_boxes" and "gt_bboxes". Has the
  1572. * same format and type as "anchor_boxes".
  1573. *
  1574. *@attention Constraints:
  1575. * input anchor boxes only support maximum N=20480. \n
  1576. */
  1577. REG_OP(YoloBoxesEncode)
  1578. .INPUT(anchor_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1579. .INPUT(gt_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1580. .INPUT(stride, TensorType({DT_INT32}))
  1581. .ATTR(performance_mode, String, "high_precision")
  1582. .OUTPUT(encoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
  1583. .OP_END_FACTORY_REG(YoloBoxesEncode)
  1584. /**
  1585. *@brief Assigns positive bboxes in grid assign, producing "assigned_gt_inds_pos" from IOU overlaps.
  1586. *@par Inputs:
  1587. * Eight inputs, including:
  1588. *@li assigned_gt_inds: Tensor of type float16 or float32, shape (n, )
  1589. *@li overlaps: A Tensor. Datatype is same as assigned_gt_inds. IOU between gt_bboxes and bboxes. shape(k, n)
  1590. *@li box_responsible_flags: A Tensor. Support uint8. Flag to indicate whether box is responsible.
  1591. *@li max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=0).
  1592. *@li argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=0).
  1593. *@li gt_max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=1).
  1594. *@li gt_argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=1).
  1595. *@li num_gts: A Tensor. Support int32. real k. shape (1, )
  1596. *@par Attributes:
  1597. *@li pos_iou_thr: A required float. IOU threshold for positive bboxes.
  1598. *@li min_pos_iou: A required float. Minimum iou for a bbox to be considered as a positive bbox.
  1599. *@li gt_max_assign_all: A required bool. Whether to assign all bboxes with the same highest overlap with some gt to that gt.
  1600. *@par Outputs:
  1601. *@li assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
  1602. */
  1603. REG_OP(GridAssignPositive)
  1604. .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 }))
  1605. .INPUT(overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
  1606. .INPUT(box_responsible_flags, TensorType({ DT_UINT8 }))
  1607. .INPUT(max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
  1608. .INPUT(argmax_overlaps, TensorType({ DT_INT32 }))
  1609. .INPUT(gt_max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
  1610. .INPUT(gt_argmax_overlaps, TensorType({ DT_INT32 }))
  1611. .INPUT(num_gts, TensorType({ DT_INT32 }))
  1612. .OUTPUT(assigned_gt_inds_pos, TensorType({DT_FLOAT, DT_FLOAT16}))
  1613. .REQUIRED_ATTR(pos_iou_thr, Float)
  1614. .REQUIRED_ATTR(min_pos_iou, Float)
  1615. .REQUIRED_ATTR(gt_max_assign_all, Bool)
  1616. .OP_END_FACTORY_REG(GridAssignPositive)
  1617. } // namespace ge
  1618. #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_

图引擎模块(GE)是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。