/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 * \file nn_detect_ops.h
 * \brief
 */
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {
/**
*@brief Generates bounding boxes based on "rois" and "deltas".
* It is a customized FasterRcnn operator. \n
*@par Inputs:
* Two inputs, including:
*@li rois: Regions of interest (ROIs) generated by the region proposal
* network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4).
* "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1",
* "y0", and "y1".
*@li deltas: Absolute variation between the ROIs generated by the RPN and
* ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4).
* "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh". \n
*@par Attributes:
*@li means: A list of 4 floats. Defaults to [0.0, 0.0, 0.0, 0.0].
* "deltas" = "deltas" x "stds" + "means".
*@li stds: A list of 4 floats. Defaults to [1.0, 1.0, 1.0, 1.0].
* "deltas" = "deltas" x "stds" + "means".
*@li max_shape: Shape [h, w], specifying the size of the image transferred to
* the network. Used to ensure that the bbox shape after conversion does not
* exceed "max_shape".
*@li wh_ratio_clip: Defaults to "16/1000". The values of "dw" and "dh" fall
* within (-wh_ratio_clip, wh_ratio_clip). \n
*@par Outputs:
*bboxes: Bboxes generated based on "rois" and "deltas". Have the same format
* and type as "rois".
*/
REG_OP(BoundingBoxDecode)
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
    .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
    .REQUIRED_ATTR(max_shape, ListInt)
    .ATTR(wh_ratio_clip, Float, 0.016)
    .OP_END_FACTORY_REG(BoundingBoxDecode)
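
// Illustrative sketch only, not part of the original header: one ROI through
// the decode steps documented above, using the documented {x0, x1, y0, y1}
// layout. Step 1 and the max_shape clamp follow the comment; clamping
// "dw"/"dh" to |log(wh_ratio_clip)| and the center-form transform are
// assumptions based on the usual FasterRcnn convention. Assumes <cmath> and
// <algorithm> are included at the top of the file; all names are hypothetical.
inline void BoundingBoxDecodeSketch(const float roi[4], const float delta[4],
                                    const float means[4], const float stds[4],
                                    float max_h, float max_w,
                                    float wh_ratio_clip, float bbox[4]) {
  // Step 1 (documented): "deltas" = "deltas" x "stds" + "means".
  float dx = delta[0] * stds[0] + means[0];
  float dy = delta[1] * stds[1] + means[1];
  float dw = delta[2] * stds[2] + means[2];
  float dh = delta[3] * stds[3] + means[3];
  // Step 2: keep "dw"/"dh" within the documented ratio clip (assumed log form).
  float max_ratio = std::abs(std::log(wh_ratio_clip));
  dw = std::min(std::max(dw, -max_ratio), max_ratio);
  dh = std::min(std::max(dh, -max_ratio), max_ratio);
  // Step 3 (assumed convention): apply the deltas to the ROI in center form.
  float px = (roi[0] + roi[1]) * 0.5f, py = (roi[2] + roi[3]) * 0.5f;
  float pw = roi[1] - roi[0] + 1.0f, ph = roi[3] - roi[2] + 1.0f;
  float cx = px + pw * dx, cy = py + ph * dy;
  float gw = pw * std::exp(dw), gh = ph * std::exp(dh);
  // Step 4 (documented): the decoded box must not exceed "max_shape" = [h, w].
  bbox[0] = std::min(std::max(cx - gw * 0.5f, 0.0f), max_w);
  bbox[1] = std::min(std::max(cx + gw * 0.5f, 0.0f), max_w);
  bbox[2] = std::min(std::max(cy - gh * 0.5f, 0.0f), max_h);
  bbox[3] = std::min(std::max(cy + gh * 0.5f, 0.0f), max_h);
}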
/**
*@brief Computes the coordinate variations between bboxes and ground truth
* boxes. It is a customized FasterRcnn operator. \n
*@par Inputs:
* Two inputs, including:
*@li anchor_box: Anchor boxes. A 2D Tensor of float32 with shape (N, 4).
* "N" indicates the number of bounding boxes, and the value "4" refers to
* "x0", "x1", "y0", and "y1".
*@li ground_truth_box: Ground truth boxes. A 2D Tensor of float32 with
* shape (N, 4). "N" indicates the number of bounding boxes, and the value "4"
* refers to "x0", "x1", "y0", and "y1". \n
*@par Attributes:
*@li means: A list of 4 floats. Defaults to [0.0, 0.0, 0.0, 0.0].
* "deltas" = "deltas" x "stds" + "means".
*@li stds: A list of 4 floats. Defaults to [1.0, 1.0, 1.0, 1.0].
* "deltas" = "deltas" x "stds" + "means". \n
*@par Outputs:
*delats: A 2D Tensor of type float32 with shape (N, 4), specifying the
* variations between all anchor boxes and ground truth boxes.
*/
REG_OP(BoundingBoxEncode)
    .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(ground_truth_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(delats, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
    .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
    .OP_END_FACTORY_REG(BoundingBoxEncode)
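
// Illustrative sketch only, not part of the original header: the inverse of
// the decode above for one anchor / ground-truth pair, assuming the usual
// FasterRcnn convention and the documented {x0, x1, y0, y1} layout.
// Assumes <cmath> is included at the top of the file; names are hypothetical.
inline void BoundingBoxEncodeSketch(const float anchor[4], const float gt[4],
                                    const float means[4], const float stds[4],
                                    float delta[4]) {
  float px = (anchor[0] + anchor[1]) * 0.5f, py = (anchor[2] + anchor[3]) * 0.5f;
  float pw = anchor[1] - anchor[0] + 1.0f, ph = anchor[3] - anchor[2] + 1.0f;
  float gx = (gt[0] + gt[1]) * 0.5f, gy = (gt[2] + gt[3]) * 0.5f;
  float gw = gt[1] - gt[0] + 1.0f, gh = gt[3] - gt[2] + 1.0f;
  // Raw variations (dx, dy, dw, dh) between the anchor and the ground truth.
  float raw[4] = {(gx - px) / pw, (gy - py) / ph,
                  std::log(gw / pw), std::log(gh / ph)};
  // Inverse of the documented normalization "deltas" = "deltas" x "stds" + "means".
  for (int i = 0; i < 4; ++i) {
    delta[i] = (raw[i] - means[i]) / stds[i];
  }
}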
/**
*@brief Judges whether the bounding box is valid. It is a customized
* FasterRcnn operator. \n
*@par Inputs:
* Two inputs, including:
*@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4).
* "N" indicates the number of bounding boxes, and the value "4" indicates "x0",
* "x1", "y0", and "y1".
*@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16
* with shape (16,). \n
*@par Outputs:
*valid_tensor: An int8 Tensor with shape (N, 1), specifying whether an input
* anchor is in an image. "1" indicates valid, while "0" indicates invalid. \n
*@attention Constraints:
* 16 "img_metas" are input. The first three numbers (height, width, ratio) are
* valid, specifying the valid boundary (height x ratio, width x ratio).
*/
REG_OP(CheckValid)
    .INPUT(bbox_tensor, TensorType({DT_FLOAT16}))
    .INPUT(img_metas, TensorType({DT_FLOAT16}))
    .OUTPUT(valid_tensor, TensorType({DT_INT8}))
    .OP_END_FACTORY_REG(CheckValid)
/**
*@brief Computes the intersection over union (IoU) or the intersection over
* foreground (IoF) based on the ground-truth and predicted regions. \n
*@par Inputs:
* Two inputs, including:
*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (N, 4). "N" indicates the number of bounding boxes, and the value
* "4" refers to "x0", "x1", "y0", and "y1".
*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (M, 4). "M" indicates the number of ground truth boxes, and
* the value "4" refers to "x0", "x1", "y0", and "y1". \n
*@par Attributes:
*@li mode: Computation mode, a character string whose value is either "iou" or "iof".
*@li eps: An optional float used to prevent division by zero. Defaults to "1.0". \n
*@par Outputs:
*overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
* the IoU or IoF ratio. \n
*@attention Constraints:
* Only computation of float16 data is supported. To avoid overflow, the input
* length and width are scaled by 0.2 internally.
*/
REG_OP(Iou)
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(mode, String, "iou")
    .ATTR(eps, Float, 1.0)
    .OP_END_FACTORY_REG(Iou)
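
// Illustrative scalar sketch only, not part of the original header, of the
// two documented modes with the documented {x0, x1, y0, y1} layout: "iou"
// divides the intersection by the union, "iof" by the foreground
// (ground-truth) area; "eps" guards the division. Names are hypothetical.
inline float IouSketch(const float b[4], const float g[4],
                       bool iof_mode, float eps) {
  float ix0 = b[0] > g[0] ? b[0] : g[0];  // max of the left edges
  float ix1 = b[1] < g[1] ? b[1] : g[1];  // min of the right edges
  float iy0 = b[2] > g[2] ? b[2] : g[2];  // max of the top edges
  float iy1 = b[3] < g[3] ? b[3] : g[3];  // min of the bottom edges
  float iw = ix1 - ix0 > 0.0f ? ix1 - ix0 : 0.0f;
  float ih = iy1 - iy0 > 0.0f ? iy1 - iy0 : 0.0f;
  float inter = iw * ih;
  float area_b = (b[1] - b[0]) * (b[3] - b[2]);
  float area_g = (g[1] - g[0]) * (g[3] - g[2]);
  float denom = iof_mode ? area_g : area_b + area_g - inter;
  return inter / (denom + eps);
}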
/**
*@brief Computes the GIoU: first calculates the IoU of the two boxes and the
* area of their minimum enclosing (closure) box, then the proportion of the
* closure area that belongs to neither box, and finally subtracts this
* proportion from the IoU. \n
*@par Inputs:
* Two inputs, including:
*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (N, 4). "N" indicates the number of bounding boxes, and the value
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (M, 4). "M" indicates the number of ground truth boxes, and
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. \n
*@par Attributes:
*@li trans: An optional bool, "true" for the "xywh" layout, "false" for "xyxy".
*@li is_cross: An optional bool, controlling whether the output shape is [M, N] or [1, N].
*@li mode: Computation mode, a character string whose value is either "iou" or "iof". \n
*@par Outputs:
* overlap: A 2D Tensor of type float16 or float32 with shape [M, N] or [1, N],
* specifying the IoU or IoF ratio. \n
*@attention Constraints:
* Only computation of float16 data is supported. To avoid overflow, the input
* length and width are scaled by 0.2 internally.
*/
REG_OP(GIoU)
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(trans, Bool, false)
    .ATTR(is_cross, Bool, true)
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(GIoU)
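
// Illustrative scalar sketch only, not part of the original header, of the
// GIoU computation described above for trans = false ("xyxy" layout): the IoU
// minus the fraction of the minimum enclosing box not covered by the union.
// Division guards are omitted for brevity; names are hypothetical.
inline float GIoUSketch(const float b[4], const float g[4]) {
  // b = {x1, y1, x2, y2}, g = {x1, y1, x2, y2}.
  float iw = (b[2] < g[2] ? b[2] : g[2]) - (b[0] > g[0] ? b[0] : g[0]);
  float ih = (b[3] < g[3] ? b[3] : g[3]) - (b[1] > g[1] ? b[1] : g[1]);
  float inter = (iw > 0.0f ? iw : 0.0f) * (ih > 0.0f ? ih : 0.0f);
  float uni = (b[2] - b[0]) * (b[3] - b[1]) +
              (g[2] - g[0]) * (g[3] - g[1]) - inter;
  // Minimum enclosing ("closure") box of the two inputs.
  float cw = (b[2] > g[2] ? b[2] : g[2]) - (b[0] < g[0] ? b[0] : g[0]);
  float ch = (b[3] > g[3] ? b[3] : g[3]) - (b[1] < g[1] ? b[1] : g[1]);
  float closure = cw * ch;
  return inter / uni - (closure - uni) / closure;
}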
/**
*@brief Performs the backpropagation of ROIAlign for training scenarios. \n
*@par Inputs:
* Three inputs, including:
*@li ydiff: A 5HD gradient input of type float32.
*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs,
* and the value "5" refers to the index of the image where the ROI is located plus "x0", "x1", "y0", and "y1".
*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. \n
*@par Attributes:
*@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign.
*@li pooled_width: A required attribute of type int, specifying the W dimension.
*@li pooled_height: A required attribute of type int, specifying the H dimension.
*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical
* sampling frequency of each output. If this attribute is set to "0", the sampling frequency is
* equal to the rounded up value of "rois", which is a floating point number. Defaults to "2".
*@li roi_end_mode: An optional attribute of type int, specifying the align mode. \n
*@par Outputs:
*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features".
*/
REG_OP(ROIAlignGrad)
    .INPUT(ydiff, TensorType({DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
    .OUTPUT(xdiff, TensorType({DT_FLOAT}))
    .REQUIRED_ATTR(xdiff_shape, ListInt)
    .REQUIRED_ATTR(pooled_width, Int)
    .REQUIRED_ATTR(pooled_height, Int)
    .REQUIRED_ATTR(spatial_scale, Float)
    .ATTR(sample_num, Int, 2)
    .ATTR(roi_end_mode, Int, 1)
    .OP_END_FACTORY_REG(ROIAlignGrad)
/**
*@brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator. \n
*@par Inputs:
* Three inputs, including:
*@li features: A 5HD Tensor of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* and the value "5" refers to the index of the image where the ROI is located plus
* "x0", "y0", "x1", and "y1".
*@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved. \n
*@par Attributes:
*@li spatial_scale: A required attribute of type float32, specifying the scaling ratio of "features" to the original image.
*@li pooled_height: A required attribute of type int32, specifying the H dimension.
*@li pooled_width: A required attribute of type int32, specifying the W dimension.
*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0",
* the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2".
*@li roi_end_mode: An optional attribute of type int32. Defaults to "1". \n
*@par Outputs:
* output: Outputs the feature sample of each ROI position. The output is a 5HD Tensor of type float32 or float16.
* The axis N is the number of input ROIs. Axes H, W, and C are consistent
* with the values of "pooled_height",
* "pooled_width", and "features", respectively.
*/
REG_OP(ROIAlign)
    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(spatial_scale, Float)
    .REQUIRED_ATTR(pooled_height, Int)
    .REQUIRED_ATTR(pooled_width, Int)
    .ATTR(sample_num, Int, 2)
    .ATTR(roi_end_mode, Int, 1)
    .OP_END_FACTORY_REG(ROIAlign)
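
// Illustrative sketch only, not part of the original header, of how the
// documented attributes interact: an ROI given in original-image coordinates
// is mapped onto the feature map by "spatial_scale" and split into a
// pooled_height x pooled_width grid of bins; each bin is then averaged over
// sample_num x sample_num bilinear samples (not shown). Names are hypothetical.
inline void RoiAlignBinSketch(float x0, float y0, float x1, float y1,
                              float spatial_scale, int pooled_height,
                              int pooled_width, int ph, int pw,
                              float bin[4]) {
  // Map the ROI corners from the source image onto the feature map.
  float fx0 = x0 * spatial_scale, fy0 = y0 * spatial_scale;
  float fx1 = x1 * spatial_scale, fy1 = y1 * spatial_scale;
  // Size of one bin of the pooled output grid.
  float bin_w = (fx1 - fx0) / pooled_width;
  float bin_h = (fy1 - fy0) / pooled_height;
  // Bounds of bin (ph, pw); the op samples inside this rectangle.
  bin[0] = fx0 + pw * bin_w;
  bin[1] = fy0 + ph * bin_h;
  bin[2] = bin[0] + bin_w;
  bin[3] = bin[1] + bin_h;
}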
/**
*@brief Performs SSD prior box detection. \n
*@par Inputs:
* Two inputs, including:
*@li x: An NCHW feature map of type float32 or float16.
*@li img: Source image. Has the same type and format as "x". \n
*@par Attributes:
*@li min_size: A required list of float32 values, specifying the minimum edge length of a square prior box.
*@li max_size: A required list of float32 values, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size).
*@li aspect_ratio: A required list of float32 values, specifying the aspect ratios for generated rectangle boxes. The height
* is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio).
*@li img_h: An optional int32, specifying the source image height. Defaults to "0".
*@li img_w: An optional int32, specifying the source image width. Defaults to "0".
*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0".
*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0".
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). \n
*@par Outputs:
*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. \n
*@attention Constraints:
* This operator applies only to SSD networks.
*@see SSDDetectionOutput()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(PriorBox)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .REQUIRED_ATTR(aspect_ratio, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBox);
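
// Illustrative sketch only, not part of the original header, of the box edge
// lengths documented above: a square box of edge min_size, a square box of
// edge sqrt(min_size * max_size), and one rectangle per aspect ratio.
// Assumes <cmath> is included at the top of the file; names are hypothetical.
inline void PriorBoxSizesSketch(float min_size, float max_size,
                                float aspect_ratio, float wh[3][2]) {
  wh[0][0] = min_size;                        // width of the small square box
  wh[0][1] = min_size;                        // height of the small square box
  wh[1][0] = std::sqrt(min_size * max_size);  // edge of the large square box
  wh[1][1] = wh[1][0];
  wh[2][0] = min_size * std::sqrt(aspect_ratio);  // rectangle width
  wh[2][1] = min_size / std::sqrt(aspect_ratio);  // rectangle height
}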
/**
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox. \n
*@par Inputs:
* Six inputs, including:
*@li x: An NCHW feature map of type float32 or float16.
*@li img: Source image. Has the same type and format as "x".
*@li data_h: An NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
*@li data_w: An NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
*@li box_height: An NCHW tensor of type float32 or float16, specifying the height of each prior box.
*@li box_width: An NCHW tensor of type float32 or float16, specifying the width of each prior box. \n
*@par Attributes:
*@li min_size: A required list of float32 values, specifying the minimum edge length of a square prior box.
*@li max_size: A required list of float32 values, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size).
*@li img_h: An optional int32, specifying the height of the source image.
*@li img_w: An optional int32, specifying the width of the source image.
*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image.
*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image.
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). \n
*@par Outputs:
*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. \n
*@attention Constraints:
* This operator applies only to SSD networks.
*@see SSDDetectionOutput()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
*/
REG_OP(PriorBoxD)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBoxD);
/**
*@brief Performs SSD prior box detection, with an additional "boxes" input and the "aspect_ratio" attribute deleted compared to PriorBox. \n
*@par Inputs:
* Three inputs, including:
*@li x: An NCHW feature map of type float32 or float16.
*@li img: Source image. Has the same type and format as "x".
*@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as the output "y".
*@par Attributes:
*@li min_size: A required list of float32 values, specifying the minimum edge length of a square prior box.
*@li max_size: A required list of float32 values, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size).
*@li img_h: An optional int32, specifying the height of the source image.
*@li img_w: An optional int32, specifying the width of the source image.
*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image.
*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image.
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). \n
*@par Outputs:
*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. \n
*@attention Constraints:
* This operator applies only to SSD networks.
*@see SSDDetectionOutput()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
*/
REG_OP(PriorBoxDV2)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBoxDV2);
/**
*@brief Performs Position Sensitive ROI Pooling. \n
*@par Inputs:
* Two inputs, including:
*@li x: A tensor of type float16 or float32, describing the feature
* map. Dimension C1 must be equal to
* (int((output_dim + 15) / C0)) * group_size * group_size.
*@li rois: A tensor of type float16 or float32, with shape
* [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
* elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
* the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
* greater than or equal to "0.0". \n
*@par Attributes:
*@li output_dim: A required int32, specifying the number of output channels,
* must be greater than 0.
*@li group_size: A required int32, specifying the number of groups to encode
* position-sensitive score maps, must be within the range (0, 128).
*@li spatial_scale: A required float32, scaling factor for mapping the input
* coordinates to the ROI coordinates. \n
*@par Outputs:
*y: A tensor of type float16 or float32, describing the result
* feature map. \n
*@attention Constraints:
* NC1HWC0: the channel dimension must be group_size squared, and rois_num must be a multiple of 16.
*/
REG_OP(PSROIPooling)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(output_dim, Int)
    .REQUIRED_ATTR(group_size, Int)
    .REQUIRED_ATTR(spatial_scale, Float)
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OP_END_FACTORY_REG(PSROIPooling)
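
// Illustrative sketch only, not part of the original header, of the documented
// constraint on dimension C1: ((output_dim + C0 - 1) / C0) rounds the output
// channels up to a multiple of C0, matching (int((output_dim + 15) / C0)) for
// the usual 5HD block size C0 = 16. Hypothetical helper name.
inline int PsRoiPoolingC1Sketch(int output_dim, int group_size, int c0) {
  return ((output_dim + c0 - 1) / c0) * group_size * group_size;
}
// e.g. output_dim = 21, group_size = 7, c0 = 16: C1 = 2 * 7 * 7 = 98.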
/**
*@brief Returns detection result. \n
*@par Inputs:
* Five inputs, including:
*@li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
*@li bbox_delta: An NCHWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
*@li score: An NCHWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
*@li im_info: An ND tensor of type float16 or float32, specifying the image information.
*@li actual_rois_num: An optional NCHW tensor of type int32, specifying the number of valid boxes per batch.
*@par Attributes:
*@li batch_rois: An optional int32, specifying the number of images to be predicted. Defaults to "1".
*@li num_classes: A required int32, specifying the number of classes to be predicted. The value must be greater than 0.
*@li score_threshold: A required float32, specifying the threshold for box filtering. The value range is [0.0, 1.0].
*@li iou_threshold: A required float32, specifying the confidence threshold for box filtering, which is the output "obj" of operator Region. The value range is (0.0, 1.0).
*@par Outputs:
*@li box: A tensor of type float16 or float32 for the proposal of actual output, with output shape [batch, numBoxes, 8].
* 8 means [x1, y1, x2, y2, score, label, batchID, NULL]. The maximum value of numBoxes is 1024;
* that is, numBoxes = min(the maximum number of input boxes, 1024).
*@li actual_bbox_num: A tensor of type int32 with shape [batch, num_classes], specifying the number of output boxes. \n
*@attention Constraints:
*@li totalnum < max_rois_num * batch_rois.
*@li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
*@li "bbox_delta" must be with shape (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(FSRDetectionOutput)
    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(im_info, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OPTIONAL_INPUT(actual_rois_num, TensorType({DT_INT32}))
    .OUTPUT(actual_bbox_num, TensorType({DT_INT32}))
    .OUTPUT(box, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(batch_rois, Int, 1)
    .REQUIRED_ATTR(num_classes, Int)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(iou_threshold, Float)
    .OP_END_FACTORY_REG(FSRDetectionOutput)
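
// Illustrative sketch only, not part of the original header, of the 5HD input
// shapes stated in the constraints above: the class channels are padded up to
// multiples of 16 (C0). Hypothetical helper; shape5hd = {N, C1, H, W, C0}.
inline void FsrScoreShapeSketch(int total_num, int num_classes,
                                int shape5hd[5]) {
  shape5hd[0] = total_num;                // valid input boxes of all images
  shape5hd[1] = (num_classes + 15) / 16;  // "score": (num_classes + 15) // 16
  shape5hd[2] = 1;
  shape5hd[3] = 1;
  shape5hd[4] = 16;                       // C0
}
// For "bbox_delta", C1 is (num_classes * 4 + 15) / 16 instead.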
/**
*@brief Returns detection result. \n
*@par Inputs:
* Three inputs, including:
*@li bbox_delta: An ND tensor of type float16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
*@li score: An ND tensor of type float16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput.
*@li anchors: An ND tensor of type float16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
*@par Attributes:
*@li num_classes: An optional int32, specifying the number of classes to be predicted. Defaults to "2". The value must be greater than 1 and less than 1025.
*@li share_location: An optional bool, specifying whether the location is shared among classes. Defaults to "true".
*@li background_label_id: An optional int32, specifying the background label ID. Must be "0".
*@li iou_threshold: An optional float32, specifying the NMS threshold. Defaults to "0.3".
*@li top_k: An optional int32, specifying the top-k value. Defaults to "200".
*@li eta: An optional float32, specifying the eta value. Defaults to "1.0".
*@li variance_encoded_in_target: An optional bool, specifying whether the variance is encoded in the target. Defaults to "false".
*@li code_type: An optional int32, specifying the code type. Defaults to "1" (only "2" is supported). The corner type is "1", center_size is "2", corner_size is "3".
*@li keep_top_k: An optional int32, specifying the top-k value after NMS. Defaults to "-1".
*@li confidence_threshold: An optional float32, specifying the top-k filter threshold. Only detections with confidence greater than the threshold are considered. Defaults to "0.0".
*@par Outputs:
*@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
*@li y: A tensor of type float16 or float32 with shape [batch, keep_top_k, 8], describing the information of each output box.
* In the output shape, 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax, null).
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(SSDDetectionOutput)
    .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(out_boxnum, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(num_classes, Int, 2)
    .ATTR(share_location, Bool, true)
    .ATTR(background_label_id, Int, 0)
    .ATTR(iou_threshold, Float, 0.3)
    .ATTR(top_k, Int, 200)
    .ATTR(eta, Float, 1.0)
    .ATTR(variance_encoded_in_target, Bool, false)
    .ATTR(code_type, Int, 1)
    .ATTR(keep_top_k, Int, -1)
    .ATTR(confidence_threshold, Float, 0.0)
    .OP_END_FACTORY_REG(SSDDetectionOutput)
/**
*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3. \n
*@par Inputs:
*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),
* where "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged
* as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn. \n
*@par Attributes:
*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3.
*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h).
*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3". Defaults to "V3".
*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
*@li softmaxtree: A bool, fixed to "false"; defined in Lite but not used. Defaults to "false". \n
*@par Outputs:
*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2],
* where "ceilx" indicates that the dimension is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width*2+32, 32)/2],
* where "ceilx" indicates that the dimension is aligned upwards with the second parameter. Specifies the confidence.
*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width*2+32, 32)/2],
* where "ceilx" indicates that the dimension is aligned upwards with the second parameter. Specifies the prediction classes. \n
*@attention Constraints:
*@li This operator applies to YOLO v2 and v3 networks.
*@li The succeeding layer of the Yolo operator must be operator Yolov3DetectionOutput.
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(Yolo)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(yolo_version, String, "V3")
    .ATTR(softmax, Bool, false)
    .ATTR(background, Bool, false)
    .ATTR(softmaxtree, Bool, false)
    .OP_END_FACTORY_REG(Yolo)
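
// Illustrative sketch only, not part of the original header, of the "ceilx"
// used in the output shapes above: the value is aligned upwards to a multiple
// of the second parameter (an assumption based on the wording of the note).
inline int CeilxSketch(int value, int align) {
  return (value + align - 1) / align * align;
}
// e.g. the inner dim of coord_data for a 13 x 13 grid:
// ceilx(13 * 13 * 2 + 32, 32) / 2 = ceilx(370, 32) / 2 = 384 / 2 = 192.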
/**
*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3. \n
*@par Inputs:
*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),
* where "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged
* as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn. \n
*@par Attributes:
*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3.
*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h).
*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
*@li yolo_version: A string, specifying the YOLO version. Defaults to "V5".
*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
*@li softmaxtree: A bool, fixed to "false"; defined in Lite but not used. Defaults to "false". \n
*@par Outputs:
*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2],
* where "ceilx" indicates that the dimension is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width*2+32, 32)/2],
* where "ceilx" indicates that the dimension is aligned upwards with the second parameter. Specifies the confidence.
*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width*2+32, 32)/2],
* where "ceilx" indicates that the dimension is aligned upwards with the second parameter. Specifies the prediction classes. \n
*@attention Constraints:
*@li This operator applies to YOLO v2, v3, and v5 networks.
*@li The succeeding layer of the Yolo operator must be operator Yolov5DetectionOutput.
*@par Third-party framework compatibility
* It is a custom operator.
*/
REG_OP(YoloPreDetection)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(yolo_version, String, "V5")
    .ATTR(softmax, Bool, false)
    .ATTR(background, Bool, false)
    .ATTR(softmaxtree, Bool, false)
    .OP_END_FACTORY_REG(YoloPreDetection)
/**
*@brief Performs YOLO V5 detection. \n
*@par Inputs:
*Ten inputs, including:
*@li Operator Yolov5DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n
* There are three Yolo operators at Yolov5DetectionOutput's preceding layer on YOLO v5. For details, see the description of operator Yolo.
*@li img_info: A float16 or float32, describing the image information including the required image height and width \n
* and the actual image height and width.
*@par Attributes:
*@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes".
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering (which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering (which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. \n
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
* describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first of the 8 numbers is valid: the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
*@attention Constraints:
*@li This operator applies only to the YOLO v5 network.
*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators.
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV5DetectionOutput)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .ATTR(N, Int, 10)
    .ATTR(resize_origin_img_to_net, Bool, false)
    .ATTR(out_box_dim, Int, 3)
    .ATTR(alpha, Float, 2.0)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV5DetectionOutput)
/**
*@brief Performs YOLO V5 detection.
*@par Inputs:
*Sixteen inputs, including:
*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v5) are used as the inputs of operator Yolov5DetectionOutputD.
* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
*@li imginfo: A float16, describing the image information including the required image height and width
* and the actual image height and width.
*@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of "height" groups of [0, 1, 2...(width-1)]
* is formed for the three Yolo outputs, respectively. It is a dynamic input. \n
*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively. \n
*@par Attributes:
*@li biases: A required list of float32 values. "biases = Number of Yolo operators at the preceding layer x 2 x boxes".
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering (which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering (which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
* describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first of the 8 numbers is valid: the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
*
*@attention Constraints:
*@li This operator applies only to the YOLO v5 network.
*@li The preceding layer of operator Yolov5DetectionOutputD must be three Yolo operators.
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator.
*/
REG_OP(YoloV5DetectionOutputD)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .ATTR(N, Int, 10)
    .ATTR(resize_origin_img_to_net, Bool, false)
    .ATTR(out_box_dim, Int, 3)
    .ATTR(alpha, Float, 2.0)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV5DetectionOutputD)
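
// Illustrative sketch only, not part of the original header, of the windex and
// hindex inputs described above: windex repeats the column index 0..width-1 in
// every row, hindex repeats the row index in every column. Hypothetical
// helper; buffers are row-major [height, width].
inline void MakeYoloIndexSketch(int height, int width,
                                float* windex, float* hindex) {
  for (int h = 0; h < height; ++h) {
    for (int w = 0; w < width; ++w) {
      windex[h * width + w] = static_cast<float>(w);
      hindex[h * width + w] = static_cast<float>(h);
    }
  }
}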
/**
*@brief Performs YOLO V2 detection. \n
*@par Inputs:
* Four inputs, including:
*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput.
* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
*@li img_info: A float16 or float32, describing the image information including the required image height and width
* and the actual image height and width.
*
*@par Attributes:
*@li biases: A required list of floats. "biases = Number of Yolo operators at the preceding layer x 2 x boxes".
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering
* (which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. \n
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering
* (which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. Only the first of the 8 numbers is valid:
* the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024.
*
*@attention Constraints:
*@li This operator applies only to the YOLO v2 network.
*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator.
*
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV2DetectionOutput)
    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 5)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 20)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV2DetectionOutput)

/**
*@brief Performs YOLO V2 detection . \n
*@par Inputs:
*Six inputs, including:
*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput.
* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
*@li img_info: A float16 or float32, describing the image information including the required image height and width
* and the actual image height and width.
*@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
* It consists of height groups of [0, 1, 2, ..., (width-1)]: [[0,1,2,...,(width-1)], [0,1,2,...,(width-1)], ..., [0,1,2,...,(width-1)]].
*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0,0,...,0],[1,1,...,1],[2,2,...,2],...,[height-1,height-1,...,height-1]].
*
*@par Attributes:
*@li biases: A required list of floats. "Length of biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is applied to the output "obj" of operator Yolo. The value range is [0.0, 1.0].
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is applied to the output "class" of operator Yolo. The value range is [0.0, 1.0] . \n
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
*@li box_out_num: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes.
* Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch, and the maximum number of valid boxes per batch is 1024.
*
*@attention Constraints:
*@li This operator applies only to the YOLO v2 network.
*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator . \n
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
*/
REG_OP(YoloV2DetectionOutputD)
    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 5)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 20)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV2DetectionOutputD)
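
/*
 * Example (illustrative sketch, not part of this header): host-side generation
 * of the "windex" and "hindex" auxiliary inputs described above, assuming
 * "height" and "width" are the feature-map dimensions and a float buffer is
 * used (needs <vector>):
 *
 *     std::vector<float> windex(height * width);
 *     std::vector<float> hindex(height * width);
 *     for (int h = 0; h < height; ++h) {
 *       for (int w = 0; w < width; ++w) {
 *         windex[h * width + w] = static_cast<float>(w);  // rows of 0..width-1
 *         hindex[h * width + w] = static_cast<float>(h);  // row h is all h
 *       }
 *     }
 */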

/**
*@brief Performs YOLO V3 detection . \n
*@par Inputs:
*Ten inputs, including:
*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class".
* There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
*@li img_info: A float16 or float32, describing the image information including the required image height and width
* and the actual image height and width.
*@par Attributes:
*@li biases: Required lists of floats ("biases_low", "biases_mid", and "biases_high" for the three Yolo layers). "Total length of biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is applied to the output "obj" of operator Yolo. The value range is [0.0, 1.0] . \n
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is applied to the output "class" of operator Yolo. The value range is [0.0, 1.0] . \n
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch, and the maximum number of valid boxes per batch is 1024.
*@attention Constraints:
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators . \n
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV3DetectionOutput)
    .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases_low, ListFloat)
    .REQUIRED_ATTR(biases_mid, ListFloat)
    .REQUIRED_ATTR(biases_high, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutput)

/**
*@brief Performs YOLO V3 detection . \n
*@par Inputs:
*16 inputs, including:
*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
*@li img_info: A float16, describing the image information including the required image height and width
* and the actual image height and width.
*@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
* It consists of height groups of [0, 1, 2, ..., (width-1)]: [[0,1,2,...,(width-1)], [0,1,2,...,(width-1)], ..., [0,1,2,...,(width-1)]],
* formed for the three Yolo outputs, respectively . \n
*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs.
* [[0,0,...,0],[1,1,...,1],[2,2,...,2],...,[height-1,height-1,...,height-1]] is formed for the three Yolo outputs, respectively . \n
*@par Attributes:
*@li biases: Required lists of float32 ("biases_low", "biases_mid", and "biases_high" for the three Yolo layers). "Total length of biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is applied to the output "obj" of operator Yolo. The value range is [0.0, 1.0].
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is applied to the output "class" of operator Yolo. The value range is [0.0, 1.0].
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch, and the maximum number of valid boxes per batch is 1024.
*@attention Constraints:
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
*/
REG_OP(YoloV3DetectionOutputD)
    .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases_low, ListFloat)
    .REQUIRED_ATTR(biases_mid, ListFloat)
    .REQUIRED_ATTR(biases_high, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputD)

/**
*@brief Performs YOLO V3 detection . \n
*@par Inputs:
*Ten inputs, including:
*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class".
* There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
*@li img_info: A float16 or float32, describing the image information including the required image height and width
* and the actual image height and width.
*@par Attributes:
*@li biases: A required list of floats. "Length of biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is applied to the output "obj" of operator Yolo. The value range is [0.0, 1.0].
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is applied to the output "class" of operator Yolo. The value range is [0.0, 1.0].
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
* describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch, and the maximum number of valid boxes per batch is 1024.
*@attention Constraints:
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV3DetectionOutputV2)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .ATTR(N, Int, 10)
    .ATTR(resize_origin_img_to_net, Bool, false)
    .ATTR(out_box_dim, Int, 3)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputV2)

/**
*@brief Performs YOLO V3 detection.
*@par Inputs:
*16 inputs, including:
*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
*@li img_info: A float16, describing the image information including the required image height and width
* and the actual image height and width.
*@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
* It consists of height groups of [0, 1, 2, ..., (width-1)]: [[0,1,2,...,(width-1)], [0,1,2,...,(width-1)], ..., [0,1,2,...,(width-1)]],
* formed for the three Yolo outputs, respectively. It is a dynamic input. \n
*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0,0,...,0],[1,1,...,1],[2,2,...,2],...,[height-1,height-1,...,height-1]] is formed for the three Yolo outputs, respectively . \n
*@par Attributes:
*@li biases: A required list of float32. "Length of biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is applied to the output "obj" of operator Yolo. The value range is [0.0, 1.0].
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is applied to the output "class" of operator Yolo. The value range is [0.0, 1.0].
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
* describing the information of each output box.
* In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* Only the first of the 8 numbers is valid: it is the number of valid boxes in each batch, and the maximum number of valid boxes per batch is 1024.
*
*@attention Constraints:
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
*/
REG_OP(YoloV3DetectionOutputV2D)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .ATTR(N, Int, 10)
    .ATTR(resize_origin_img_to_net, Bool, false)
    .ATTR(out_box_dim, Int, 3)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)

/**
*@brief Spatial Pyramid Pooling, multi-level pooling.
* Pools out an (n, sum(c * 2^i * 2^i)) tensor, with i in range [0, pyramid_height) . \n
*@par Inputs:
*x: An NCHW tensor, supporting the float16 or float32 type . \n
*@par Attributes:
* @li pyramid_height: A required int32.
* Multi-level pooling out from 2^0 to 2^(pyramid_height-1).
* @li pool_method: An optional int32, pooling method: 0-MAX, 1-AVE.
* Defaults to "0" . \n
*@par Outputs:
*y: An NCHW tensor, supporting the float16 or float32 type . \n
*@attention Constraints:
* @li pyramid_height: pyramid_height should be in range [0, 7).
* The pooling parameters should satisfy the Caffe pooling constraint (pad < kernel).
* @li feature_size: the input feature map height and width should be within [1, 510] . \n
*@par Third-party framework compatibility
* Compatible with the Caffe operator SPP.
*/
REG_OP(SPP)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(pyramid_height, Int)
    .ATTR(pool_method, Int, 0)
    .OP_END_FACTORY_REG(SPP)
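
/*
 * Example (illustrative sketch): the flattened per-image output length implied
 * by the formula above, sum(c * 2^i * 2^i) for i in [0, pyramid_height):
 *
 *     int64_t SppOutputLen(int64_t c, int pyramid_height) {
 *       int64_t len = 0;
 *       for (int i = 0; i < pyramid_height; ++i) {
 *         int64_t bins = (1LL << i) * (1LL << i);  // a 2^i x 2^i pooling grid
 *         len += c * bins;
 *       }
 *       return len;  // e.g. c = 256, pyramid_height = 3 -> 256*(1+4+16) = 5376
 *     }
 */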

/**
*@brief Performs Region of Interest (ROI) Pooling . \n
*@par Inputs:
* Three inputs, including:
*@li x: A tensor of type float16 or float32, describing the feature
* map. The data of x must be greater than or equal to "0.0".
*@li rois: A tensor of type float16 or float32, with 3D shape
* [batch, 5, roi_max_num], describing the ROIs. Each ROI consists of five
* elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
* the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
* greater than or equal to "0.0".
* roi_max_num must be less than or equal to 6000 and must be divisible by 16.
* The input data of the rois cannot exceed the width and height range of x;
* otherwise, the accuracy of the output result may not be as expected.
*@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying
* the number of ROIs per batch . \n
*@par Attributes:
*@li pooled_h: A required int32, specifying the pooled H. Must be greater
* than 0.
*@li pooled_w: A required int32, specifying the pooled W. Must be greater
* than 0.
*@li spatial_scale_h: A required scaling factor for mapping the input
* coordinates of height to the ROI coordinates.
*@li spatial_scale_w: A required scaling factor for mapping the input
* coordinates of width to the ROI coordinates . \n
*@par Outputs:
*y: A tensor of type float16 or float32, describing the result
* feature map . \n
*@attention Constraints:
* For the feature map input:
*@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
*@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
*@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
*@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
*@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
*@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(ROIPooling)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OPTIONAL_INPUT(roi_actual_num, TensorType({DT_INT32}))
    .REQUIRED_ATTR(pooled_h, Int)
    .REQUIRED_ATTR(pooled_w, Int)
    .REQUIRED_ATTR(spatial_scale_h, Float)
    .REQUIRED_ATTR(spatial_scale_w, Float)
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OP_END_FACTORY_REG(ROIPooling)
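
/*
 * Example (illustrative sketch, under the usual ROI Pooling convention; the
 * exact rounding used by this kernel is not specified here): mapping one ROI
 * from image coordinates to feature-map coordinates with the spatial scales
 * (needs <cmath>):
 *
 *     // x1, y1, x2, y2 are image-space ROI corners from the "rois" input.
 *     int fm_x1 = static_cast<int>(std::round(x1 * spatial_scale_w));
 *     int fm_y1 = static_cast<int>(std::round(y1 * spatial_scale_h));
 *     int fm_x2 = static_cast<int>(std::round(x2 * spatial_scale_w));
 *     int fm_y2 = static_cast<int>(std::round(y2 * spatial_scale_h));
 *     // Each of the pooled_h x pooled_w output cells then pools its
 *     // sub-window of [fm_y1, fm_y2] x [fm_x1, fm_x2].
 */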

/**
*@brief Computes the decode bbox function.
*@par Inputs:
*Inputs include:
* @li box_predictions: A Tensor. Must be float16.
* @li anchors: A Tensor. Must have the same type as box_predictions.
*@par Attributes:
* decode_clip: A required float, threshold of the decode process.
*@par Outputs:
* decoded_boxes: A Tensor. Must have the same type as box_predictions.
* N-D with shape [N, 4].
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeBbox)
    .INPUT(box_predictions, TensorType{DT_FLOAT16})
    .INPUT(anchors, TensorType{DT_FLOAT16})
    .OUTPUT(decoded_boxes, TensorType{DT_FLOAT16})
    .REQUIRED_ATTR(decode_clip, Float)
    .OP_END_FACTORY_REG(DecodeBbox)

/**
*@brief Computes the ClipBoxes function . \n
*@par Inputs:
*@li boxes_input: A Tensor. Must be float16. N-D with shape [N, 4].
*@li img_size: A Tensor. Must be int32. Shape [H, W] . \n
*@par Outputs:
*boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4].
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ClipBoxes)
    .INPUT(boxes_input, TensorType({DT_FLOAT16}))
    .INPUT(img_size, TensorType({DT_INT32}))
    .OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(ClipBoxes)

/**
*@brief Computes the ClipBoxesD function . \n
*@par Attributes:
*img_size: A required list of two ints [H, W], the image size . \n
*@par Inputs:
*boxes_input: A Tensor. Must be float16. N-D with shape [N, 4] . \n
*@par Outputs:
*boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4] . \n
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ClipBoxesD)
    .INPUT(boxes_input, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(img_size, ListInt)
    .OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(ClipBoxesD)
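
/*
 * Example (illustrative sketch, assuming box coordinates are clamped to the
 * image bounds given by img_size = [H, W]; needs <algorithm> for std::clamp):
 *
 *     // box = {x1, y1, x2, y2} in image coordinates.
 *     box.x1 = std::clamp(box.x1, 0.0f, static_cast<float>(W));
 *     box.y1 = std::clamp(box.y1, 0.0f, static_cast<float>(H));
 *     box.x2 = std::clamp(box.x2, 0.0f, static_cast<float>(W));
 *     box.y2 = std::clamp(box.y2, 0.0f, static_cast<float>(H));
 */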

/**
*@brief Computes the Fastrcnn Predictions function.
*
*@par Inputs:
*Inputs include:
* @li rois: A Tensor. Must be float16. N-D with shape [N*C, 4].
* @li score: A Tensor. Must be float16. N-D with shape [N, C+1].
*
*@par Attributes:
* @li nms_threshold: required, float, IOU threshold of the NMS process.
* @li score_threshold: required, float, score threshold of the top-k process.
* @li k: required, int, the "k" of the top-k process.
*@par Outputs:
* @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
* @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
* @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
*/
REG_OP(FastrcnnPredictions)
    .INPUT(rois, TensorType({DT_FLOAT16}))
    .INPUT(score, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(nms_threshold, Float)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(k, Int)
    .OUTPUT(sorted_rois, TensorType({DT_FLOAT16}))
    .OUTPUT(sorted_scores, TensorType({DT_FLOAT16}))
    .OUTPUT(sorted_classes, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(FastrcnnPredictions)

/**
*@brief Computes the Fastrcnn RpnProposals function . \n
*@par Inputs:
*Inputs include:
* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
* @li img_size: A Tensor. Must be int32. Shape [H, W] . \n
*@par Attributes:
* @li score_threshold: required, float, score threshold of the top-k process.
* @li k: required, int, the "k" of the top-k process.
* @li min_size: required, float, minimum box size threshold of the NMS process.
* @li nms_threshold: required, float, IOU threshold of the NMS process.
* @li post_nms_num: required, int, the number of boxes kept after the NMS process.
* @li score_filter: bool, mark of score_filter. Defaults to "true".
* @li box_filter: bool, mark of box_filter. Defaults to "true".
* @li score_sigmoid: bool, mark of score_sigmoid. Defaults to "false".
*@par Outputs:
* sorted_box: A Tensor. Must be float16, the sorted proposal boxes . \n
* @par Third-party framework compatibility
* Compatible with the PyTorch operator RPNProposals.
*/
REG_OP(RpnProposals)
    .INPUT(rois, TensorType({DT_FLOAT16}))
    .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
    .INPUT(img_size, TensorType({DT_INT32}))
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(k, Int)
    .REQUIRED_ATTR(min_size, Float)
    .REQUIRED_ATTR(nms_threshold, Float)
    .REQUIRED_ATTR(post_nms_num, Int)
    .ATTR(score_filter, Bool, true)
    .ATTR(box_filter, Bool, true)
    .ATTR(score_sigmoid, Bool, false)
    .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(RpnProposals)

/**
*@brief Computes the Fastrcnn RpnProposalsD function . \n
*@par Inputs:
*@li rois: A Tensor. Must be float16. N-D with shape [N, 4].
*@li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1] . \n
*@par Attributes:
*@li img_size: A required list of two ints [H, W], the image size.
*@li score_threshold: required, float, score threshold of the top-k process.
*@li k: required, int, the "k" of the top-k process.
*@li min_size: required, float, minimum box size threshold of the NMS process.
*@li nms_threshold: required, float, IOU threshold of the NMS process.
*@li post_nms_num: required, int, the number of boxes kept after the NMS process.
*@li score_filter: bool, mark of score_filter. Defaults to "true".
*@li box_filter: bool, mark of box_filter. Defaults to "true".
*@li score_sigmoid: bool, mark of score_sigmoid. Defaults to "false".
*@par Outputs:
*sorted_box: A Tensor of output. Must be float16. N-D with shape [N, 1] . \n
* @par Third-party framework compatibility
* Compatible with the PyTorch operator RPNProposals . \n
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL AND DEPRECATED. Please use RpnProposals instead.
*/
REG_OP(RpnProposalsD)
    .INPUT(rois, TensorType({DT_FLOAT16}))
    .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(img_size, ListInt)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(k, Int)
    .REQUIRED_ATTR(min_size, Float)
    .REQUIRED_ATTR(nms_threshold, Float)
    .REQUIRED_ATTR(post_nms_num, Int)
    .ATTR(score_filter, Bool, true)
    .ATTR(box_filter, Bool, true)
    .ATTR(score_sigmoid, Bool, false)
    .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(RpnProposalsD)

/**
*@brief Computes the score-filter pre-sort function.
*
*@par Inputs:
*Inputs include:
* @li sorted_proposal: A Tensor. Must be float16.
* N-D with shape [8*6002, 8].
* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
*
*@par Attributes:
* @li img_size: required, a list of two ints [H, W], the image size.
* @li score_threshold: required, float, score threshold of the top-k process.
* @li k: required, int, the "k" of the top-k process.
* @li min_size: required, float, minimum box size threshold of the NMS process.
* @li nms_threshold: required, float, IOU threshold of the NMS process.
* @li post_nms_num: required, int, the number of boxes kept after the NMS process.
* @li box_filter: bool, mark of box_filter. Defaults to "true".
* @li core_max_num: int, maximum number of cores. Defaults to "8".
*@par Outputs:
*sorted_box: A Tensor. Must be float16. N-D with shape [N, 1].
*/
REG_OP(RpnProposalPostProcessing)
    .INPUT(sorted_proposal, TensorType({DT_FLOAT16}))
    .INPUT(proposal_num, TensorType({DT_UINT32}))
    .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(img_size, ListInt)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(k, Int)
    .REQUIRED_ATTR(min_size, Float)
    .REQUIRED_ATTR(nms_threshold, Float)
    .REQUIRED_ATTR(post_nms_num, Int)
    .ATTR(box_filter, Bool, true)
    .ATTR(core_max_num, Int, 8)
    .OP_END_FACTORY_REG(RpnProposalPostProcessing)

/**
*@brief Computes the DecodeBoundariesTarget function.
*@par Inputs:
*Inputs include:
* @li boundary_predictions: A Tensor. Must be float16.
* @li anchors: A Tensor. Must be float16.
*@par Outputs:
* boundary_encoded: A Tensor. Must be float16.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeBoundariesTarget)
    .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(DecodeBoundariesTarget)

/**
*@brief Computes the DecodeCornerpointsTargetBG function.
*
*@par Inputs:
*Inputs include:
* @li keypoints_prediction: A Tensor. Must be float16.
* @li anchors: A Tensor. Must be float16.
*
*@par Outputs:
* keypoints_decoded: A Tensor. Must be float16.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeCornerpointsTargetBG)
    .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(DecodeCornerpointsTargetBG)

/**
*@brief Computes the DecodeCornerpointsTargetWrtCenterV1 function.
*
*@par Inputs:
*Inputs include:
* @li keypoints_prediction: A Tensor. Must be float16.
* @li anchors: A Tensor. Must be float16.
*
*@par Outputs:
* keypoints_decoded: A Tensor. Must be float16.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeCornerpointsTargetWrtCenterV1)
    .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(DecodeCornerpointsTargetWrtCenterV1)

/**
*@brief Computes the DecodeWheelsTarget function.
*
*@par Inputs:
*Inputs include:
* @li boundary_predictions: A Tensor. Must be float16.
* @li anchors: A Tensor. Must be float16.
*
*@par Outputs:
* boundary_encoded: A Tensor. Must be float16.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeWheelsTarget)
    .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT16}))
    .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
    .OP_END_FACTORY_REG(DecodeWheelsTarget)

/**
*@brief Computes NMS for input boxes and scores; supports multiple batches and classes.
* Performs clip-to-window, score filtering, top-k, and NMS.
*@par Inputs:
* Four inputs, including:
*@li boxes: A 4D Tensor of type float16 with
* shape (batch, num_anchors, num_classes, 4). "batch" indicates the batch size of the image,
* "num_anchors" indicates the number of boxes, and "num_classes" indicates the number of detection classes.
* The value "4" refers to "x0", "x1", "y0", and "y1".
*@li scores: A 3D Tensor of type float16 with
* shape (batch, num_anchors, num_classes).
*@li clip_window: Window size, a 2D Tensor of type float16 with
* shape (batch, 4). "4" refers to "anchor_x0", "anchor_x1", "anchor_y0", and "anchor_y1".
*@li num_valid_boxes: Number of valid boxes for each batch, a 1D Tensor of type int32 with
* shape (batch,) . \n
*@par Attributes:
*@li score_threshold: A required attribute of type float32, specifying the threshold for score filtering.
*@li iou_threshold: A required attribute of type float32, specifying the IOU threshold for NMS.
*@li max_size_per_class: A required attribute of type int, specifying the NMS output number per class.
*@li max_total_size: A required attribute of type int, specifying the NMS output number per batch.
*@li change_coordinate_frame: An optional attribute of type bool, specifying whether to normalize coordinates after clipping.
*@li transpose_box: An optional attribute of type bool, specifying whether a transpose was inserted before this op. Must be "false" . \n
*@par Outputs:
*@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4),
* specifying the output NMS boxes per batch.
*@li nmsed_scores: A 2D Tensor of type float16 with shape (batch, max_total_size),
* specifying the output NMS scores per batch.
*@li nmsed_classes: A 2D Tensor of type float16 with shape (batch, max_total_size),
* specifying the output NMS classes per batch.
*@li nmsed_num: A 1D Tensor of type int32 with shape (batch,), specifying the valid number of nmsed_boxes . \n
*@attention Constraints:
* Only computation of float16 data is supported.
* Note: when (the number of classes per image) * max_size_per_class is too large, compilation will fail with an insufficient-memory error.
*/
REG_OP(BatchMultiClassNonMaxSuppression)
    .INPUT(boxes, TensorType({DT_FLOAT16}))
    .INPUT(scores, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(clip_window, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(num_valid_boxes, TensorType({DT_INT32}))
    .OUTPUT(nmsed_boxes, TensorType({DT_FLOAT16}))
    .OUTPUT(nmsed_scores, TensorType({DT_FLOAT16}))
    .OUTPUT(nmsed_classes, TensorType({DT_FLOAT16}))
    .OUTPUT(nmsed_num, TensorType({DT_INT32}))
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(iou_threshold, Float)
    .REQUIRED_ATTR(max_size_per_class, Int)
    .REQUIRED_ATTR(max_total_size, Int)
    .ATTR(change_coordinate_frame, Bool, false)
    .ATTR(transpose_box, Bool, false)
    .OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression)

/**
* @brief Converts normalized bounding boxes to absolute coordinates . \n
* @par Inputs:
* @li normalized_boxes: A 3D Tensor of type float16 or float32.
* @li shape_hw: A 1D Tensor of type int32 . \n
* @par Attributes:
* reversed_box: An optional bool, specifying whether the last two dims are "4,num" or
* "num,4": "true" for "4,num", "false" for "num,4". Defaults to "false" . \n
* @par Outputs:
* y: A Tensor. Has the same type and shape as "normalized_boxes" . \n
* @attention Constraints:
* "normalized_boxes"'s shape must be (batch,num,4) or (batch,4,num).
* "shape_hw"'s shape must be (4,).
*/
REG_OP(ToAbsoluteBBox)
    .INPUT(normalized_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(shape_hw, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reversed_box, Bool, false)
    .OP_END_FACTORY_REG(ToAbsoluteBBox)

/**
*@brief Computes the normalize bbox function.
*
*@par Inputs:
*Inputs include:
* @li boxes: A Tensor. Must be float16 or float32.
* @li shape_hw: A Tensor. Must be int32.
*
*@par Attributes:
* reversed_box: optional, bool. Defaults to "false".
*
*@par Outputs:
* y: A Tensor. Must have the same type and shape as boxes.
*/
REG_OP(NormalizeBBox)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(shape_hw, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reversed_box, Bool, false)
    .OP_END_FACTORY_REG(NormalizeBBox)
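
/*
 * Example (illustrative sketch, under the plain-scaling assumption that
 * NormalizeBBox and ToAbsoluteBBox are duals of each other; shape_hw supplies
 * the image height H and width W):
 *
 *     // Normalize: absolute -> relative coordinates in [0, 1].
 *     float nx1 = x1 / W, ny1 = y1 / H;
 *     float nx2 = x2 / W, ny2 = y2 / H;
 *     // ToAbsoluteBBox reverses this: x1 = nx1 * W, y1 = ny1 * H, and so on.
 */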

/**
*@brief Computes the decode bboxv2 function.
*
*@par Inputs:
*Inputs include:
* @li boxes: A Tensor. Must be float16 or float32.
* @li anchors: A Tensor. Must be float16 or float32.
*
*@par Attributes:
* @li scales: optional, list of floats. Defaults to {1.0, 1.0, 1.0, 1.0}.
* @li decode_clip: optional, float, threshold of the decode process. Defaults to "0.0".
* @li reversed_box: optional, bool. Defaults to "false".
*
*@par Outputs:
* y: A Tensor. Must have the same type as boxes.
*/
REG_OP(DecodeBboxV2)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0})
    .ATTR(decode_clip, Float, 0.0)
    .ATTR(reversed_box, Bool, false)
    .OP_END_FACTORY_REG(DecodeBboxV2)
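
/*
 * Example (illustrative sketch of the conventional SSD/Faster R-CNN style
 * anchor decode; the kernel's exact semantics, including "reversed_box" and
 * clipping by "decode_clip", may differ; needs <cmath>):
 *
 *     // Anchor as center/size (xa, ya, wa, ha); code (ty, tx, th, tw)
 *     // already divided element-wise by the "scales" attribute.
 *     float yc = ty * ha + ya;
 *     float xc = tx * wa + xa;
 *     float h  = std::exp(th) * ha;
 *     float w  = std::exp(tw) * wa;
 *     // Decoded corners:
 *     float y1 = yc - h / 2, x1 = xc - w / 2;
 *     float y2 = yc + h / 2, x2 = xc + w / 2;
 */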

/**
*@brief Sorts the input tensor and returns the sorted values and their indices.
*
*@par Inputs:
*Inputs include:
* x: A Tensor. Dtype support: float16, float, int16, int8,
* uint8, int32, int64.
*@par Attributes:
* @li axis: An optional attribute indicating the sorting axis. Defaults to "-1", the last axis.
* @li descending: An optional attribute indicating whether to sort in descending order. Defaults to "false".
*
*@par Outputs:
* @li y1: A Tensor. Must have the same type as x; the sorted values.
* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
*
*@attention Constraints:
* The operator relies on an unstable sorting algorithm.
*/
REG_OP(Sort)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
                          DT_UINT8, DT_INT32, DT_INT64}))
    .OUTPUT(y1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
                            DT_UINT8, DT_INT32, DT_INT64}))
    .OUTPUT(y2, TensorType({DT_INT32}))
    .ATTR(axis, Int, -1)
    .ATTR(descending, Bool, false)
    .OP_END_FACTORY_REG(Sort)
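
/*
 * Example (illustrative sketch): the value/index pair returned by Sort along
 * one axis matches a host-side argsort (std::sort is likewise unstable):
 *
 *     #include <algorithm>
 *     #include <numeric>
 *     #include <vector>
 *     std::vector<int32_t> Argsort(const std::vector<float>& x, bool descending) {
 *       std::vector<int32_t> idx(x.size());
 *       std::iota(idx.begin(), idx.end(), 0);
 *       std::sort(idx.begin(), idx.end(), [&](int32_t a, int32_t b) {
 *         return descending ? x[a] > x[b] : x[a] < x[b];
 *       });
 *       return idx;  // corresponds to y2; gathering x by idx yields y1
 *     }
 */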

/**
*@brief Computes IOU for input bboxes and gtboxes.
*@par Inputs:
* Two inputs, including:
*@li bboxes: A 2D Tensor of type float16 or float32 with shape (N, 4), where "4" refers to "x0", "x1", "y0", and "y1".
*@li gtboxes: A 2D Tensor of type float16 or float32 with shape (M, 4), where "4" refers to "x0", "x1", "y0", and "y1". \n
*@par Attributes:
*@li mode: An optional attribute of type string, specifying the IOU computation mode. Defaults to "iou". \n
*@par Outputs:
*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n
*@attention Constraints:
* Only computation of float16 data is supported.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use Iou instead.
*/
REG_OP(PtIou)
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(PtIou)
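
/*
 * Example (illustrative sketch): a scalar reference of the intersection over
 * union that fills one element of "overlap", written with explicit corner
 * coordinates so no storage layout is assumed (needs <algorithm>):
 *
 *     float Iou(float bx1, float by1, float bx2, float by2,
 *               float gx1, float gy1, float gx2, float gy2) {
 *       float iw = std::max(0.0f, std::min(bx2, gx2) - std::max(bx1, gx1));
 *       float ih = std::max(0.0f, std::min(by2, gy2) - std::max(by1, gy1));
 *       float inter = iw * ih;                              // intersection area
 *       float area_b = (bx2 - bx1) * (by2 - by1);
 *       float area_g = (gx2 - gx1) * (gy2 - gy1);
 *       return inter / (area_b + area_g - inter);           // mode "iou"
 *     }
 */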

/**
*@brief Greedily selects a subset of bounding boxes in descending order of
* score . \n
*@par Inputs:
*Input boxes and scores must be float16 type. Inputs include:
*@li boxes: An input tensor with shape [num_batches,spatial_dimension,4].
* The single box data format is indicated by center_point_box.
*@li scores: An input tensor with shape [num_batches,num_classes,spatial_dimension].
*@li max_output_size: A scalar integer tensor representing the maximum number
* of boxes to be selected by non max suppression.
*@li iou_threshold: A 0-D float tensor representing the threshold for deciding
* whether boxes overlap too much with respect to IOU.
*@li score_threshold: A 0-D float tensor representing the threshold for
* deciding when to remove boxes based on score . \n
*@par Attributes:
*center_point_box: Integer indicating the format of the box data.
* The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2],
* where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
* of box corners, and the coordinates can be provided as normalized
* (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
* 1 - the box data is supplied as [x_center, y_center, width, height].
* Mostly used for PyTorch models. \n
*@par Outputs:
*@li selected_indices: A 2-D integer tensor of shape [M] representing the
* selected indices from the boxes tensor, where M <= max_output_size. \n
*@attention Constraints:
*Input boxes and scores must be float16 type . \n
*@par Third-party framework compatibility
*Compatible with the onnx NonMaxSuppression operator.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NonMaxSuppressionV6)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
    .OUTPUT(selected_indices, TensorType({DT_INT32}))
    .ATTR(center_point_box, Int, 0)
    .ATTR(max_boxes_size, Int, 0)
    .OP_END_FACTORY_REG(NonMaxSuppressionV6)

/**
*@brief Greedily selects a subset of bounding boxes in descending order of
* score . \n
*@par Inputs:
*Input boxes and scores must be float16 type. Inputs include:
*@li boxes: An input tensor with shape [num_batches,spatial_dimension,4].
* The single box data format is indicated by center_point_box.
*@li scores: An input tensor with shape [num_batches,num_classes,spatial_dimension].
*@li max_output_size: A scalar integer tensor representing the maximum number
* of boxes to be selected by non max suppression.
*@li iou_threshold: A 0-D float tensor representing the threshold for deciding
* whether boxes overlap too much with respect to IOU.
*@li score_threshold: A 0-D float tensor representing the threshold for
* deciding when to remove boxes based on score . \n
*@li index_id: An input tensor with shape [num_batches,num_classes,spatial_dimension,3],
* the last dim representing (batch_id, class_id, index_id) . \n
*@par Attributes:
*@li center_point_box: Integer indicating the format of the box data.
* The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2],
* where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
* of box corners, and the coordinates can be provided as normalized
* (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
* 1 - the box data is supplied as [x_center, y_center, width, height].
* Mostly used for PyTorch models. \n
*@li max_boxes_size: An optional attribute integer representing the real maximum
* number of boxes to be selected by non max suppression . \n
*@par Outputs:
*selected_indices: A 2-D integer tensor of shape [M] representing the
* selected indices from the boxes tensor, where M <= max_output_size. \n
*@attention Constraints:
*Input boxes and scores must be float16 type . \n
*@par Third-party framework compatibility
*Compatible with the onnx NonMaxSuppression operator.
*/
REG_OP(NonMaxSuppressionV7)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16}))
    .OUTPUT(selected_indices, TensorType({DT_INT32}))
    .ATTR(center_point_box, Int, 0)
    .ATTR(max_boxes_size, Int, 0)
    .OP_END_FACTORY_REG(NonMaxSuppressionV7)
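
/*
 * Example (illustrative sketch): the greedy selection both NMS variants above
 * perform per batch and class, assuming the boxes are already sorted by
 * descending score and an IOU helper like the one sketched after PtIou
 * (needs <array> and <vector>):
 *
 *     float IouXYXY(const std::array<float, 4>& a, const std::array<float, 4>& b);
 *
 *     std::vector<int32_t> GreedyNms(const std::vector<std::array<float, 4>>& boxes,
 *                                    float iou_threshold, int max_output_size) {
 *       std::vector<int32_t> keep;
 *       for (int32_t i = 0; i < static_cast<int32_t>(boxes.size()); ++i) {
 *         bool suppressed = false;
 *         for (int32_t j : keep) {
 *           if (IouXYXY(boxes[i], boxes[j]) > iou_threshold) {
 *             suppressed = true;   // overlaps a higher-scored kept box
 *             break;
 *           }
 *         }
 *         if (!suppressed) keep.push_back(i);
 *         if (static_cast<int>(keep.size()) >= max_output_size) break;
 *       }
 *       return keep;  // row indices into the sorted boxes
 *     }
 */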

/**
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n
*@par Inputs:
* Three inputs, including:
*@li features: A 5HD Tensor list of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of type float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* and the value "5" refers to the index of the image where the ROI is located plus "x0", "y0", "x1", and "y1".
*@li index: An optional Tensor of type int32.
*@par Attributes:
*@li finest_scale: An optional attribute of type int, specifying the scale for computing the levels of "rois".
*@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of the "rois" coordinates.
*@li spatial_scale: An optional attribute of type list float32, specifying the scaling ratio of "features"
* to the original image.
*@li pooled_height: An optional attribute of type int32, specifying the H dimension.
*@li pooled_width: An optional attribute of type int32, specifying the W dimension.
*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency
* of each output. If this attribute is set to "0", the sampling frequency is determined adaptively from the ROI size
* (rounded up from a floating point value). Defaults to "0".
*@li pool_mode: An optional attribute of type string to indicate the pooling mode. Defaults to "avg" . \n
*@li aligned: An optional attribute of type bool, specifying whether to align to corners. Defaults to "true" . \n
*@par Outputs:
* y: Outputs the feature sample of each ROI position. The format is a 5HD Tensor of type float32 or float16.
* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height",
* "pooled_width", and "features", respectively.
*@par Third-party framework compatibility
*Compatible with the mmdetection SingleRoIExtractor operator.
*/
REG_OP(RoiExtractor)
    .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(index, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(finest_scale, Int, 56)
    .ATTR(roi_scale_factor, Float, 0)
    .ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32})
    .ATTR(pooled_height, Int, 7)
    .ATTR(pooled_width, Int, 7)
    .ATTR(sample_num, Int, 0)
    .ATTR(pool_mode, String, "avg")
    .ATTR(aligned, Bool, true)
    .OP_END_FACTORY_REG(RoiExtractor)
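
/*
 * Example (illustrative sketch of mmdetection's SingleRoIExtractor level
 * mapping, which "finest_scale" parameterizes; stated here as an assumption
 * about this fused operator's behavior; needs <algorithm> and <cmath>):
 *
 *     int MapRoiLevel(float w, float h, int finest_scale, int num_levels) {
 *       float scale = std::sqrt(w * h);   // characteristic size of the ROI
 *       int lvl = static_cast<int>(std::floor(std::log2(scale / finest_scale + 1e-6f)));
 *       return std::clamp(lvl, 0, num_levels - 1);  // index into "features"
 *     }
 */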

/**
*@brief Performs Position Sensitive (PS) ROI Pooling . \n
*@par Inputs:
* Two inputs, including:
*@li x: A tensor of type float16 or float32, describing the feature
* map. Dimension C1 must be equal to
* (int((output_dim + 15) / C0)) * group_size * group_size.
*@li rois: A tensor of type float16 or float32, with shape
* [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
* elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
* the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
* greater than or equal to "0.0" . \n
*@par Attributes:
*@li output_dim: A required int32, specifying the number of output channels,
* must be greater than 0.
*@li group_size: A required int32, specifying the number of groups to encode
* position-sensitive score maps, must be within the range (0, 128).
*@li spatial_scale: A required float32, scaling factor for mapping the input
* coordinates to the ROI coordinates . \n
*@par Outputs:
*y: A tensor of type float16 or float32, describing the result
* feature map . \n
*@attention Constraints:
* For NC1HWC0 input: the channel count must be group_size squared, and rois_num must be a multiple of 16.
*/
REG_OP(PSROIPoolingV2)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(spatial_scale, Float)
    .REQUIRED_ATTR(output_dim, Int)
    .REQUIRED_ATTR(group_size, Int)
    .OP_END_FACTORY_REG(PSROIPoolingV2)
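
/*
 * Example (illustrative sketch): the C1 constraint stated above, with C0 = 16
 * for the NC1HWC0 format:
 *
 *     int RequiredC1(int output_dim, int group_size, int c0 = 16) {
 *       return ((output_dim + c0 - 1) / c0) * group_size * group_size;
 *     }
 *     // e.g. output_dim = 21, group_size = 7 -> ceil(21/16) * 49 = 98
 */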

/**
*@brief Performs Position Sensitive (PS) ROI Pooling Grad . \n
*@par Inputs:
* Two inputs, including:
*@li x: A tensor of type float16 or float32, describing the result
* feature map . \n
*@li rois: A tensor of type float16 or float32, with shape
* [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
* elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
* the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
* greater than or equal to "0.0" . \n
*@par Attributes:
*@li output_dim: A required int32, specifying the number of output channels,
* must be greater than 0.
*@li group_size: A required int32, specifying the number of groups to encode
* position-sensitive score maps, must be within the range (0, 128).
*@li spatial_scale: A required float32, scaling factor for mapping the input
* coordinates to the ROI coordinates . \n
*@li input_size: A required listInt, specifying the size (H, W) of the gradient input.
*@par Outputs:
*y: A tensor of type float16 or float32, describing the feature
* map. Dimension C1 must be equal to
* (int((output_dim + 15) / C0)) * group_size * group_size.
*@attention Constraints:
* For NC1HWC0 input: the channel count must be group_size squared, and rois_num must be a multiple of 16.
*/
REG_OP(PSROIPoolingGradV2D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(spatial_scale, Float)
    .REQUIRED_ATTR(output_dim, Int)
    .REQUIRED_ATTR(group_size, Int)
    .REQUIRED_ATTR(input_size, ListInt)
    .OP_END_FACTORY_REG(PSROIPoolingGradV2D)

/**
*@brief Generates the responsible flags of anchors in a single feature map.
*@par Inputs:
*@li gt_bboxes: Ground truth boxes, a 2-D Tensor with shape `[batch, 4]`.
*@par Attributes:
*@li featmap_size: The size of the feature map, listint.
*@li strides: Stride of the current level, listint.
*@li num_base_anchors: The number of base anchors.
*@par Outputs:
*flags: The valid flags of each anchor in a single level.
*/
REG_OP(AnchorResponseFlags)
    .INPUT(gt_bboxes, TensorType({DT_FLOAT}))
    .OUTPUT(flags, TensorType({DT_UINT8}))
    .REQUIRED_ATTR(featmap_size, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(num_base_anchors, Int)
    .OP_END_FACTORY_REG(AnchorResponseFlags)
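
/*
 * Example (illustrative sketch following mmdetection's responsible-flags
 * logic, which this operator appears to mirror; stated as an assumption): an
 * anchor is responsible when its grid cell contains a ground-truth box center.
 *
 *     // gt corners (x1, y1, x2, y2); featmap_size = {feat_h, feat_w};
 *     // strides = {stride_w, stride_h}; flags is the uint8 output buffer.
 *     int gt_cx_cell = static_cast<int>((x1 + x2) * 0.5f / stride_w);
 *     int gt_cy_cell = static_cast<int>((y1 + y2) * 0.5f / stride_h);
 *     int cell = gt_cy_cell * feat_w + gt_cx_cell;
 *     for (int a = 0; a < num_base_anchors; ++a) {
 *       flags[cell * num_base_anchors + a] = 1;  // responsible anchor
 *     }
 */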
/**
*@brief Generates bounding boxes based on yolo's "anchor" and "ground-truth" boxes.
* It is a customized mmdetection operator . \n
*@par Inputs:
* Three inputs, including:
*@li anchor_boxes: anchor boxes generated by the yolo training set.
* A 2D Tensor of type float32 or float16 with shape (N, 4). "N" indicates
* the number of ROIs, and the value "4" refers to (tx, ty, tw, th).
*@li gt_bboxes: target of the transformation, e.g., ground-truth boxes.
* A 2D Tensor of type float32 or float16 with shape (N, 4).
* "N" indicates the number of ROIs, and 4 indicates "dx", "dy", "dw", and "dh" .
*@li stride: Scale for each box.
* A 1D Tensor of type int32 with shape (N,).
* "N" indicates the number of ROIs. \n
*@par Attributes:
*performance_mode: Selects the performance mode, "high_precision" or
* "high_performance". With float32 input, "high_precision" keeps the output
* error below 0.0001, while "high_performance" delivers the best performance
* with an output error below 0.005.
*@par Outputs:
*encoded_bboxes: Bboxes generated based on "anchor_boxes" and "gt_bboxes".
* Has the same format and type as "anchor_boxes".
*
*@attention Constraints:
* The input anchor boxes support a maximum of N = 20480. \n
*/
REG_OP(YoloBoxesEncode)
    .INPUT(anchor_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gt_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(stride, TensorType({DT_INT32}))
    .ATTR(performance_mode, String, "high_precision")
    .OUTPUT(encoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OP_END_FACTORY_REG(YoloBoxesEncode)
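
/*
 * Illustrative sketch (an assumption modeled on mmdetection's
 * YOLOBBoxCoder, not the exact kernel): centers are encoded as
 * stride-normalized offsets clamped to (0, 1) and sizes as log ratios.
 *
 *   #include <algorithm>
 *   #include <array>
 *   #include <cmath>
 *
 *   // anchor, gt: [x1, y1, x2, y2]; returns [dx, dy, dw, dh].
 *   std::array<float, 4> YoloEncodeRef(const std::array<float, 4>& anchor,
 *                                      const std::array<float, 4>& gt,
 *                                      float stride) {
 *     const float eps = 1e-6f;
 *     float ax = (anchor[0] + anchor[2]) * 0.5f;
 *     float ay = (anchor[1] + anchor[3]) * 0.5f;
 *     float aw = anchor[2] - anchor[0], ah = anchor[3] - anchor[1];
 *     float gx = (gt[0] + gt[2]) * 0.5f, gy = (gt[1] + gt[3]) * 0.5f;
 *     float gw = gt[2] - gt[0], gh = gt[3] - gt[1];
 *     auto clamp01 = [eps](float v) {
 *       return std::min(std::max(v, eps), 1.0f - eps);
 *     };
 *     return {clamp01((gx - ax) / stride + 0.5f),
 *             clamp01((gy - ay) / stride + 0.5f),
 *             std::log(std::max(gw / aw, eps)),
 *             std::log(std::max(gh / ah, eps))};
 *   }
 */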
/**
*@brief Performs grid-based positive sample assignment: assigns the
* corresponding ground-truth index to each positive bbox . \n
*@par Inputs:
* Eight inputs, including:
*@li assigned_gt_inds: A Tensor of type float16 or float32, with shape (n, ).
*@li overlaps: A Tensor of the same dtype as "assigned_gt_inds". The IoU between gt_bboxes and bboxes, with shape (k, n).
*@li box_responsible_flags: A Tensor of type uint8. Flags indicating whether each box is responsible.
*@li max_overlaps: A Tensor of the same dtype as "assigned_gt_inds", equal to overlaps.max(axis=0).
*@li argmax_overlaps: A Tensor of type int32, equal to overlaps.argmax(axis=0).
*@li gt_max_overlaps: A Tensor of the same dtype as "assigned_gt_inds", equal to overlaps.max(axis=1).
*@li gt_argmax_overlaps: A Tensor of type int32, equal to overlaps.argmax(axis=1).
*@li num_gts: A Tensor of type int32, the real number of ground-truth boxes (k), with shape (1, ).
*@par Attributes:
*@li pos_iou_thr: Float. The IoU threshold for positive bboxes.
*@li min_pos_iou: Float. The minimum IoU for a bbox to be considered positive.
*@li gt_max_assign_all: Bool. Whether to assign all bboxes that share the highest overlap with some gt to that gt.
*@par Outputs:
* assigned_gt_inds_pos: A Tensor of type float16 or float32, with shape (n, ).
*/
REG_OP(GridAssignPositive)
    .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(box_responsible_flags, TensorType({ DT_UINT8 }))
    .INPUT(max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(argmax_overlaps, TensorType({ DT_INT32 }))
    .INPUT(gt_max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(gt_argmax_overlaps, TensorType({ DT_INT32 }))
    .INPUT(num_gts, TensorType({ DT_INT32 }))
    .OUTPUT(assigned_gt_inds_pos, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(pos_iou_thr, Float)
    .REQUIRED_ATTR(min_pos_iou, Float)
    .REQUIRED_ATTR(gt_max_assign_all, Bool)
    .OP_END_FACTORY_REG(GridAssignPositive)
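
/*
 * Illustrative sketch (an assumption based on mmdetection's GridAssigner
 * positive step, not the exact kernel): responsible bboxes above
 * "pos_iou_thr" take their best gt (1-based), then each gt claims its best
 * responsible bbox(es) when its best overlap exceeds "min_pos_iou".
 *
 *   #include <cstdint>
 *   #include <vector>
 *
 *   void GridAssignPositiveRef(
 *       std::vector<float>& assigned,                     // (n,), in/out
 *       const std::vector<std::vector<float>>& overlaps,  // (k, n)
 *       const std::vector<uint8_t>& responsible,          // (n,)
 *       const std::vector<float>& max_ov,                 // (n,)
 *       const std::vector<int>& argmax_ov,                // (n,)
 *       const std::vector<float>& gt_max_ov,              // (k,)
 *       const std::vector<int>& gt_argmax_ov,             // (k,)
 *       int num_gts, float pos_iou_thr, float min_pos_iou,
 *       bool gt_max_assign_all) {
 *     const size_t n = assigned.size();
 *     for (size_t j = 0; j < n; ++j)
 *       if (responsible[j] && max_ov[j] > pos_iou_thr)
 *         assigned[j] = static_cast<float>(argmax_ov[j] + 1);
 *     for (int i = 0; i < num_gts; ++i) {
 *       if (gt_max_ov[i] <= min_pos_iou) continue;
 *       if (gt_max_assign_all) {
 *         // Every responsible bbox tied at the gt's best overlap is taken.
 *         for (size_t j = 0; j < n; ++j)
 *           if (responsible[j] && overlaps[i][j] == gt_max_ov[i])
 *             assigned[j] = static_cast<float>(i + 1);
 *       } else if (responsible[gt_argmax_ov[i]]) {
 *         assigned[gt_argmax_ov[i]] = static_cast<float>(i + 1);
 *       }
 *     }
 *   }
 */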
/**
* @brief Calculates the inverse gradient of GIoU. \n
*@par Inputs:
*@li dy: data of grad increment, a 1D Tensor of type float16 or float32 with
* shape (N,).
*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (4, N). "N" indicates the number of bounding boxes, and the value
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (4, M). "M" indicates the number of ground truth boxes, and
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n
*@par Attributes:
*@li trans: An optional attr, true for 'xywh', false for 'xyxy'; only true is supported currently.
*@li is_cross: An optional attr; if false, M must equal N. Only false is supported currently.
*@li mode: An optional attr, a character string in ['iou', 'iof'];
* only 'iou' is supported currently. \n
*@par Outputs:
*@li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N].
*@li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M].
*/
REG_OP(GIoUGrad)
    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(trans, Bool, false)
    .ATTR(is_cross, Bool, true)
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(GIoUGrad)
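
/*
 * Context for the gradient (a sketch of the standard forward GIoU, not this
 * op): GIoU subtracts from IoU the fraction of the smallest enclosing box C
 * not covered by the union, GIoU = IoU - (|C| - |A U B|) / |C|.
 *
 *   #include <algorithm>
 *
 *   // b, g: [x1, y1, x2, y2]; assumes non-degenerate boxes (uni, c > 0).
 *   float GIoURef(const float b[4], const float g[4]) {
 *     float iw = std::max(0.f, std::min(b[2], g[2]) - std::max(b[0], g[0]));
 *     float ih = std::max(0.f, std::min(b[3], g[3]) - std::max(b[1], g[1]));
 *     float inter = iw * ih;
 *     float uni = (b[2] - b[0]) * (b[3] - b[1]) +
 *                 (g[2] - g[0]) * (g[3] - g[1]) - inter;
 *     float cw = std::max(b[2], g[2]) - std::min(b[0], g[0]);
 *     float ch = std::max(b[3], g[3]) - std::min(b[1], g[1]);
 *     float c = cw * ch;
 *     return inter / uni - (c - uni) / c;
 *   }
 */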
/**
* @brief Calculates the inverse gradient of DIoU. \n
* @par Inputs:
* @li dy: data of grad increment, a 1D Tensor of type float16 or float32 with
* shape (N,).
* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (4, N). "N" indicates the number of bounding boxes, and the value
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (4, M). "M" indicates the number of ground truth boxes, and
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n
* @par Attributes:
* @li trans: An optional attr, true for 'xywh', false for 'xyxy'; only true is supported currently.
* @li is_cross: An optional attr; if false, M must equal N. Only false is supported currently.
* @li mode: An optional attr, a character string in ['iou', 'iof'];
* only 'iou' is supported currently. \n
* @par Outputs:
* @li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N].
* @li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M].
*/
REG_OP(DIoUGrad)
    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(trans, Bool, false)
    .ATTR(is_cross, Bool, true)
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(DIoUGrad)
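
/*
 * Context for the gradient (a sketch of the standard forward DIoU, not this
 * op): DIoU penalizes IoU by the squared center distance over the squared
 * diagonal of the smallest enclosing box, DIoU = IoU - rho^2 / c^2.
 *
 *   #include <algorithm>
 *
 *   // b, g: [x1, y1, x2, y2]; assumes non-degenerate boxes.
 *   float DIoURef(const float b[4], const float g[4]) {
 *     float iw = std::max(0.f, std::min(b[2], g[2]) - std::max(b[0], g[0]));
 *     float ih = std::max(0.f, std::min(b[3], g[3]) - std::max(b[1], g[1]));
 *     float inter = iw * ih;
 *     float uni = (b[2] - b[0]) * (b[3] - b[1]) +
 *                 (g[2] - g[0]) * (g[3] - g[1]) - inter;
 *     float cw = std::max(b[2], g[2]) - std::min(b[0], g[0]);  // enclosing w
 *     float ch = std::max(b[3], g[3]) - std::min(b[1], g[1]);  // enclosing h
 *     float dx = (b[0] + b[2] - g[0] - g[2]) * 0.5f;  // center distance x
 *     float dy = (b[1] + b[3] - g[1] - g[3]) * 0.5f;  // center distance y
 *     return inter / uni - (dx * dx + dy * dy) / (cw * cw + ch * ch);
 *   }
 */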
/**
* @brief Calculates the inverse gradient of CIoU. \n
* @par Inputs:
* @li dy: data of grad increment, a 1D Tensor of type float16 or float32 with
* shape (N,).
* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
* shape (4, N). "N" indicates the number of bounding boxes, and the value
* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
* with shape (4, M). "M" indicates the number of ground truth boxes, and
* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
* @li atan_sub: Intermediate result of the forward calculation,
* a 1D Tensor of type float16 or float32 with shape (N,). \n
* @par Attributes:
* @li trans: An optional attr, true for 'xywh', false for 'xyxy'; only true is supported currently.
* @li is_cross: An optional attr; if false, M must equal N. Only false is supported currently.
* @li mode: An optional attr, a character string in ['iou', 'iof'];
* only 'iou' is supported currently. \n
* @par Outputs:
* @li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N].
* @li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M].
*/
REG_OP(CIoUGrad)
    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(atan_sub, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(trans, Bool, false)
    .ATTR(is_cross, Bool, true)
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(CIoUGrad)
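
/*
 * Context for the "atan_sub" input (an assumption based on the standard
 * CIoU formulation): CIoU adds an aspect-ratio penalty
 * v = 4 / pi^2 * (atan(wg / hg) - atan(w / h))^2 on top of DIoU, and the
 * atan difference is the natural forward intermediate to save for reuse in
 * the backward pass.
 *
 *   #include <cmath>
 *
 *   // Aspect-ratio penalty of CIoU for one box pair; writes the saved
 *   // intermediate into *atan_sub.
 *   float CIoUAspectPenalty(float w, float h, float wg, float hg,
 *                           float* atan_sub) {
 *     const float kPi = 3.14159265358979323846f;
 *     *atan_sub = std::atan(wg / hg) - std::atan(w / h);
 *     return 4.0f / (kPi * kPi) * (*atan_sub) * (*atan_sub);
 *   }
 */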
/**
* @brief Computes the overlap area between rotated boxes (RotatedOverlaps) . \n
*@par Inputs:
*@li boxes: Rotated bounding boxes, a 3D Tensor of type float32 with
* shape (B, 5, N). "N" indicates the number of boxes, and the value
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta].
* @li query_boxes: Rotated query boxes, a 3D Tensor of type float32 with
* shape (B, 5, K). "K" indicates the number of boxes, and the value
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta].
* @par Attributes:
* trans: An optional attr, true for 'xyxyt', false for 'xywht'.
*@par Outputs:
* overlaps: A 3D Tensor of type float32 with shape [B, N, K].
*@attention Constraints:
* In each batch, an invalid box cannot appear before a valid box.
*/
REG_OP(RotatedOverlaps)
    .INPUT(boxes, TensorType({DT_FLOAT}))
    .INPUT(query_boxes, TensorType({DT_FLOAT}))
    .OUTPUT(overlaps, TensorType({DT_FLOAT}))
    .ATTR(trans, Bool, false)
    .OP_END_FACTORY_REG(RotatedOverlaps)
/**
*@brief Computes the IoU of rotated boxes (RotatedIou) . \n
* @par Inputs:
*@li boxes: Rotated bounding boxes, a 3D Tensor of type float32 with
* shape (B, 5, N). "N" indicates the number of boxes, and the value
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta].
* @li query_boxes: Rotated query boxes, a 3D Tensor of type float32 with
* shape (B, 5, K). "K" indicates the number of boxes, and the value
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta].
* @par Attributes:
*@li trans: An optional attr, true for 'xyxyt', false for 'xywht'.
*@li mode: An optional attr, a character string in ['iou', 'iof'];
* only 'iou' is supported currently.
*@li is_cross: Cross calculation when true, one-to-one calculation when false.
*@li v_threshold: An optional attr, providing condition relaxation for the intersection calculation.
*@li e_threshold: An optional attr, providing condition relaxation for the intersection calculation.
*@par Outputs:
* iou: A 3D Tensor of type float32 with shape [B, N, K].
*@attention Constraints:
* In each batch, an invalid box cannot appear before a valid box.
*/
REG_OP(RotatedIou)
    .INPUT(boxes, TensorType({DT_FLOAT}))
    .INPUT(query_boxes, TensorType({DT_FLOAT}))
    .OUTPUT(iou, TensorType({DT_FLOAT}))
    .ATTR(trans, Bool, false)
    .ATTR(mode, String, "iou")
    .ATTR(is_cross, Bool, true)
    .ATTR(v_threshold, Float, 0)
    .ATTR(e_threshold, Float, 0)
    .OP_END_FACTORY_REG(RotatedIou)
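
/*
 * Relationship note (a sketch of the standard definition, not the kernel):
 * given the pairwise intersection areas from RotatedOverlaps, the 'iou'
 * mode divides by the union of the two rotated-box areas.
 *
 *   // overlap: intersection area of one (box, query_box) pair;
 *   // area_n, area_k: the two rotated-box areas (w * h each).
 *   float RotatedIouFromOverlap(float overlap, float area_n, float area_k) {
 *     return overlap / (area_n + area_k - overlap);  // union = sum - inter
 *   }
 */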
/**
*@brief Encodes rotated ground-truth boxes relative to rotated anchor boxes (RotatedBoxEncode). \n
*@par Inputs:
* Two inputs, including:
*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
* "B" indicates the batch size,
* "N" indicates the number of bounding boxes, and the value "5" refers to
* "x0", "x1", "y0", "y1" and "angle".
*@li gt_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
* "B" indicates the batch size,
* "N" indicates the number of bounding boxes, and the value "5" refers to
* "x0", "x1", "y0", "y1" and "angle". \n
* @par Attributes:
*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle",
* defaults to [1.0, 1.0, 1.0, 1.0, 1.0].
*@par Outputs:
*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N),
* specifying the variations between all anchor boxes and ground truth boxes.
*/
REG_OP(RotatedBoxEncode)
    .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gt_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0})
    .OP_END_FACTORY_REG(RotatedBoxEncode)
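
/*
 * Illustrative sketch (an assumption modeled on common rotated delta
 * coders; the exact component order and weight semantics of this kernel
 * may differ): with boxes in (x, y, w, h, angle) form, centers are
 * normalized by the anchor size, sizes are log ratios, and the angle
 * difference is taken directly; each component is scaled by "weight".
 *
 *   #include <array>
 *   #include <cmath>
 *
 *   std::array<float, 5> RotatedEncodeRef(const std::array<float, 5>& a,
 *                                         const std::array<float, 5>& g,
 *                                         const std::array<float, 5>& w) {
 *     return {w[0] * (g[0] - a[0]) / a[2],   // dx
 *             w[1] * (g[1] - a[1]) / a[3],   // dy
 *             w[2] * std::log(g[2] / a[2]),  // dw
 *             w[3] * std::log(g[3] / a[3]),  // dh
 *             w[4] * (g[4] - a[4])};         // dangle
 *   }
 */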
/**
*@brief Decodes rotated boxes by applying deltas to rotated anchor boxes (RotatedBoxDecode). \n
*@par Inputs:
* Two inputs, including:
*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
* "B" indicates the batch size,
* "N" indicates the number of bounding boxes, and the value "5" refers to
* "x0", "x1", "y0", "y1" and "angle".
*@li deltas: A 3D Tensor of float32 (float16) with shape (B, 5, N).
* "B" indicates the batch size,
* "N" indicates the number of bounding boxes, and the value "5" refers to
* "x0", "x1", "y0", "y1" and "angle". \n
*@par Attributes:
*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle",
* defaults to [1.0, 1.0, 1.0, 1.0, 1.0].
*@par Outputs:
*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N),
* specifying the decoded boxes obtained by applying "deltas" to "anchor_box".
*/
REG_OP(RotatedBoxDecode)
    .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0})
    .OP_END_FACTORY_REG(RotatedBoxDecode)
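
/*
 * Illustrative sketch (the inverse of the encode sketch above, under the
 * same assumptions): the deltas are unscaled by "weight" and applied to
 * the anchor box.
 *
 *   #include <array>
 *   #include <cmath>
 *
 *   std::array<float, 5> RotatedDecodeRef(const std::array<float, 5>& a,
 *                                         const std::array<float, 5>& d,
 *                                         const std::array<float, 5>& w) {
 *     return {a[0] + d[0] / w[0] * a[2],     // x
 *             a[1] + d[1] / w[1] * a[3],     // y
 *             a[2] * std::exp(d[2] / w[2]),  // w
 *             a[3] * std::exp(d[3] / w[3]),  // h
 *             a[4] + d[4] / w[4]};           // angle
 *   }
 */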
/**
* @brief Sorts ROIs to balance the workload on each core. \n
* @par Inputs:
* One input, including:
* @li rois: ROI positions. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* and the value "5" refers to "batch", "x0", "y0", "x1", and "y1", where "batch" is the index of the image the ROI belongs to.
* @par Outputs:
* @li balance_rois: A 2D Tensor of float32 or float16 with shape (N, 5), the reordered ROIs with balanced workload.
* @li index: A 1D Tensor of int32 with shape (N,), the index of each output ROI in the original input.
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(BalanceRois)
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(balance_rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(index, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(BalanceRois)
}  // namespace ge

#endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
