You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

ocr_ops.h 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*!
  17. * \file ocr_ops.h
  18. * \brief
  19. */
  20. #ifndef OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_
  21. #define OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_
  22. #include "graph/operator_reg.h"
  23. namespace ge {
  24. /**
  25. *@brief batch input x acording to attr batch_size and enqueue.
  26. *@par Inputs:
  27. *@li x: A Tensor need to batch of type float16/float32/float64/int8/int32/int64/uint8/uint32/uint64. \n
  28. *@li queue_id:A Tensor of type uint32, queue id.
  29. *@par Outputs:
  30. *enqueue_count: A Tensor of type int64, enqueue tensor number.
  31. *@par Attributes:
  32. *@li batch_size: An optional int. Batch size.
  33. *@li queue_name: An optional string. Queue name.
  34. *@li queue_depth: An optional int. Queue depth.
  35. *@li pad_mode: An optional string from: '"REPLICATE", "ZERO"'. Defaults to
  36. "REPLICATE". Pad mode.
  37. */
  38. REG_OP(BatchEnqueue)
  39. .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
  40. DT_INT8, DT_INT32, DT_INT64, DT_UINT8, DT_UINT32, DT_UINT64}))
  41. .OPTIONAL_INPUT(queue_id, TensorType({DT_UINT32}))
  42. .OUTPUT(enqueue_count, TensorType({DT_INT32}))
  43. .ATTR(batch_size, Int, 8)
  44. .ATTR(queue_name, String, "")
  45. .ATTR(queue_depth, Int, 100)
  46. .ATTR(pad_mode, String, "REPLICATE")
  47. .OP_END_FACTORY_REG(BatchEnqueue)
  48. /**
  49. *@brief batch input x acording to attr batch_size and enqueue.
  50. *@par Inputs:
  51. *@li imgs_data: A Tensor of type uint8. Multi img data value. \n
  52. *@li imgs_offset:A Tensor of type int32. Offset of every img data in input imgs_data. \n
  53. *@li imgs_size:A Tensor of type int32. Shape of every img data. \n
  54. *@li langs:A Tensor of type int32. Lang of every img data. \n
  55. *@li langs_score:A Tensor of type int32. Lang score of every img data. \n
  56. *@par Outputs:
  57. *@liimgs: A Tensor of type uint8. Multi imgs data after reconition pre handle.
  58. *@liimgs_relation: A Tensor of type int32. Output imgs orders in input imgs.
  59. *@liimgs_lang: A Tensor of type int32. Output batch imgs langs.
  60. *@par Attributes:
  61. *@li batch_size: An optional int. Batch size.
  62. *@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to
  63. "NHWC". Data format.
  64. *@li pad_mode: An optional string from: '"REPLICATE", "ZERO"'. Defaults to
  65. "REPLICATE". Pad mode.
  66. */
  67. REG_OP(OCRRecognitionPreHandle)
  68. .INPUT(imgs_data, TensorType({DT_UINT8}))
  69. .INPUT(imgs_offset, TensorType({DT_INT32}))
  70. .INPUT(imgs_size, TensorType({DT_INT32}))
  71. .INPUT(langs, TensorType({DT_INT32}))
  72. .INPUT(langs_score, TensorType({DT_FLOAT, DT_FLOAT16}))
  73. .OUTPUT(imgs, TensorType({DT_UINT8}))
  74. .OUTPUT(imgs_relation, TensorType({DT_INT32}))
  75. .OUTPUT(imgs_lang, TensorType({DT_INT32}))
  76. .OUTPUT(imgs_piece_fillers, TensorType({DT_INT32}))
  77. .ATTR(batch_size, Int, 8)
  78. .ATTR(data_format, String, "NHWC")
  79. .ATTR(pad_mode, String, "REPLICATE")
  80. .OP_END_FACTORY_REG(OCRRecognitionPreHandle)
  81. /**
  82. *@brief ocr detection pre handle.
  83. *@par Inputs:
  84. *img: A Tensor of type uint8. img data value. \n
  85. *@par Outputs:
  86. *@li resized_img: A Tensor of type uint8. Img after detection pre handle.
  87. *@li h_scale: A Tensor of type float. H scale.
  88. *@li w_scale: A Tensor of type float. W scale.
  89. *@par Attributes:
  90. *data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to
  91. "NHWC". Data format.
  92. */
  93. REG_OP(OCRDetectionPreHandle)
  94. .INPUT(img, TensorType({DT_UINT8}))
  95. .OUTPUT(resized_img, TensorType({DT_UINT8}))
  96. .OUTPUT(h_scale, TensorType({DT_FLOAT}))
  97. .OUTPUT(w_scale, TensorType({DT_FLOAT}))
  98. .ATTR(data_format, String, "NHWC")
  99. .OP_END_FACTORY_REG(OCRDetectionPreHandle)
  100. /**
  101. *@brief ocr identify prehandle.
  102. *@par Inputs:
  103. *@li imgs_data: A Tensor of type uint8. Multi img data value. \n
  104. *@li imgs_offset:A Tensor of type int32. Offset of every img data in input imgs_data. \n
  105. *@li imgs_size:A Tensor of type int32. Shape of every img data. \n
  106. *@par Outputs:
  107. *resized_imgs: A Tensor of type uint8. Multi imgs after identify pre handle.
  108. *@par Attributes:
  109. *@li size: An optional int. Size.
  110. *@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to
  111. "NHWC". Data format.
  112. */
  113. REG_OP(OCRIdentifyPreHandle)
  114. .INPUT(imgs_data, TensorType({DT_UINT8}))
  115. .INPUT(imgs_offset, TensorType({DT_INT32}))
  116. .INPUT(imgs_size, TensorType({DT_INT32}))
  117. .OUTPUT(resized_imgs, TensorType({DT_UINT8}))
  118. .REQUIRED_ATTR(size, ListInt)
  119. .ATTR(data_format, String, "NHWC")
  120. .OP_END_FACTORY_REG(OCRIdentifyPreHandle)
  121. /**
  122. *@brief batch dilate polygons according to expand_scale.
  123. *@par Inputs:
  124. *@li polys_data: A Tensor of type int32. point data of every polygon. \n
  125. *@li polys_offset:A Tensor of type int32. Offset of every polygon . \n
  126. *@li polys_size:A Tensor of type int32. Size of every polygon. \n
  127. *@li score:A Tensor of type float. Score of every point in image. \n
  128. *@li min_border:A Tensor of type int32. Minimum width of each polygon. \n
  129. *@li min_area_thr:A Tensor of type int32. Minimum area of each polygon. \n
  130. *@li score_thr:A Tensor of type float. Minimum confidence score of each polygon. \n
  131. *@li expands_cale:A Tensor of type float. Polygon expansion multiple. \n
  132. *@par Outputs:
  133. *@li dilated_polys_data: A Tensor of type int32. Point data of every dilated polygon. \n
  134. *@li dilated_polys_offset: A Tensor of type int32. Offset of every dilated polygon . \n
  135. *@li dilated_polys_size: A Tensor of type int32. Size of every dilated polygon. \n
  136. */
  137. REG_OP(BatchDilatePolys)
  138. .INPUT(polys_data, TensorType({DT_INT32}))
  139. .INPUT(polys_offset, TensorType({DT_INT32}))
  140. .INPUT(polys_size, TensorType({DT_INT32}))
  141. .INPUT(score, TensorType({DT_FLOAT}))
  142. .INPUT(min_border, TensorType({DT_INT32}))
  143. .INPUT(min_area_thr, TensorType({DT_INT32}))
  144. .INPUT(score_thr, TensorType({DT_FLOAT}))
  145. .INPUT(expands_cale, TensorType({DT_FLOAT}))
  146. .OUTPUT(dilated_polys_data, TensorType({DT_INT32}))
  147. .OUTPUT(dilated_polys_offset, TensorType({DT_INT32}))
  148. .OUTPUT(dilated_polys_size, TensorType({DT_INT32}))
  149. .OP_END_FACTORY_REG(BatchDilatePolys)
  150. /**
  151. *@brief find contours acording to img.
  152. *@par Inputs:
  153. *@li img: A Tensor of type uint8. Img data value. \n
  154. *@par Outputs:
  155. *@li polys_data: A Tensor of type int32. Point data of every contours. \n
  156. *@li polys_offset:A Tensor of type int32. Offset of every contours . \n
  157. *@li polys_size:A Tensor of type int32. Size of every contours. \n
  158. */
  159. REG_OP(OCRFindContours)
  160. .INPUT(img, TensorType({DT_UINT8}))
  161. .OUTPUT(polys_data, TensorType({DT_INT32}))
  162. .OUTPUT(polys_offset, TensorType({DT_INT32}))
  163. .OUTPUT(polys_size, TensorType({DT_INT32}))
  164. .ATTR(value_mode, Int, 0)
  165. .OP_END_FACTORY_REG(OCRFindContours)
  166. /**
  167. *@brief dequeue data acording to queue_id and queue_name.
  168. *@par Inputs:
  169. *@li queue_id:An Tensor of type uint32, queue id. \n
  170. *@par Outputs:
  171. *data: A Tensor of type RealNumberType, dequeue tensor. \n
  172. *@par Attributes:
  173. *@li output_type: A required type. dequeue data type.
  174. *@li output_shape: A required listint. dequeue data shape.
  175. *@li queue_name: An optional string. Queue name. \n
  176. */
  177. REG_OP(Dequeue)
  178. .OPTIONAL_INPUT(queue_id, TensorType({DT_UINT32}))
  179. .OUTPUT(data, TensorType::RealNumberType())
  180. .REQUIRED_ATTR(output_type, Type)
  181. .REQUIRED_ATTR(output_shape, ListInt)
  182. .ATTR(queue_name, String, "")
  183. .OP_END_FACTORY_REG(Dequeue);
  184. /**
  185. *@brief ocr detection post handle.
  186. *@par Inputs:
  187. *@li img: A Tensor of type uint8. original image data.
  188. *@li polys_data: A Tensor of type int32. point data of every poly.
  189. *@li polys_offset:A Tensor of type int32. Offset of every poly.
  190. *@li polys_size:A Tensor of type int32. Size of every poly. \n
  191. *@par Outputs:
  192. *@li imgs_data: A Tensor of type int32. imgs_data of original image.
  193. *@li imgs_offset: A Tensor of type int32. Offset of every imgs data.
  194. *@li imgs_size: A Tensor of type int32. Shape of every imgs data.
  195. *@li rect_points: A Tensor of type int32. Rect points of every imgs. \n
  196. *@par Attributes:
  197. *@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to
  198. "NHWC". Data format.
  199. */
  200. REG_OP(OCRDetectionPostHandle)
  201. .INPUT(img, TensorType({DT_UINT8}))
  202. .INPUT(polys_data, TensorType({DT_INT32}))
  203. .INPUT(polys_offset, TensorType({DT_INT32}))
  204. .INPUT(polys_size, TensorType({DT_INT32}))
  205. .OUTPUT(imgs_data, TensorType({DT_UINT8}))
  206. .OUTPUT(imgs_offset, TensorType({DT_INT32}))
  207. .OUTPUT(imgs_size, TensorType({DT_INT32}))
  208. .OUTPUT(rect_points, TensorType({DT_INT32}))
  209. .ATTR(data_format, String, "NHWC")
  210. .OP_END_FACTORY_REG(OCRDetectionPostHandle);
  211. /**
  212. *@brief resize and clip polys.
  213. *@par Inputs:
  214. *@li polys_data: A Tensor of type int32. point data of every poly.
  215. *@li polys_offset:A Tensor of type int32. Offset of every poly .
  216. *@li polys_size:A Tensor of type int32. Size of every poly.
  217. *@li img_h:A Tensor of type int32. Height of original image.
  218. *@li img_w:A Tensor of type int32. Width of original image.
  219. *@li h_scale:A Tensor of type float. Expand scale of height.
  220. *@li w_scale:A Tensor of type float. Expand scale of width. \n
  221. *@par Outputs:
  222. *@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n
  223. *@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly . \n
  224. *@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. \n
  225. *@li clipped_polys_num: A Tensor of type int32. Number of clipped polys. \n
  226. */
  227. REG_OP(ResizeAndClipPolys)
  228. .INPUT(polys_data, TensorType({DT_INT32}))
  229. .INPUT(polys_offset, TensorType({DT_INT32}))
  230. .INPUT(polys_size, TensorType({DT_INT32}))
  231. .INPUT(img_h, TensorType({DT_INT32}))
  232. .INPUT(img_w, TensorType({DT_INT32}))
  233. .INPUT(h_scale, TensorType({DT_FLOAT}))
  234. .INPUT(w_scale, TensorType({DT_FLOAT}))
  235. .OUTPUT(clipped_polys_data, TensorType({DT_INT32}))
  236. .OUTPUT(clipped_polys_offset, TensorType({DT_INT32}))
  237. .OUTPUT(clipped_polys_size, TensorType({DT_INT32}))
  238. .OUTPUT(clipped_polys_num, TensorType({DT_INT32}))
  239. .OP_END_FACTORY_REG(ResizeAndClipPolys);
  240. } // namespace ge
  241. #endif // OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示