You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

nn_ops.h 7.2 kB

5 years ago
5 years ago
3 years ago
5 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*!
  17. * \file nn_ops.h
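  18. * \brief Operator prototype registrations for InTopKV2, FusedBatchNormV2, SegmentSort, MultiMerge and SingleMerge.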
  19. */
  20. #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
  21. #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
  22. #include "graph/operator_reg.h"
  23. #include "nn_pooling_ops.h"
  24. namespace ge {
  25. /**
  26. * @brief Checks whether each target is among the top "k" predictions. \n
  27. * @par Inputs:
  28. * Three inputs, including:
  29. * @li predictions: A 2D Tensor of type float32, shaped "batch_size * classes".
  30. * @li targets: A 1D Tensor of type IndexNumberType. Holds "batch_size" class ids.
  31. * @li k: A 1D Tensor of the same type as "targets".
  32. * Specifies the number of top elements to look at when computing precision. \n
  33. * @par Outputs:
  34. * precision: A Tensor of type bool. \n
  35. * @attention Constraints:
  36. * @li "targets" must be a non-negative tensor.
  37. * @par Third-party framework compatibility
  38. * @li Compatible with the TensorFlow operator InTopKV2.
  39. */
  40. REG_OP(InTopKV2)
  41. .INPUT(predictions, TensorType({DT_FLOAT}))
  42. .INPUT(targets, TensorType(IndexNumberType))
  43. .INPUT(k, TensorType({IndexNumberType}))
  44. .OUTPUT(precision, TensorType({DT_BOOL}))
  45. .OP_END_FACTORY_REG(InTopKV2)
  46. /**
  47. *@brief Performs batch normalization. \n
  48. *@par Inputs:
  49. * Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
  50. *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
  51. *@li scale: A Tensor of type float32. Must be 1D if input "x" has format NHWC or NCHW. Must be 5D
  52. if input "x" has format NC1HWC0. Specifies the scaling factor.
  53. *@li offset: A Tensor of type float32. Must be 1D if input "x" has format NHWC or NCHW. Must be 5D
  54. if input "x" has format NC1HWC0. Specifies the offset.
  55. *@li mean: An optional Tensor of type float32. Must be 1D if input "x" has format NHWC or NCHW. Must be 5D
  56. if input "x" has format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
  57. operation is used for training.
  58. *@li variance: An optional Tensor of type float32. Must be 1D if input "x" has format NHWC or NCHW. Must be
  59. 5D if input "x" has format NC1HWC0. Specifies the variance used for inference. Must be "None"
  60. if the operation is used for training. \n
  61. *@par Attributes:
  62. *@li epsilon: An optional float32, specifying the small value added to the variance to avoid division by zero. Defaults to "0.0001".
  63. *@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
  64. *@li is_training: An optional bool, specifying whether the operation is used for training or inference. Defaults to "True". \n
  65. *@par Outputs:
  66. * Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
  67. *@li y: A 4D or 5D Tensor of type float16 or float32 holding the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
  68. *@li batch_mean: A Tensor of type float32. Must be 1D if input "x" has format NHWC or NCHW. Must be 5D
  69. if input "x" has format NC1HWC0. Specifies the mean of "x".
  70. *@li batch_variance: A Tensor of type float32. Must be 1D if input "x" has format NHWC or NCHW.
  71. Must be 5D if input "x" has format NC1HWC0. Specifies the variance of "x".
  72. *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" has format NHWC or NCHW.
  73. Must be 5D if input "x" has format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
  74. *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" has format NHWC or NCHW.
  75. Must be 5D if input "x" has format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output. \n
  76. *@attention Constraints:
  77. *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
  78. then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
  79. *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. \n
  80. */
  81. REG_OP(FusedBatchNormV2)
  82. .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
  83. .INPUT(scale, TensorType({DT_FLOAT}))
  84. .INPUT(offset, TensorType({DT_FLOAT}))
  85. .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
  86. .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
  87. .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
  88. .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
  89. .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
  90. .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
  91. .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
  92. .ATTR(epsilon, Float, 0.0001)
  93. .ATTR(data_format, String, "NHWC")
  94. .ATTR(is_training, Bool, true)
  95. .OP_END_FACTORY_REG(FusedBatchNormV2)
  96. /**
  97. * @brief Sorts a large amount of data. First operator of the TopK pipeline.
  98. * @par Inputs:
  99. * Two inputs, including:
  100. * @li input_data: A Tensor. Data to be sorted. Supports float16.
  101. * @li input_index: A Tensor. Range(0, 2048). Data type and format are the same as "input_data".
  102. * @par Attributes:
  103. * k_num: A required Int. Number of elements to be sorted.
  104. * @par Outputs:
  105. * One output, including:
  106. * output_proposal: A Tensor. Data type and format are the same as "input_data". Proposal sorted for each channel.
  107. */
  108. REG_OP(SegmentSort)
  109. .INPUT(input_data, TensorType({DT_FLOAT16}))
  110. .INPUT(input_index, TensorType({DT_FLOAT16}))
  111. .OUTPUT(output_proposal, TensorType({DT_FLOAT16}))
  112. .REQUIRED_ATTR(k_num, Int)
  113. .OP_END_FACTORY_REG(SegmentSort)
  114. /**
  115. * @brief Sorts a large amount of data. Second operator of the TopK pipeline.
  116. * @par Inputs:
  117. * One input, including:
  118. * input_proposal: A Tensor. Proposal sorted for each channel. Supports float16.
  119. * @par Attributes:
  120. * k_num: A required Int. Number of elements to be sorted.
  121. * @par Outputs:
  122. * One output, including:
  123. * output_proposal: A Tensor. Data type and format are the same as "input_proposal". Proposal sorted for each channel.
  124. */
  125. REG_OP(MultiMerge)
  126. .INPUT(input_proposal, TensorType({DT_FLOAT16}))
  127. .OUTPUT(output_proposal, TensorType({DT_FLOAT16}))
  128. .REQUIRED_ATTR(k_num, Int)
  129. .OP_END_FACTORY_REG(MultiMerge)
  130. /**
  131. * @brief Sorts a large amount of data. Third operator of the TopK pipeline.
  132. * @par Inputs:
  133. * One input, including:
  134. * input_proposal: A Tensor. Proposal sorted for each channel. Supports float16.
  135. * @par Attributes:
  136. * k_num: A required Int. Number of elements to be sorted.
  137. * @par Outputs:
  138. * Two outputs, including:
  139. * @li output_data: A Tensor. Data type and format are the same as "input_proposal". Sorted data.
  140. * @li output_index: A Tensor of type int32. Index of the sorted data.
  141. */
  142. REG_OP(SingleMerge)
  143. .INPUT(input_proposal, TensorType({DT_FLOAT16}))
  144. .OUTPUT(output_data, TensorType({DT_FLOAT16}))
  145. .OUTPUT(output_index, TensorType({DT_INT32}))
  146. .REQUIRED_ATTR(k_num, Int)
  147. .OP_END_FACTORY_REG(SingleMerge)
  148. }// namespace ge
  149. #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示