batch_ops.h
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef GE_OP_BATCH_OPS_H_
#define GE_OP_BATCH_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Creates batches of tensors.
*@par Inputs:
*The input x_tensors can be a list or a dictionary of tensors. Inputs include: \n
*x_tensors: The list or dictionary of tensors to enqueue.
*@par Attributes:
*@li num_batch_threads: Number of threads that enqueue tensors. Batching is nondeterministic if num_batch_threads > 1.
*@li max_batch_size: Maximum size of a batch pulled from the queue.
*@li max_enqueued_batches: Maximum number of batches that may be enqueued at once.
*@li batch_timeout_micros: Batch processing timeout, in microseconds.
*@li allowed_batch_sizes: List of allowed batch sizes pulled from the queue.
*@li grad_timeout_micros: Timeout for gradient batch processing, in microseconds.
*@li container: If non-empty, this queue is placed in the given container. Otherwise, a default container is used.
*@li shared_name: If set, this queue will be shared under the given name across multiple sessions.
*@li batching_queue: Queue resource container.
*@par Outputs:
*@li y_index: Tensor, index of a BatchTensor. Must be in row-major order.
*@li y_id: Tensor, ID of a BatchTensor. Must be in row-major order.
*@li y_tensors: A list or dictionary of tensors with the same types as the inputs.
*@attention Constraints: \n
*-The implementation of Batch on Ascend uses the AI CPU, which limits performance. \n
*@par Quantization supported or not
*Not supported
*@par Quantized inference supported or not
*Supported
*@par L2 convergence supported or not
*@par Multiple batches supported or not
*/
REG_OP(Batch)
    .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \
        DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE}))
    .OUTPUT(y_index, TensorType({ DT_INT64 }))
    .OUTPUT(y_id, TensorType({ DT_INT64 }))
    .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \
        DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL}))
    .REQUIRED_ATTR(num_batch_threads, Int)
    .REQUIRED_ATTR(max_batch_size, Int)
    .ATTR(max_enqueued_batches, Int, 10)
    .REQUIRED_ATTR(batch_timeout_micros, Int)
    .ATTR(allowed_batch_sizes, ListInt, {})
    .REQUIRED_ATTR(grad_timeout_micros, Int)
    .ATTR(container, String, "")
    .ATTR(shared_name, String, "")
    .ATTR(batching_queue, String, "")
    .OP_END_FACTORY_REG(Batch)
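
// Usage sketch (illustrative, not part of the original header): builds a Batch
// node through the C++ class generated by REG_OP(Batch). The accessor names
// (create_dynamic_input_*, set_dynamic_input_*, set_attr_*) assume the standard
// expansion in graph/operator_reg.h; "src" stands for some upstream ge::Operator.
//
//   ge::op::Batch batch("batch");
//   batch.create_dynamic_input_x_tensors(1);      // reserve one dynamic input slot
//   batch.set_dynamic_input_x_tensors(0, src);    // wire in the upstream operator
//   batch.set_attr_num_batch_threads(1);          // one thread keeps batching deterministic
//   batch.set_attr_max_batch_size(32);
//   batch.set_attr_batch_timeout_micros(100000);  // flush incomplete batches after 0.1 s
//   batch.set_attr_grad_timeout_micros(100000);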

/**
*@brief Reverses the operation of Batch for a single output tensor.
*@par Inputs:
*The input x_tensor is the batched tensor produced by Batch. Inputs include: \n
* @li x_tensor: The tensor to unbatch.
* @li index: The matching batch_index obtained from Batch.
* @li id: The id scalar emitted by Batch.
*@par Attributes:
*@li timeout_micros: Timeout for the unbatch processing, in microseconds.
*@li container: If non-empty, this queue is placed in the given container. Otherwise, a default container is used.
*@li shared_name: If set, this queue will be shared under the given name across multiple sessions.
*@par Outputs:
*y_tensor: A tensor with the same type as the input tensor.
*@attention Constraints: \n
*-The implementation of Unbatch on Ascend uses the AI CPU, which limits performance. \n
*@par Quantization supported or not
*Not supported
*@par Quantized inference supported or not
*Supported
*@par L2 convergence supported or not
*@par Multiple batches supported or not
*/
REG_OP(Unbatch)
    .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
    .INPUT(index, TensorType({DT_INT64}))
    .INPUT(id, TensorType({DT_INT64}))
    .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
    .REQUIRED_ATTR(timeout_micros, Int)
    .ATTR(container, String, "")
    .ATTR(shared_name, String, "")
    .OP_END_FACTORY_REG(Unbatch)
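
// Usage sketch (illustrative): pairs Unbatch with the Batch node sketched above,
// routing the emitted y_index / y_id tensors back in. Addressing a dynamic output
// by indexed name ("y_tensors0", ...) is an assumption based on the usual
// DYNAMIC_OUTPUT convention.
//
//   ge::op::Unbatch unbatch("unbatch");
//   unbatch.set_input_x_tensor(batch, "y_tensors0");  // first dynamic output of Batch
//   unbatch.set_input_index(batch, "y_index");
//   unbatch.set_input_id(batch, "y_id");
//   unbatch.set_attr_timeout_micros(100000);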

/**
*@brief Computes the gradient of Unbatch. Acts like Batch, but uses the given batch_index to batch entries as they become available.
*@par Inputs:
*The input x_input can be a list or a dictionary of tensors. Inputs include: \n
* @li x_input: The input to the Unbatch operation.
* @li index: The batch_index given to the Unbatch operation.
* @li grad: The downstream gradient.
* @li id: The id scalar emitted by Batch.
*@par Attributes:
*@li container: If non-empty, this queue is placed in the given container. Otherwise, a default container is used.
*@li shared_name: If set, this queue will be shared under the given name across multiple sessions.
*@par Outputs:
*y_grad: The return value, either an empty tensor or the batched gradient.
*@attention Constraints: \n
*-The implementation of UnbatchGrad on Ascend uses the AI CPU, which limits performance. \n
*@par Quantization supported or not
*Not supported
*@par Quantized inference supported or not
*Supported
*@par L2 convergence supported or not
*@par Multiple batches supported or not
*/
REG_OP(UnbatchGrad)
    .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
    .INPUT(index, TensorType({DT_INT64}))
    .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
    .INPUT(id, TensorType({DT_INT64}))
    .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
    .ATTR(container, String, "")
    .ATTR(shared_name, String, "")
    .OP_END_FACTORY_REG(UnbatchGrad)
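
// Usage sketch (illustrative): wires UnbatchGrad from the same index / id pair
// plus the downstream gradient; "grad_src" is a hypothetical operator producing
// the incoming gradient, and the accessor names follow the same assumed
// REG_OP-generated pattern as above.
//
//   ge::op::UnbatchGrad unbatch_grad("unbatch_grad");
//   unbatch_grad.set_input_x_input(batch, "y_tensors0");
//   unbatch_grad.set_input_index(batch, "y_index");
//   unbatch_grad.set_input_grad(grad_src);
//   unbatch_grad.set_input_id(batch, "y_id");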
}  // namespace ge

#endif  // GE_OP_BATCH_OPS_H_

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module (ME) and the underlying hardware, acting as the bridge between them. GE takes the graph issued by ME as input, applies a series of deep graph optimizations, and outputs a graph that runs efficiently on the underlying hardware. GE is specifically optimized for the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture is shown in the diagram below.