You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they may include dashes ('-') and can be up to 35 characters long.

add_kernel.cc 6.8 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "host_kernels/add_kernel.h"
  17. #include <cfloat>
  18. #include "common/math/math_util.h"
  19. #include "common/bcast.h"
  20. #include "graph/utils/type_utils.h"
  21. #include "inc/kernel_factory.h"
namespace ge {
namespace {
// Index of the first addend tensor in the kernel's input vector.
const size_t kAddFirstInput = 0;
// Index of the second addend tensor.
const size_t kAddSecondInput = 1;
// Index of the single output tensor.
const size_t kAddFirstOutput = 0;
// Add takes exactly two inputs and produces exactly one output.
const size_t kAddInputSize = 2;
const size_t kAddOutputSize = 1;
// Expands to one `case` label inside AddKernel::Compute's data-type switch,
// dispatching the broadcast add with the matching element type.
#define SET_BCAST_ADD_CASE(DTYPE, TYPE) \
  case (DTYPE): \
    ret = BCastAdd<TYPE>(op_desc_ptr, input, v_output); \
    break;
}  // namespace
// Checks whether x + y would overflow for the given data type.
// NOTE(review): the FMK_*_ADDCHECK macros come from common/math/math_util.h
// (not visible here); they are assumed to log and return an error status from
// this function when the addition would overflow -- verify against that header.
// Unlisted data types fall through the switch and are treated as safe.
// Returns SUCCESS when no overflow is detected (or the type is unchecked).
template <typename T>
Status AddKernel::OverflowCheck(const T &x, const T &y, DataType data_type) {
  switch (data_type) {
    case DT_INT8:
      FMK_INT8_ADDCHECK(x, y)
      break;
    case DT_INT16:
      FMK_INT16_ADDCHECK(x, y)
      break;
    case DT_INT32:
      FMK_INT32_ADDCHECK(x, y)
      break;
    case DT_INT64:
      FMK_INT64_ADDCHECK(x, y)
      break;
    case DT_UINT8:
      FMK_UINT8_ADDCHECK(x, y)
      break;
    case DT_UINT16:
      FMK_UINT16_ADDCHECK(x, y)
      break;
    case DT_UINT32:
      FMK_UINT32_ADDCHECK(x, y)
      break;
    case DT_UINT64:
      FMK_UINT64_ADDCHECK(x, y)
      break;
    case DT_FLOAT16:
      FMK_FP16_ADDCHECK(x, y)
      break;
    case DT_FLOAT:
      FMK_FLOAT_ADDCHECK(x, y)
      break;
    case DT_DOUBLE:
      FMK_DOUBLE_ADDCHECK(x, y)
      break;
    default:
      // Types without a dedicated check are not validated here.
      break;
  }
  return SUCCESS;
}
  75. template <typename InT>
  76. Status AddKernel::BCastAdd(const OpDescPtr &op_desc_ptr, const std::vector<ConstGeTensorPtr> &input,
  77. std::vector<GeTensorPtr> &v_output) {
  78. // only broadcast shape
  79. BCast bcast;
  80. Status ret = bcast.GenerateBcastInfo(BCast::TransShapeToDimVec(input[kAddFirstInput]->GetTensorDesc()),
  81. BCast::TransShapeToDimVec(input[kAddSecondInput]->GetTensorDesc()));
  82. if (ret != SUCCESS) {
  83. GELOGE(ret, "Greater broadcasting failed.");
  84. return ret;
  85. }
  86. std::vector<int64_t> x_indexes;
  87. std::vector<int64_t> y_indexes;
  88. bcast.BCastIndexes(x_indexes, y_indexes);
  89. auto x1_data = reinterpret_cast<const InT *>(input[kAddFirstInput]->GetData().data());
  90. auto x2_data = reinterpret_cast<const InT *>(input[kAddSecondInput]->GetData().data());
  91. size_t data_num = x_indexes.size();
  92. std::unique_ptr<InT[]> buf(new (std::nothrow) InT[data_num]());
  93. if (buf == nullptr) {
  94. GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", static_cast<size_t>(sizeof(InT) * data_num));
  95. return MEMALLOC_FAILED;
  96. }
  97. DataType data_type = input[kAddFirstInput]->GetTensorDesc().GetDataType();
  98. for (size_t i = 0; i < data_num; i++) {
  99. auto x_index = *(x1_data + x_indexes[i]);
  100. auto y_index = *(x2_data + y_indexes[i]);
  101. if (OverflowCheck<InT>(x_index, y_index, data_type) != SUCCESS) {
  102. GELOGE(PARAM_INVALID, "Result of add is overflow.");
  103. return PARAM_INVALID;
  104. }
  105. *(buf.get() + i) = x_index + y_index;
  106. }
  107. GeTensorPtr output_ptr = MakeShared<GeTensor>(op_desc_ptr->GetOutputDesc(kAddFirstOutput));
  108. if (output_ptr == nullptr) {
  109. GELOGE(MEMALLOC_FAILED, "Make shared failed");
  110. return MEMALLOC_FAILED;
  111. }
  112. output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(InT));
  113. output_ptr->MutableTensorDesc().SetDataType(data_type);
  114. vector<int64_t> bcast_dims = bcast.GetOutputShape();
  115. output_ptr->MutableTensorDesc().SetShape(GeShape(bcast_dims));
  116. v_output.push_back(output_ptr);
  117. return SUCCESS;
  118. }
  119. Status AddKernel::AddCheck(const OpDescPtr &op_desc_ptr, const std::vector<ConstGeTensorPtr> &input) {
  120. if (op_desc_ptr == nullptr) {
  121. GELOGW("Op_desc_ptr must not be null.");
  122. return PARAM_INVALID;
  123. }
  124. // check how many inputs
  125. if ((input.size() != kAddInputSize) || (op_desc_ptr->GetOutputsSize() != kAddOutputSize)) {
  126. GELOGW("The number of input for add must be %zu, output number must be %zu.", kAddInputSize,
  127. kAddOutputSize);
  128. return PARAM_INVALID;
  129. }
  130. // input vector elements must not be null
  131. if ((input[kAddFirstInput] == nullptr) || (input[kAddSecondInput] == nullptr)) {
  132. GELOGW("Input vector elements must not be null.");
  133. return PARAM_INVALID;
  134. }
  135. // Inputs must have the same datatype.
  136. DataType data_type_0 = input[kAddFirstInput]->GetTensorDesc().GetDataType();
  137. DataType data_type_1 = input[kAddSecondInput]->GetTensorDesc().GetDataType();
  138. if (data_type_0 != data_type_1) {
  139. GELOGW("Data type of inputs for add not matched, data_type_0:%s, data_type_1:%s",
  140. TypeUtils::DataTypeToSerialString(data_type_0).c_str(),
  141. TypeUtils::DataTypeToSerialString(data_type_1).c_str());
  142. return PARAM_INVALID;
  143. }
  144. // Checking whether the weightdef contains data
  145. if ((input[kAddFirstInput]->GetData().size() == 0) || (input[kAddSecondInput]->GetData().size() == 0)) {
  146. GELOGW("Data size of input0 is %zu, input1 is %zu.", input[kAddFirstInput]->GetData().size(),
  147. input[kAddSecondInput]->GetData().size());
  148. return PARAM_INVALID;
  149. }
  150. return SUCCESS;
  151. }
  152. Status AddKernel::Compute(const OpDescPtr op_desc_ptr, const std::vector<ConstGeTensorPtr> &input,
  153. std::vector<GeTensorPtr> &v_output) {
  154. if (AddCheck(op_desc_ptr, input) != SUCCESS) {
  155. return NOT_CHANGED;
  156. }
  157. Status ret = NOT_CHANGED;
  158. DataType data_type = input[kAddFirstInput]->GetTensorDesc().GetDataType();
  159. switch (data_type) {
  160. SET_BCAST_ADD_CASE(DT_INT8, int8_t)
  161. SET_BCAST_ADD_CASE(DT_INT16, int16_t)
  162. SET_BCAST_ADD_CASE(DT_INT32, int32_t)
  163. SET_BCAST_ADD_CASE(DT_INT64, int64_t)
  164. SET_BCAST_ADD_CASE(DT_UINT8, uint8_t)
  165. SET_BCAST_ADD_CASE(DT_UINT16, uint16_t)
  166. SET_BCAST_ADD_CASE(DT_UINT32, uint32_t)
  167. SET_BCAST_ADD_CASE(DT_UINT64, uint64_t)
  168. SET_BCAST_ADD_CASE(DT_FLOAT16, fp16_t)
  169. SET_BCAST_ADD_CASE(DT_FLOAT, float)
  170. SET_BCAST_ADD_CASE(DT_DOUBLE, double)
  171. default:
  172. GELOGI("Add kernel data type %s not support.", TypeUtils::DataTypeToSerialString(data_type).c_str());
  173. return NOT_CHANGED;
  174. }
  175. if (ret != SUCCESS) {
  176. GELOGW("Greater broadcasting failed.");
  177. return NOT_CHANGED;
  178. }
  179. return SUCCESS;
  180. }
// Register this kernel with the kernel factory under the ADD op type
// (see inc/kernel_factory.h).
REGISTER_KERNEL(ADD, AddKernel);
}  // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示