You cannot select more than 25 topics. A topic must start with a Chinese character, a letter, or a number; it can include dashes ('-') and can be up to 35 characters long.

floordiv_kernel.cc 11 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
#include "host_kernels/floordiv_kernel.h"

#include <cfloat>
#include <cmath>
#include <memory>
#include <set>
#include <type_traits>

#include "framework/common/op/ge_op_utils.h"
#include "framework/common/types.h"
#include "framework/common/debug/ge_log.h"
#include "host_kernels/kernel_utils.h"
#include "graph/utils/type_utils.h"
#include "inc/kernel_factory.h"
namespace ge {
namespace {
// Indices of the FloorDiv op's two input tensors.
const size_t kFloorDivInputX = 0;
const size_t kFloorDivInputY = 1;
// A dim count of 0 means the tensor is a scalar.
const size_t kFloorDivTensorShapeIsEmpty = 0;
// FloorDiv takes exactly two inputs: x (dividend) and y (divisor).
const size_t kFloorDivInputSize = 2;
// Element data types this constant-folding kernel can compute.
const std::set<DataType> kFloorDivSupportedType = {DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8,
                                                   DT_UINT16, DT_INT16, DT_INT32, DT_INT64};
} // namespace
  35. Status FloorDivKernel::FloorDivCheck(const OpDescPtr &op_desc_ptr,
  36. const std::vector<ge::ConstGeTensorPtr> &input) const {
  37. // check input size
  38. if (op_desc_ptr == nullptr) {
  39. GELOGW("Input opdesc is nullptr.");
  40. return PARAM_INVALID;
  41. }
  42. if (input.size() != kFloorDivInputSize) {
  43. GELOGW("Unexpected FloorDiv node, node input size: %zu, node name: %s", input.size(),
  44. op_desc_ptr->GetName().c_str());
  45. return PARAM_INVALID;
  46. }
  47. // check dims of x and y
  48. ConstGeTensorPtr x_tensor = input.at(kFloorDivInputX);
  49. ConstGeTensorPtr y_tensor = input.at(kFloorDivInputY);
  50. GE_CHECK_NOTNULL(x_tensor);
  51. GE_CHECK_NOTNULL(y_tensor);
  52. if (x_tensor->GetTensorDesc().GetShape().GetDimNum() != kFloorDivTensorShapeIsEmpty &&
  53. y_tensor->GetTensorDesc().GetShape().GetDimNum() != kFloorDivTensorShapeIsEmpty) {
  54. // x and y are not scalars
  55. vector<int64_t> x_dims = x_tensor->GetTensorDesc().GetShape().GetDims();
  56. vector<int64_t> y_dims = y_tensor->GetTensorDesc().GetShape().GetDims();
  57. if (x_dims.size() != y_dims.size()) {
  58. GELOGW("FloorDivKernel dims of x and y do not match, node name: %s", op_desc_ptr->GetName().c_str());
  59. return PARAM_INVALID;
  60. } else {
  61. for (size_t i = 0; i < x_dims.size(); ++i) {
  62. if (x_dims[i] != y_dims[i]) {
  63. GELOGW("FloorDivKernel dims of x and y do not match, node name: %s", op_desc_ptr->GetName().c_str());
  64. return PARAM_INVALID;
  65. }
  66. }
  67. }
  68. }
  69. // check data type
  70. DataType x_data_dtype = x_tensor->GetTensorDesc().GetDataType();
  71. DataType y_data_dtype = y_tensor->GetTensorDesc().GetDataType();
  72. if (x_data_dtype != y_data_dtype) {
  73. GELOGW("FloorDivKernel data type of x and y do not match, x data type is %s, but y data type is %s, node name: %s.",
  74. TypeUtils::DataTypeToSerialString(x_data_dtype).c_str(),
  75. TypeUtils::DataTypeToSerialString(y_data_dtype).c_str(), op_desc_ptr->GetName().c_str());
  76. return PARAM_INVALID;
  77. }
  78. if (kFloorDivSupportedType.find(x_data_dtype) == kFloorDivSupportedType.end()) {
  79. GELOGW("FloorDivKernel data type %s not support, node name: %s",
  80. TypeUtils::DataTypeToSerialString(x_data_dtype).c_str(), op_desc_ptr->GetName().c_str());
  81. return PARAM_INVALID;
  82. }
  83. // check data
  84. if (x_tensor->GetData().size() == 0 || y_tensor->GetData().size() == 0) {
  85. GELOGW("FloorDivKernel data size of inputs is 0, node name: %s", op_desc_ptr->GetName().c_str());
  86. return PARAM_INVALID;
  87. }
  88. return SUCCESS;
  89. }
  90. void FloorDivKernel::ShapeCal(const std::vector<ge::ConstGeTensorPtr> &input, GeTensorPtr output_ptr) {
  91. vector<int64_t> output_dims;
  92. size_t x_dim = input.at(kFloorDivInputX)->GetTensorDesc().GetShape().GetDimNum();
  93. size_t y_dim = input.at(kFloorDivInputY)->GetTensorDesc().GetShape().GetDimNum();
  94. if (x_dim >= y_dim) {
  95. output_dims = input.at(kFloorDivInputX)->GetTensorDesc().GetShape().GetDims();
  96. } else {
  97. output_dims = input.at(kFloorDivInputY)->GetTensorDesc().GetShape().GetDims();
  98. }
  99. output_ptr->MutableTensorDesc().SetShape(GeShape(output_dims));
  100. }
  101. template <typename T>
  102. T FloorDivKernel::DivCal(const T &x_i, const T &y_i) {
  103. if ((x_i < static_cast<T>(0)) != (y_i < static_cast<T>(0))) {
  104. T abs_x_i = x_i < 0 ? -x_i : x_i;
  105. T abs_y_i = y_i < 0 ? -y_i : y_i;
  106. return static_cast<T>(static_cast<int32_t>(-(abs_x_i + abs_y_i - 1) / abs_y_i));
  107. } else {
  108. return static_cast<T>(static_cast<int32_t>(x_i / y_i));
  109. }
  110. }
  111. template <typename T>
  112. bool FloorDivKernel::ZeroCheck(const T &element, DataType data_type) {
  113. bool result = false;
  114. if (data_type == DT_UINT8 || data_type == DT_INT8 || data_type == DT_UINT16 || data_type == DT_INT16 ||
  115. data_type == DT_INT32 || data_type == DT_INT64) {
  116. result = (element == 0);
  117. } else if (data_type == DT_FLOAT) {
  118. result = (fabs(element) < FLT_EPSILON);
  119. } else if (data_type == DT_DOUBLE) {
  120. result = (fabs(element) < DBL_EPSILON);
  121. }
  122. return result;
  123. }
  124. template <typename T>
  125. Status FloorDivKernel::DataCalBroadcast(const T &x, const T &y, size_t num_x, size_t num_y, DataType data_type,
  126. GeTensorPtr output_ptr) {
  127. size_t data_num = (num_x > num_y) ? num_x : num_y;
  128. unique_ptr<T[]> buf(new (std::nothrow) T[data_num]());
  129. if (buf == nullptr) {
  130. GELOGE(MEMALLOC_FAILED, "new buf failed");
  131. return INTERNAL_ERROR;
  132. }
  133. if (num_x > num_y) {
  134. if (ZeroCheck<T>(y, data_type)) {
  135. GELOGE(PARAM_INVALID, "The divisor of FloorDiv can not be zero.");
  136. return PARAM_INVALID;
  137. }
  138. for (size_t i = 0; i < num_x; ++i) {
  139. buf[i] = DivCal<T>((&x)[i], y);
  140. }
  141. } else {
  142. for (size_t i = 0; i < num_y; ++i) {
  143. if (ZeroCheck<T>((&y)[i], data_type)) {
  144. GELOGE(PARAM_INVALID, "The divisor of FloorDiv can not be zero.");
  145. return PARAM_INVALID;
  146. }
  147. buf[i] = DivCal<T>(x, (&y)[i]);
  148. }
  149. }
  150. if (output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(T)) != GRAPH_SUCCESS) {
  151. GELOGE(PARAM_INVALID, "set data failed");
  152. return PARAM_INVALID;
  153. }
  154. return SUCCESS;
  155. }
  156. template <typename T>
  157. Status FloorDivKernel::DataCal(const std::vector<ConstGeTensorPtr> &input, GeTensorPtr output_ptr) {
  158. ConstGeTensorPtr x_tensor = input.at(kFloorDivInputX);
  159. ConstGeTensorPtr y_tensor = input.at(kFloorDivInputY);
  160. GE_CHECK_NOTNULL(x_tensor);
  161. GE_CHECK_NOTNULL(y_tensor);
  162. T *x = const_cast<T *>(reinterpret_cast<const T *>(x_tensor->GetData().GetData()));
  163. T *y = const_cast<T *>(reinterpret_cast<const T *>(y_tensor->GetData().GetData()));
  164. if (x == nullptr || y == nullptr) {
  165. GELOGE(PARAM_INVALID, "Input tensor is nullptr.");
  166. return PARAM_INVALID;
  167. }
  168. size_t data_num_x = x_tensor->GetData().size() / sizeof(T);
  169. size_t data_num_y = y_tensor->GetData().size() / sizeof(T);
  170. DataType data_type = x_tensor->GetTensorDesc().GetDataType();
  171. if (x_tensor->GetTensorDesc().GetShape().GetDimNum() == y_tensor->GetTensorDesc().GetShape().GetDimNum()) {
  172. // x and y are both scalars or vector, no need broadcast
  173. unique_ptr<T[]> buf(new (std::nothrow) T[data_num_x]());
  174. if (buf == nullptr) {
  175. GELOGE(MEMALLOC_FAILED, "new buf failed");
  176. return INTERNAL_ERROR;
  177. }
  178. for (size_t i = 0; i < data_num_x; ++i) {
  179. if (ZeroCheck<T>(y[i], data_type)) {
  180. GELOGE(PARAM_INVALID, "The divisor of FloorDiv can not be zero.");
  181. return PARAM_INVALID;
  182. }
  183. buf[i] = DivCal<T>(x[i], y[i]);
  184. }
  185. if (output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num_x * sizeof(T)) != GRAPH_SUCCESS) {
  186. GELOGE(PARAM_INVALID, "set data failed");
  187. return PARAM_INVALID;
  188. }
  189. } else {
  190. // x-y is vector-scalar, need broadcast
  191. if (DataCalBroadcast<T>(*x, *y, data_num_x, data_num_y, data_type, output_ptr) != SUCCESS) {
  192. return PARAM_INVALID;
  193. }
  194. }
  195. return SUCCESS;
  196. }
  197. Status FloorDivKernel::ComputeByDataType(DataType data_type, const std::vector<ConstGeTensorPtr> &input,
  198. GeTensorPtr output_ptr) {
  199. Status ret;
  200. switch (data_type) {
  201. case DT_FLOAT:
  202. ret = DataCal<float>(input, output_ptr);
  203. break;
  204. case DT_DOUBLE:
  205. ret = DataCal<double>(input, output_ptr);
  206. break;
  207. case DT_UINT8:
  208. ret = DataCal<uint8_t>(input, output_ptr);
  209. break;
  210. case DT_INT8:
  211. ret = DataCal<int8_t>(input, output_ptr);
  212. break;
  213. case DT_UINT16:
  214. ret = DataCal<uint16_t>(input, output_ptr);
  215. break;
  216. case DT_INT16:
  217. ret = DataCal<int16_t>(input, output_ptr);
  218. break;
  219. case DT_INT32:
  220. ret = DataCal<int32_t>(input, output_ptr);
  221. break;
  222. case DT_INT64:
  223. ret = DataCal<int64_t>(input, output_ptr);
  224. break;
  225. default:
  226. GELOGW("FloorDivKernel does not support Data type:%s", TypeUtils::DataTypeToSerialString(data_type).c_str());
  227. return NOT_CHANGED;
  228. }
  229. return ret;
  230. }
  231. Status FloorDivKernel::Compute(const OpDescPtr op_desc_ptr, const std::vector<ConstGeTensorPtr> &input,
  232. std::vector<GeTensorPtr> &v_output) {
  233. GELOGI("FloorDivKernel in");
  234. if (FloorDivCheck(op_desc_ptr, input) != SUCCESS) {
  235. GELOGW("FloorDivKernel input is invalid, failed to fold node.");
  236. return NOT_CHANGED;
  237. }
  238. // Index 0 can always gets a GeTensorDesc object from any OpDescPtr.
  239. auto output_tensor_desc = op_desc_ptr->GetOutputDesc(0);
  240. GeTensorPtr output_ptr = MakeShared<GeTensor>(output_tensor_desc);
  241. if (output_ptr == nullptr) {
  242. GELOGW("make_shared ge::GeTensor failed, node name %s.", op_desc_ptr->GetName().c_str());
  243. return NOT_CHANGED;
  244. }
  245. // calculate shape
  246. ShapeCal(input, output_ptr);
  247. // calculate data and data type
  248. DataType x_data_dtype = input.at(kFloorDivInputX)->GetTensorDesc().GetDataType();
  249. output_ptr->MutableTensorDesc().SetDataType(x_data_dtype);
  250. if (ComputeByDataType(x_data_dtype, input, output_ptr) != SUCCESS) {
  251. return NOT_CHANGED;
  252. }
  253. // print output tensor information, and will be deleted
  254. GELOGD("FloorDiv op %s output tensor data size is %zu", op_desc_ptr->GetName().c_str(), output_ptr->GetData().size());
  255. vector<int64_t> data_dims = output_ptr->GetTensorDesc().GetShape().GetDims();
  256. GELOGD("FloorDiv op %s output tensor dim size is %zu", op_desc_ptr->GetName().c_str(), data_dims.size());
  257. v_output.emplace_back(output_ptr);
  258. GELOGI("FloorDivKernel success.");
  259. return SUCCESS;
  260. }
// Register this kernel with the factory so FLOORDIV nodes can be constant-folded.
REGISTER_KERNEL(FLOORDIV, FloorDivKernel);
  262. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示