You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

gather_v2_kernel.h 2.4 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_GATHER_V2_KERNEL_H_
  17. #define GE_GRAPH_PASSES_FOLDING_KERNEL_GATHER_V2_KERNEL_H_
  18. #include <vector>
  19. #include "inc/kernel.h"
  20. namespace ge {
  21. class GatherV2Kernel : public Kernel {
  22. public:
  23. Status Compute(const OpDescPtr op_desc_ptr, const std::vector<ConstGeTensorPtr> &input,
  24. std::vector<GeTensorPtr> &v_output) override;
  25. private:
  26. template <typename T>
  27. Status ProcessAxis0(ConstGeTensorPtr tensor_x, GeTensorPtr output);
  28. template <typename T>
  29. Status ProcessAxis1(ConstGeTensorPtr tensor_x, GeTensorPtr output);
  30. template <typename T>
  31. Status ProcessAxis2(ConstGeTensorPtr tensor_x, GeTensorPtr output);
  32. template <typename T>
  33. Status ProcessAxis3(ConstGeTensorPtr tensor_x, GeTensorPtr output);
  34. template <typename T>
  35. Status GenData(const int64_t data_num, ConstGeTensorPtr tensor_x, int64_t axis, GeTensorPtr output);
  36. Status Check(const OpDescPtr &op_desc_ptr, const vector<ConstGeTensorPtr> &input,
  37. vector<GeTensorPtr> &v_output) const;
  38. Status CalcStride(std::vector<int64_t> &stride, std::vector<int64_t> dims);
  39. Status SaveIndicesByDataType(ConstGeTensorPtr indices_tensor_ptr, GeShape &x_shape, GeShape &indices_shape,
  40. DataType indices_data_type, size_t axis);
  41. Status Process(int64_t axis, DataType data_type, ConstGeTensorPtr input_tensor_ptr, GeTensorPtr output_ptr);
  42. void DebugPrint(int64_t axis, const GeShape &x_shape, const GeShape &indices_shape,
  43. const std::vector<int64_t> &y_shape);
  44. private:
  45. std::vector<int64_t> indicates_;
  46. std::vector<int64_t> xstride_;
  47. std::vector<int64_t> ystride_;
  48. };
  49. } // namespace ge
  50. #endif // GE_GRAPH_PASSES_FOLDING_KERNEL_GATHER_V2_KERNEL_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示: