You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

zero_copy_offset.h 3.4 kB

4 years ago
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_
  17. #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_
  18. #include <map>
  19. #include <set>
  20. #include <string>
  21. #include <vector>
  22. #include "external/ge/ge_api_error_codes.h"
  23. #include "framework/common/ge_types.h"
  24. #include "graph/debug/ge_attr_define.h"
  25. #include "graph/load/new_model_manager/zero_copy_task.h"
  26. #include "graph/utils/attr_utils.h"
  27. #include "graph/utils/tensor_utils.h"
  28. #include "runtime/mem.h"
  29. #include "task_info/task_info.h"
  30. using std::map;
  31. using std::set;
  32. using std::string;
  33. using std::vector;
  34. namespace ge {
  35. class ZeroCopyOffset {  // Holds a base address, data/address counts, (int64_t, void *) data entries, relative offsets and outside-address maps (see private members below).
  36. public:
  37. ZeroCopyOffset();  // Only declared here; the definition is not visible in this header.
  38. ~ZeroCopyOffset();  // Only declared here; the definition is not visible in this header.
  39. Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag);  // Returns a Status; fusion_flag is a non-const reference, so presumably an output - confirm in the implementation.
  40. void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
  41. bool fusion_flag, std::set<const void *> &real_virtual_addrs);  // fusion_flag is taken by value here; real_virtual_addrs is a non-const reference (presumably updated - confirm).
  42. void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag);  // Returns void; the result is presumably reported through fusion_flag (non-const reference) - confirm.
  43. Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list,
  44. const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag);  // Returns a Status; fusion_flag is a non-const reference, so presumably an output - confirm in the implementation.
  45. void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
  46. std::vector<void *> &tensor_addrs);  // fusion_flag is read-only here (const reference); tensor_addrs is a non-const reference (presumably updated - confirm).
  47. bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);  // Returns bool; the meaning of true/false is not visible in this header - TODO confirm.
  48. // Base address (basic_addr) of L2 fusion; returns basic_addr_.
  49. void *GetBasicAddr() const { return basic_addr_; }
  50. // Total number of out_of_data/in_of_phonyconcat; returns data_count_.
  51. uint32_t GetDataCount() const { return data_count_; }
  52. uint32_t GetAddrCount() const { return addr_count_; }  // Returns addr_count_.
  53. // Value of *data_info_ from davinci_model; returned by value, so the caller receives a copy of data_info_.
  54. std::vector<std::pair<int64_t, void *>> GetDataInfo() const { return data_info_; }
  55. // Relative offsets (original note: taken from zero_copy_relative_offset_); returns a copy of relative_offset_.
  56. std::vector<int64_t> GetRelativeOffset() const { return relative_offset_; }
  57. // Data size (data_size) of Data/Netoutput; returns data_size_.
  58. int64_t GetDataSize() const { return data_size_; }
  59. // Value of *outside_addrs_ from davinci_model; returned by non-const reference, so callers can modify the stored maps.
  60. std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; }
  61. private:
  62. void *basic_addr_ = nullptr;  // Exposed through GetBasicAddr().
  63. uint32_t data_count_ = 0;  // Exposed through GetDataCount().
  64. std::vector<std::pair<int64_t, void *>> data_info_;  // Exposed (as a copy) through GetDataInfo().
  65. vector<int64_t> relative_offset_;  // Exposed (as a copy) through GetRelativeOffset().
  66. int64_t data_size_ = 0;  // Exposed through GetDataSize().
  67. uint32_t addr_count_ = 0;  // Exposed through GetAddrCount().
  68. std::vector<std::map<const void *, std::vector<void *>>> outside_addrs_;  // Exposed by reference through GetOutsideAddrs().
  69. std::vector<int64_t> zero_copy_basic_offset_;  // No accessor is declared for this member in this header.
  70. std::vector<int64_t> zero_copy_relative_offset_;  // No accessor is declared for this member in this header.
  71. };
  72. } // namespace ge
  73. #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,而用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。