You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

cpu_queue_schedule.h 5.4 kB

4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_
  17. #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_
  18. #include <cstdint>
  19. #include <vector>
  20. #include "common/ge_inner_error_codes.h"
  21. #include "graph/load/new_model_manager/task_info/task_info.h"
  22. #include "graph/load/new_model_manager/zero_copy_offset.h"
  23. #include "runtime/kernel.h"
  24. namespace ge {
// Argument layout for the AICPU tasks "modelDequeue" / "modelEnqueue":
// binds an op queue to the mbuf slot the task reads from / writes to.
struct MbufQueueInfo {
  uint32_t queue_id;   // Op queue id to dequeue from / enqueue to
  uintptr_t in_mbuf;   // addr for input mbuf (slot holding the mbuf pointer)
};
// Argument layout for the AICPU task "modelPrepareInput":
// describes where the dequeued input mbuf data goes in the input tensor.
struct PrepareInputInfo {
  uintptr_t in_mbuf;     // input mbuf from dequeue
  uint32_t mbuf_offset;  // offset within the mbuf (currently always 0)
  uint32_t data_size;    // input tensor size in bytes
  uintptr_t data_addr;   // input tensor addr
};
// Argument layout for the AICPU task "modelPrepareOutput":
// describes the output tensor and the mbufs involved; the input mbuf is
// used to fill the output mbuf header.
struct PrepareOutputInfo {
  uint32_t data_size;   // output tensor size in bytes
  uintptr_t data_addr;  // output tensor addr
  uintptr_t in_mbuf;    // input mbuf, for fill output mbuf header
  uintptr_t out_mbuf;   // output mbuf addr
};
  44. // For AICPU task "modelZeroCopy"
  45. struct AddrMapInfo {
  46. uint32_t addr_num = 0;
  47. uint64_t src_addr_list;
  48. uint64_t dst_addr_list;
  49. };
  50. ///
  51. /// @ingroup ge
  52. /// @brief CpuTask base, inherit from TaskInfo used for manage.
  53. ///
  54. class CpuTaskInfo : public TaskInfo {
  55. public:
  56. explicit CpuTaskInfo(rtStream_t stream);
  57. ~CpuTaskInfo() override;
  58. protected:
  59. void *args_;
  60. uint32_t args_size_;
  61. };
///
/// @ingroup ge
/// @brief definiteness queue schedule, bind input queue to task.
///
class CpuTaskModelDequeue : public CpuTaskInfo {
 public:
  explicit CpuTaskModelDequeue(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskModelDequeue() override {}

  /// TaskInfo interface; not used by CPU queue-schedule tasks, always succeeds.
  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  /// Build the dequeue task arguments.
  /// @param queue_id  input queue to dequeue from
  /// @param in_mbuf   [out] addr of the slot that will hold the dequeued mbuf
  Status Init(uint32_t queue_id, uintptr_t &in_mbuf);
  /// Submit the task to the stream.
  Status Distribute() override;
};
///
/// @ingroup ge
/// @brief definiteness queue schedule, zero copy.
///
class CpuTaskZeroCopy : public CpuTaskInfo {
 public:
  explicit CpuTaskZeroCopy(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskZeroCopy() override;  // non-trivial: presumably releases src_addr_/dst_addr_ — confirm in .cc

  /// TaskInfo interface; not used by CPU queue-schedule tasks, always succeeds.
  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  /// Build the zero-copy address map from the mbuf list and the
  /// outside (user-provided) address table.
  Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);
  /// Submit the task to the stream.
  Status Distribute() override;

 private:
  // Address lists for the remap table; allocated during Init — TODO confirm ownership/lifetime in .cc.
  void *src_addr_ = nullptr;
  void *dst_addr_ = nullptr;
};
///
/// @ingroup ge
/// @brief definiteness queue schedule, prepare output mbuf.
/// NOTE(review): the original brief said "active original model stream",
/// apparently copy-pasted from CpuTaskActiveEntry — confirm intent.
///
class CpuTaskPrepareOutput : public CpuTaskInfo {
 public:
  explicit CpuTaskPrepareOutput(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskPrepareOutput() override {}

  /// TaskInfo interface; not used by CPU queue-schedule tasks, always succeeds.
  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  /// Build the prepare-output task arguments.
  /// @param addr      output tensor addr
  /// @param size      output tensor size in bytes
  /// @param in_mbuf   input mbuf, used to fill the output mbuf header
  /// @param out_mbuf  [out] addr that will hold the output mbuf
  Status Init(uintptr_t addr, uint32_t size, uintptr_t in_mbuf, uintptr_t &out_mbuf);
  /// Submit the task to the stream.
  Status Distribute() override;
};
///
/// @ingroup ge
/// @brief definiteness queue schedule, bind output queue to task.
///
class CpuTaskModelEnqueue : public CpuTaskInfo {
 public:
  explicit CpuTaskModelEnqueue(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskModelEnqueue() override {}

  /// TaskInfo interface; not used by CPU queue-schedule tasks, always succeeds.
  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  /// Build the enqueue task arguments.
  /// @param queue_id  output queue to enqueue to
  /// @param out_mbuf  mbuf to enqueue
  Status Init(uint32_t queue_id, uintptr_t out_mbuf);
  /// Submit the task to the stream.
  Status Distribute() override;
};
///
/// @ingroup ge
/// @brief definiteness queue schedule, active entry stream.
///
class CpuTaskActiveEntry : public CpuTaskInfo {
 public:
  explicit CpuTaskActiveEntry(rtStream_t stream) : CpuTaskInfo(stream), active_stream_(nullptr) {}
  ~CpuTaskActiveEntry() override {}

  /// TaskInfo interface; not used by CPU queue-schedule tasks, always succeeds.
  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  /// Record the stream to activate when this task runs.
  Status Init(rtStream_t stream);
  /// Submit the task to the stream.
  Status Distribute() override;

 private:
  rtStream_t active_stream_;  // stream to be activated, set by Init(rtStream_t)
};
///
/// @ingroup ge
/// @brief definiteness queue schedule, wait for end graph.
///
class CpuTaskWaitEndGraph : public CpuTaskInfo {
 public:
  explicit CpuTaskWaitEndGraph(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskWaitEndGraph() override {}

  /// TaskInfo interface; not used by CPU queue-schedule tasks, always succeeds.
  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  /// Build the wait-end-graph task arguments for the given model.
  Status Init(uint32_t model_id);
  /// Submit the task to the stream.
  Status Distribute() override;
};
///
/// @ingroup ge
/// @brief definiteness queue schedule, repeat run model.
///
class CpuTaskModelRepeat : public CpuTaskInfo {
 public:
  explicit CpuTaskModelRepeat(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskModelRepeat() override {}

  /// TaskInfo interface; not used by CPU queue-schedule tasks, always succeeds.
  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  /// Build the repeat-run task arguments for the given model.
  Status Init(uint32_t model_id);
  /// Submit the task to the stream.
  Status Distribute() override;
};
  147. } // namespace ge
  148. #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示