You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cpu_queue_schedule.cc 20 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/load/model_manager/cpu_queue_schedule.h"
  17. #include "framework/common/debug/ge_log.h"
  18. #include "framework/common/debug/log.h"
  namespace {
  // Third argument handed to rtCpuKernelLaunch by every Distribute() in this file.
  constexpr uint32_t kCoreDim = 1U;

  // Kernel names handed to rtCpuKernelLaunch, one per task type.
  constexpr const char *kCpuTaskModelDequeue = "modelDequeue";
  constexpr const char *kCpuTaskZeroCopy = "zeroCpy";
  constexpr const char *kCpuTaskPrepareOutput = "bufferPrepareOutput";
  constexpr const char *kCpuTaskModelEnqueue = "modelEnqueue";
  constexpr const char *kCpuTaskWaitEndGraph = "modelWaitEndGraph";
  constexpr const char *kCpuTaskModelRepeat = "modelRepeat";
  }  // namespace
  28. namespace ge {
  29. CpuTaskInfo::CpuTaskInfo(rtStream_t stream) : args_(nullptr), args_size_(0) { stream_ = stream; }
  30. CpuTaskInfo::~CpuTaskInfo() {
  31. if (args_ == nullptr) {
  32. return;
  33. }
  34. rtError_t status = rtFree(args_);
  35. if (status != RT_ERROR_NONE) {
  36. GELOGW("Call rt free failed, status: 0x%x", status);
  37. }
  38. args_ = nullptr;
  39. }
  40. ///
  41. /// @ingroup ge
  42. /// @brief definiteness queue schedule, bind input queue to task.
  43. /// @param [in] queue_id: input queue id from user.
  44. /// @param [out] in_mbuf: input mbuf addr for input data.
  45. /// @return: 0 for success / others for failed
  46. ///
  47. Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) {
  48. if ((args_ != nullptr) || (args_size_ > 0)) {
  49. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
  50. GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
  51. return FAILED;
  52. }
  53. args_size_ = sizeof(MbufQueueInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save in_mbuf.
  54. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  55. if (status != RT_ERROR_NONE) {
  56. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status);
  57. GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status);
  58. return RT_ERROR_TO_GE_STATUS(status);
  59. }
  60. in_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(MbufQueueInfo);
  61. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  62. MbufQueueInfo queue_info;
  63. queue_info.queue_id = queue_id;
  64. queue_info.in_mbuf = in_mbuf; // Placeholder, input mbuf addr will save to this place.
  65. status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE);
  66. if (status != RT_ERROR_NONE) {
  67. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
  68. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
  69. return RT_ERROR_TO_GE_STATUS(status);
  70. }
  71. return SUCCESS;
  72. }
  73. Status CpuTaskModelDequeue::Distribute() {
  74. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  75. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  76. "check invalid", args_size_);
  77. GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
  78. return FAILED;
  79. }
  80. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_);
  81. if (status != RT_ERROR_NONE) {
  82. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
  83. GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
  84. return RT_ERROR_TO_GE_STATUS(status);
  85. }
  86. GELOGI("Cpu kernel launch model dequeue task success.");
  87. return SUCCESS;
  88. }
  89. ///
  90. /// @ingroup ge
  91. /// @brief definiteness queue schedule, zero copy.
  92. /// @param [in] mbuf_list: input/output mbuf addr list for input/output data.
  93. /// @param [in] outside_addrs: model input/output memory addr
  94. /// @return: 0 for success / others for failed
  95. ///
  96. Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
  97. if ((args_ != nullptr) || (args_size_ > 0)) {
  98. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
  99. GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
  100. return FAILED;
  101. }
  102. args_size_ = sizeof(AddrMapInfo);
  103. GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
  104. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  105. AddrMapInfo addr_map_info;
  106. // init src_addrs/dst_addrs
  107. vector<uint64_t> src_addrs;
  108. vector<uint64_t> dst_addrs;
  109. for (const auto &addrs : outside_addrs) {
  110. const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
  111. GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "[Check][Param] not set outside_addrs");
  112. std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
  113. for (const auto &virtual_args_addr : virtual_args_addrs) {
  114. addr_map_info.addr_num += virtual_args_addr.second.size();
  115. for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
  116. src_addrs.emplace_back(mbuf_list.at(addrs.first));
  117. dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
  118. }
  119. }
  120. }
  121. GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);
  122. // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs
  123. GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));
  124. rtError_t status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(),
  125. src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
  126. GE_IF_BOOL_EXEC(status != RT_ERROR_NONE,
  127. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X",
  128. src_addrs.size() * sizeof(uint64_t), status);
  129. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X",
  130. src_addrs.size() * sizeof(uint64_t), status);
  131. return RT_ERROR_TO_GE_STATUS(status);)
  132. GE_CHK_RT_RET(rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));
  133. status = rtMemcpy(dst_addr_, dst_addrs.size() * sizeof(uint64_t), dst_addrs.data(),
  134. dst_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
  135. GE_IF_BOOL_EXEC(status != RT_ERROR_NONE,
  136. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X",
  137. dst_addrs.size() * sizeof(uint64_t), status);
  138. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X",
  139. dst_addrs.size() * sizeof(uint64_t), status);
  140. return RT_ERROR_TO_GE_STATUS(status);)
  141. // src_addr_list is init to src_addr, which is the point to src_addrs
  142. if (!src_addrs.empty() && !dst_addrs.empty()) {
  143. addr_map_info.src_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(src_addr_));
  144. addr_map_info.dst_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(dst_addr_));
  145. GELOGI("src_addr_list is %lu, dst_addr_list is %lu", addr_map_info.src_addr_list, addr_map_info.dst_addr_list);
  146. }
  147. status = rtMemcpy(args_, args_size_, &addr_map_info, sizeof(AddrMapInfo), RT_MEMCPY_HOST_TO_DEVICE);
  148. GE_IF_BOOL_EXEC(status != RT_ERROR_NONE,
  149. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
  150. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
  151. return RT_ERROR_TO_GE_STATUS(status);)
  152. return SUCCESS;
  153. }
  154. Status CpuTaskZeroCopy::Distribute() {
  155. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  156. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  157. "check invalid", args_size_);
  158. GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
  159. return FAILED;
  160. }
  161. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_);
  162. if (status != RT_ERROR_NONE) {
  163. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
  164. GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
  165. return RT_ERROR_TO_GE_STATUS(status);
  166. }
  167. GELOGI("Cpu kernel launch zero copy task success.");
  168. return SUCCESS;
  169. }
  170. CpuTaskZeroCopy::~CpuTaskZeroCopy() {
  171. if (src_addr_ == nullptr && dst_addr_ == nullptr) {
  172. return;
  173. }
  174. if (src_addr_ != nullptr) {
  175. rtError_t status = rtFree(src_addr_);
  176. if (status != RT_ERROR_NONE) {
  177. GELOGW("Call rt free failed, status: 0x%x", status);
  178. }
  179. }
  180. if (dst_addr_ != nullptr) {
  181. rtError_t status = rtFree(dst_addr_);
  182. if (status != RT_ERROR_NONE) {
  183. GELOGW("Call rt free failed, status: 0x%x", status);
  184. }
  185. }
  186. src_addr_ = nullptr;
  187. dst_addr_ = nullptr;
  188. }
  189. ///
  190. /// @ingroup ge
  191. /// @brief definiteness queue schedule, bind output queue to task.
  192. /// @param [in] addr: NetOutput Op input tensor address.
  193. /// @param [in] size: NetOutput Op input tensor size.
  194. /// @param [in] in_mbuf: input mbuf addr for input data.
  195. /// @param [out] out_mbuf: output mbuf addr for output data.
  196. /// @return: 0 for success / others for failed
  197. ///
  198. Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mbuf, uintptr_t &out_mbuf) {
  199. if ((args_ != nullptr) || (args_size_ > 0)) {
  200. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
  201. GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
  202. return FAILED;
  203. }
  204. args_size_ = sizeof(PrepareOutputInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save out_mbuf.
  205. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  206. if (status != RT_ERROR_NONE) {
  207. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status);
  208. GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status);
  209. return RT_ERROR_TO_GE_STATUS(status);
  210. }
  211. out_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(PrepareOutputInfo);
  212. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  213. // Get NetOutput Input address and bind to queue.
  214. PrepareOutputInfo prepare;
  215. prepare.data_size = size;
  216. prepare.data_addr = addr;
  217. prepare.in_mbuf = in_mbuf;
  218. prepare.out_mbuf = out_mbuf; // Placeholder, output mbuf addr will save to this place.
  219. status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE);
  220. if (status != RT_ERROR_NONE) {
  221. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
  222. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
  223. return RT_ERROR_TO_GE_STATUS(status);
  224. }
  225. return SUCCESS;
  226. }
  227. Status CpuTaskPrepareOutput::Distribute() {
  228. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  229. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  230. "check invalid", args_size_);
  231. GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
  232. return FAILED;
  233. }
  234. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_);
  235. if (status != RT_ERROR_NONE) {
  236. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
  237. GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
  238. return RT_ERROR_TO_GE_STATUS(status);
  239. }
  240. GELOGI("Cpu kernel launch prepare output task success.");
  241. return SUCCESS;
  242. }
  243. ///
  244. /// @ingroup ge
  245. /// @brief definiteness queue schedule, bind output queue to task.
  246. /// @param [in] queue_id: output queue id from user.
  247. /// @param [in] out_mbuf: mbuf for output data.
  248. /// @return: 0 for success / others for failed
  249. ///
  250. Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) {
  251. if ((args_ != nullptr) || (args_size_ > 0)) {
  252. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
  253. GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
  254. return FAILED;
  255. }
  256. // Get NetOutput Input address and bind to queue.
  257. args_size_ = sizeof(MbufQueueInfo);
  258. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  259. if (status != RT_ERROR_NONE) {
  260. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status);
  261. GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status);
  262. return RT_ERROR_TO_GE_STATUS(status);
  263. }
  264. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  265. MbufQueueInfo queue_info;
  266. queue_info.queue_id = queue_id;
  267. queue_info.in_mbuf = out_mbuf;
  268. status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
  269. if (status != RT_ERROR_NONE) {
  270. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
  271. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
  272. return RT_ERROR_TO_GE_STATUS(status);
  273. }
  274. return SUCCESS;
  275. }
  276. Status CpuTaskModelEnqueue::Distribute() {
  277. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  278. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_ is 0 or stream_ is nullptr, arg_size:%u,"
  279. "check invalid", args_size_);
  280. GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
  281. return FAILED;
  282. }
  283. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_);
  284. if (status != RT_ERROR_NONE) {
  285. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
  286. GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
  287. return RT_ERROR_TO_GE_STATUS(status);
  288. }
  289. GELOGI("Cpu kernel launch model enqueue task success.");
  290. return SUCCESS;
  291. }
  292. ///
  293. /// @ingroup ge
  294. /// @brief definiteness queue schedule, active entry stream.
  295. /// @param [in] stream: stream to be active.
  296. /// @return: 0 for success / others for failed
  297. ///
  298. Status CpuTaskActiveEntry::Init(rtStream_t stream) {
  299. if (stream == nullptr) {
  300. REPORT_INNER_ERROR("E19999", "Param stream is nullptr, check invalid");
  301. GELOGE(FAILED, "[Check][Param] Task active stream not valid");
  302. return FAILED;
  303. }
  304. active_stream_ = stream;
  305. return SUCCESS;
  306. }
  307. Status CpuTaskActiveEntry::Distribute() {
  308. if ((active_stream_ == nullptr) || (stream_ == nullptr)) {
  309. REPORT_INNER_ERROR("E19999", "Param stream is nullptr or active_stream_ is nullptr, check invalid");
  310. GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
  311. return FAILED;
  312. }
  313. rtError_t ret = rtStreamActive(active_stream_, stream_);
  314. if (ret != RT_ERROR_NONE) {
  315. REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X", ret);
  316. GELOGE(RT_FAILED, "[Call][RtStreamActive] failed, ret:0x%X", ret);
  317. return RT_ERROR_TO_GE_STATUS(ret);
  318. }
  319. GELOGI("Cpu kernel launch active entry task success.");
  320. return SUCCESS;
  321. }
  322. ///
  323. /// @ingroup ge
  324. /// @brief definiteness queue schedule, wait for end graph.
  325. /// @param [in] model_id: model id for wait end graph.
  326. /// @return: 0 for success / others for failed
  327. ///
  328. Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {
  329. if ((args_ != nullptr) || (args_size_ > 0)) {
  330. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
  331. GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
  332. return FAILED;
  333. }
  334. args_size_ = sizeof(model_id);
  335. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  336. if (status != RT_ERROR_NONE) {
  337. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status);
  338. GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status);
  339. return RT_ERROR_TO_GE_STATUS(status);
  340. }
  341. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  342. status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
  343. if (status != RT_ERROR_NONE) {
  344. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
  345. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
  346. return RT_ERROR_TO_GE_STATUS(status);
  347. }
  348. return SUCCESS;
  349. }
  350. Status CpuTaskWaitEndGraph::Distribute() {
  351. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  352. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  353. "check invalid", args_size_);
  354. GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
  355. return FAILED;
  356. }
  357. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_);
  358. if (status != RT_ERROR_NONE) {
  359. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
  360. GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
  361. return RT_ERROR_TO_GE_STATUS(status);
  362. }
  363. GELOGI("Cpu kernel launch wait end task success.");
  364. return SUCCESS;
  365. }
  366. ///
  367. /// @ingroup ge
  368. /// @brief definiteness queue schedule, repeat run model.
  369. /// @param [in] model_id: model id for repeat run.
  370. /// @return: 0 for success / others for failed
  371. ///
  372. Status CpuTaskModelRepeat::Init(uint32_t model_id) {
  373. if ((args_ != nullptr) || (args_size_ > 0)) {
  374. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
  375. GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
  376. return FAILED;
  377. }
  378. args_size_ = sizeof(model_id);
  379. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  380. if (status != RT_ERROR_NONE) {
  381. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status);
  382. GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status);
  383. return RT_ERROR_TO_GE_STATUS(status);
  384. }
  385. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  386. status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
  387. if (status != RT_ERROR_NONE) {
  388. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
  389. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
  390. return RT_ERROR_TO_GE_STATUS(status);
  391. }
  392. return SUCCESS;
  393. }
  394. Status CpuTaskModelRepeat::Distribute() {
  395. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  396. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  397. "check invalid", args_size_);
  398. GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
  399. return FAILED;
  400. }
  401. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_);
  402. if (status != RT_ERROR_NONE) {
  403. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
  404. GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
  405. return RT_ERROR_TO_GE_STATUS(status);
  406. }
  407. GELOGI("Cpu kernel launch repeat task success.");
  408. return SUCCESS;
  409. }
  410. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示