You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 17 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago

  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. * http://www.apache.org/licenses/LICENSE-2.0
  7. * Unless required by applicable law or agreed to in writing, software
  8. * distributed under the License is distributed on an "AS IS" BASIS,
  9. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. * See the License for the specific language governing permissions and
  11. * limitations under the License.
  12. */
  13. #ifndef __CCE_RUNTIME_MEM_H__
  14. #define __CCE_RUNTIME_MEM_H__
  15. /*lint -e7*/
  16. #include <stddef.h>
  17. /*lint +e7*/
  18. #include "base.h"
  19. #include "config.h"
  20. #include "stream.h"
  21. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  22. extern "C" {
  23. #endif
  24. /**
  25. * @ingroup dvrt_mem
  26. * @brief memory type flags; OR-ed with a memory policy to form an rtMemType_t
  27. */
  28. #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
  29. #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
  30. #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
  31. #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
  32. #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
  33. #define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device; NOTE(review): 0x11 is 0x10 | 0x1, not a single bit like its neighbours - confirm
  34. #define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache
  35. #define RT_MEMORY_TS_4G ((uint32_t)0x40)
  36. #define RT_MEMORY_TS ((uint32_t)0x80)
  37. #define RT_MEMORY_RESERVED ((uint32_t)0x100)
  38. #define RT_MEMORY_L1 ((uint32_t)0x1<<16) // bit 16, outside the MEM_ALLOC_TYPE_BIT mask
  39. #define RT_MEMORY_L2 ((uint32_t)0x1<<17) // bit 17, outside the MEM_ALLOC_TYPE_BIT mask
  40. /**
  41. * @ingroup dvrt_mem
  42. * @brief memory info type (consecutive values 0x1 to 0x4)
  43. */
  44. #define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1)
  45. #define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2)
  46. #define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3)
  47. #define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4)
  48. /**
  49. * @ingroup dvrt_mem
  50. * @brief memory policy flags (bits 10 to 15); OR-ed with a memory type to form an rtMemType_t
  51. */
  52. #define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // no policy bit set; malloc mem prefers huge page, then default page (same text as HUGE_PAGE_FIRST - TODO confirm)
  53. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prefers huge page, then default page
  54. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only uses huge page
  55. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only uses default page
  56. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prefers huge page, then default page, used for p2p
  57. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only uses huge page, used for p2p
  58. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only uses default page, used for p2p
  59. #define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mask of the mem type bits <0, 9>; policy bits start at bit 10
  60. /**
  61. * @ingroup dvrt_mem
  62. * @brief bitwise OR of a memory type (RT_MEMORY_*) and a memory policy (RT_MEMORY_POLICY_*)
  63. */
  64. typedef uint32_t rtMemType_t;
  65. /**
  66. * @ingroup dvrt_mem
  67. * @brief memory advise type
  68. */
  69. #define RT_MEMORY_ADVISE_EXE (0x02)
  70. #define RT_MEMORY_ADVISE_THP (0x04)
  71. #define RT_MEMORY_ADVISE_PLE (0x08)
  72. #define RT_MEMORY_ADVISE_PIN (0x16) // NOTE(review): 0x16 (binary 10110) shares bits with EXE (0x02) and THP (0x04); 0x10 may have been intended - confirm with the runtime before changing
  73. /**
  74. * @ingroup dvrt_mem
  75. * @brief memory copy kind, passed to rtMemcpy and rtMemcpyAsync
  76. */
  77. typedef enum tagRtMemcpyKind {
  78. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  79. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  80. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  81. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  82. RT_MEMCPY_MANAGED, // managed memory
  83. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, // NOTE(review): undocumented in this header
  84. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  85. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  86. RT_MEMCPY_RESERVED,
  87. } rtMemcpyKind_t;
  88. typedef enum tagRtMemInfoType { // memory info selector taken by rtMemGetInfoEx
  89. RT_MEMORYINFO_DDR,
  90. RT_MEMORYINFO_HBM,
  91. RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
  92. RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
  93. RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
  94. RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
  95. RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR (P2P)
  96. RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR (P2P)
  97. RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM (P2P)
  98. RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM (P2P)
  99. } rtMemInfoType_t;
  100. typedef enum tagRtRecudeKind { // reduce kind taken by rtReduceAsync; "Recude" is the spelling used in the public name
  101. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P
  102. RT_RECUDE_KIND_END // implicit value 11
  103. } rtRecudeKind_t;
  104. typedef enum tagRtDataType { // element data type taken by rtReduceAsync
  105. RT_DATA_TYPE_FP32 = 0, // fp32
  106. RT_DATA_TYPE_FP16 = 1, // fp16
  107. RT_DATA_TYPE_INT16 = 2, // int16
  108. RT_DATA_TYPE_END // implicit value 3
  109. } rtDataType_t;
  110. /**
  111. * @ingroup dvrt_mem
  112. * @brief memory copy channel type
  113. */
  114. typedef enum tagRtMemcpyChannelType {
  115. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  116. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  117. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not supported now
  118. RT_MEMCPY_CHANNEL_TYPE_RESERVED,
  119. } rtMemcpyChannelType_t;
  120. /**
  121. * @ingroup rt_kernel
  122. * @brief AI Core memory sizes, taken by rtAiCoreMemorySizes and rtSetAiCoreMemorySizes
  123. */
  124. typedef struct rtAiCoreMemorySize {
  125. uint32_t l0ASize;
  126. uint32_t l0BSize;
  127. uint32_t l0CSize;
  128. uint32_t l1Size;
  129. uint32_t ubSize;
  130. uint32_t l2Size;
  131. uint32_t l2PageNum;
  132. uint32_t blockSize;
  133. uint64_t bankSize;
  134. uint64_t bankNum;
  135. uint64_t burstInOneBlock;
  136. uint64_t bankGroupNum;
  137. } rtAiCoreMemorySize_t;
  138. /**
  139. * @ingroup dvrt_mem
  140. * @brief memory type reported in rtPointerAttributes_t (values start at 1)
  141. */
  142. typedef enum tagRtMemoryType {
  143. RT_MEMORY_TYPE_HOST = 1,
  144. RT_MEMORY_TYPE_DEVICE = 2 ,
  145. RT_MEMORY_TYPE_SVM = 3,
  146. RT_MEMORY_TYPE_DVPP = 4
  147. } rtMemoryType_t;
  148. /**
  149. * @ingroup dvrt_mem
  150. * @brief pointer attributes, output of rtPointerGetAttributes
  151. */
  152. typedef struct tagRtPointerAttributes {
  153. rtMemoryType_t memoryType; // host memory or device memory
  154. rtMemoryType_t locationType; // NOTE(review): undocumented; presumably where the memory currently resides - confirm
  155. uint32_t deviceID; // device ID
  156. uint32_t pageSize; // NOTE(review): undocumented; unit not visible in this header
  157. } rtPointerAttributes_t;
  158. typedef struct rtMallocHostSharedMemoryIn { // input parameters of rtMallocHostSharedMemory
  159. const char* name;
  160. const uint64_t size; // const member: can only be set when the struct is initialized
  161. uint32_t flag;
  162. } rtMallocHostSharedMemoryIn;
  163. typedef struct rtMallocHostSharedMemoryOut { // output info of rtMallocHostSharedMemory
  164. int fd;
  165. void* ptr;
  166. void* devPtr;
  167. } rtMallocHostSharedMemoryOut;
  168. typedef struct rtFreeHostSharedMemoryIn { // input parameters of rtFreeHostSharedMemory
  169. const char* name;
  170. const uint64_t size; // const member: can only be set when the struct is initialized
  171. int fd; // fd, ptr and devPtr have the same names as the rtMallocHostSharedMemoryOut fields
  172. void* ptr;
  173. void* devPtr;
  174. } rtFreeHostSharedMemoryIn;
  175. /**
  176. * @ingroup dvrt_mem
  177. * @brief alloc device memory
  178. * @param [in|out] devPtr address of the memory pointer
  179. * @param [in] size memory size
  180. * @param [in] type memory type | memory policy (see rtMemType_t)
  181. * @return RT_ERROR_NONE for ok
  182. * @return RT_ERROR_INVALID_VALUE for error input
  183. */
  184. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  185. /**
  186. * @ingroup dvrt_mem
  187. * @brief free device memory
  188. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer is not modified)
  189. * @return RT_ERROR_NONE for ok
  190. * @return RT_ERROR_INVALID_VALUE for error input
  191. */
  192. RTS_API rtError_t rtFree(void *devPtr);
  193. /**
  194. * @ingroup dvrt_mem
  195. * @brief alloc device memory for dvpp
  196. * @param [in|out] devPtr address of the memory pointer
  197. * @param [in] size memory size
  198. * @return RT_ERROR_NONE for ok
  199. * @return RT_ERROR_INVALID_VALUE for error input
  200. */
  201. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  202. /**
  203. * @ingroup dvrt_mem
  204. * @brief free device memory for dvpp
  205. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer is not modified)
  206. * @return RT_ERROR_NONE for ok
  207. * @return RT_ERROR_INVALID_VALUE for error input
  208. */
  209. RTS_API rtError_t rtDvppFree(void *devPtr);
  210. /**
  211. * @ingroup dvrt_mem
  212. * @brief alloc host memory
  213. * @param [in|out] hostPtr address of the memory pointer
  214. * @param [in] size memory size
  215. * @return RT_ERROR_NONE for ok
  216. * @return RT_ERROR_INVALID_VALUE for error input
  217. */
  218. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  219. /**
  220. * @ingroup dvrt_mem
  221. * @brief free host memory
  222. * @param [in] hostPtr host memory pointer
  223. * @return RT_ERROR_NONE for ok
  224. * @return RT_ERROR_INVALID_VALUE for error input
  225. */
  226. RTS_API rtError_t rtFreeHost(void *hostPtr);
  227. /**
  228. * @ingroup dvrt_mem
  229. * @brief alloc host shared memory
  230. * @param [in] in alloc host shared memory inputPara pointer
  231. * @param [out] out alloc host shared memory outputInfo pointer
  232. * @return RT_ERROR_NONE for ok
  233. * @return RT_ERROR_INVALID_VALUE for error input
  234. */
  235. RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in,
  236. rtMallocHostSharedMemoryOut *out);
  237. /**
  238. * @ingroup dvrt_mem
  239. * @brief free host shared memory
  240. * @param [in] in free host shared memory inputPara pointer
  241. * @return RT_ERROR_NONE for ok
  242. * @return RT_ERROR_INVALID_VALUE for error input
  243. */
  244. RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in);
  245. /**
  246. * @ingroup dvrt_mem
  247. * @brief alloc managed memory
  248. * @param [in|out] ptr address of the memory pointer
  249. * @param [in] size memory size
  250. * @param [in] flag reserved, set to 0.
  251. * @return RT_ERROR_NONE for ok
  252. * @return RT_ERROR_INVALID_VALUE for error input
  253. */
  254. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  255. /**
  256. * @ingroup dvrt_mem
  257. * @brief free managed memory
  258. * @param [in] ptr managed memory pointer
  259. * @return RT_ERROR_NONE for ok
  260. * @return RT_ERROR_INVALID_VALUE for error input
  261. */
  262. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  263. /**
  264. * @ingroup dvrt_mem
  265. * @brief alloc cached device memory
  266. * @param [in|out] devPtr address of the memory pointer
  267. * @param [in] size memory size
  268. * @param [in] type memory type | memory policy (see rtMemType_t)
  269. * @return RT_ERROR_NONE for ok
  270. */
  271. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  272. /**
  273. * @ingroup dvrt_mem
  274. * @brief flush device memory
  275. * @param [in] base virtual base address
  276. * @param [in] len memory size
  277. * @return RT_ERROR_NONE for ok, errno for failed
  278. */
  279. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  280. /**
  281. * @ingroup dvrt_mem
  282. * @brief invalidate device memory
  283. * @param [in] base virtual base address
  284. * @param [in] len memory size
  285. * @return RT_ERROR_NONE for ok, errno for failed
  286. */
  287. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  288. /**
  289. * @ingroup dvrt_mem
  290. * @brief synchronized memcpy
  291. * @param [in] dst destination address pointer
  292. * @param [in] destMax Max length of destination address memory
  293. * @param [in] src source address pointer
  294. * @param [in] count the number of bytes to copy
  295. * @param [in] kind memcpy type
  296. * @return RT_ERROR_NONE for ok
  297. * @return RT_ERROR_INVALID_VALUE for error input
  298. */
  299. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind);
  300. /**
  301. * @ingroup dvrt_mem
  302. * @brief asynchronized memcpy
  303. * @param [in] dst destination address pointer
  304. * @param [in] destMax Max length of destination address memory
  305. * @param [in] src source address pointer
  306. * @param [in] count the number of bytes to copy
  307. * @param [in] kind memcpy type
  308. * @param [in] stream asynchronized task stream
  309. * @return RT_ERROR_NONE for ok
  310. * @return RT_ERROR_INVALID_VALUE for error input
  311. */
  312. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind,
  313. rtStream_t stream);
  314. /**
  315. * @ingroup dvrt_mem
  316. * @brief asynchronized reduce memcpy
  317. * @param [in] dst destination address pointer
  318. * @param [in] destMax Max length of destination address memory
  319. * @param [in] src source address pointer
  320. * @param [in] count the number of bytes to copy
  321. * @param [in] kind reduce kind (rtRecudeKind_t)
  322. * @param [in] type data type
  323. * @param [in] stream asynchronized task stream
  324. * @return RT_ERROR_NONE for ok
  325. * @return RT_ERROR_INVALID_VALUE for error input
  326. */
  327. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  328. rtDataType_t type, rtStream_t stream);
  329. /**
  330. * @ingroup dvrt_mem
  331. * @brief query AI Core memory sizes
  332. * @param [in] aiCoreMemorySize NOTE(review): tagged [in] although this is a query through a non-const pointer; possibly [out] - confirm
  333. * @return RT_ERROR_NONE for ok, errno for failed
  334. * @return RT_ERROR_INVALID_VALUE for error input
  335. */
  336. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  337. /**
  338. * @ingroup dvrt_mem
  339. * @brief set memory size. Set before model reasoning (inference), bright screen, to prevent the model from not being fully
  340. integrated into the network due to memory limitations. Requirement comes from JiaMinHu. Only used for Tiny.
  341. * @param [in] aiCoreMemorySize AI Core memory sizes to set
  342. * @return RT_ERROR_NONE for ok, errno for failed
  343. * @return RT_ERROR_INVALID_VALUE for error input
  344. */
  345. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  346. /**
  347. * @ingroup dvrt_mem
  348. * @brief set memory with uint32_t value
  349. * @param [in] devPtr memory pointer to set
  350. * @param [in] destMax Max length of destination address memory
  351. * @param [in] value value to set
  352. * @param [in] count byte num
  353. * @return RT_ERROR_NONE for ok, errno for failed
  354. * @return RT_ERROR_INVALID_VALUE for error input
  355. */
  356. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
  357. /**
  358. * @ingroup dvrt_mem
  359. * @brief set memory with uint32_t value async
  360. * @param [in] ptr memory pointer to set
  361. * @param [in] destMax Max length of destination address memory
  362. * @param [in] value value to set
  363. * @param [in] count byte num
  364. * @param [in] stream asynchronized task stream
  365. * @return RT_ERROR_NONE for ok, errno for failed
  366. * @return RT_ERROR_INVALID_VALUE for error input
  367. */
  368. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream);
  369. /**
  370. * @ingroup dvrt_mem
  371. * @brief get current device memory total and free
  372. * @param [out] free free memory size
  373. * @param [out] total total memory size
  374. * @return RT_ERROR_NONE for ok, errno for failed
  375. * @return RT_ERROR_INVALID_VALUE for error input
  376. */
  377. RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
  378. /**
  379. * @ingroup dvrt_mem
  380. * @brief get current device memory total and free for the given memory info type
  381. * @param [in] memInfoType memory info type to query (rtMemInfoType_t)
  382. * @param [out] free free memory size
  383. * @param [out] total total memory size
  384. * @return RT_ERROR_NONE for ok, errno for failed
  385. */
  386. RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total);
  387. /**
  388. * @ingroup dvrt_mem
  389. * @brief prefetch memory to device; NOTE(review): the original brief ("set memory with uint32_t value") was copied from rtMemset - confirm semantics
  390. * @param [in] devPtr memory pointer
  391. * @param [in] len memory length
  392. * @param [in] device device id - TODO confirm
  393. * @return RT_ERROR_NONE for ok, errno for failed
  394. * @return RT_ERROR_INVALID_VALUE for error input
  395. */
  396. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device);
  397. /**
  398. * @ingroup dvrt_mem
  399. * @brief get memory attribute:Host or Device
  400. * @param [out] attributes attributes of the memory that ptr refers to
  401. * @param [in] ptr memory pointer to query
  402. * @return RT_ERROR_NONE for ok, errno for failed
  403. * @return RT_ERROR_INVALID_VALUE for error input
  404. */
  405. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  406. /**
  407. * @ingroup dvrt_mem
  408. * @brief make memory shared interprocess and assigned a name
  409. * @param [in] ptr device memory address pointer
  410. * @param [in] byteCount identification byteCount
  411. * @param [in] name identification name; len (4th argument) is undocumented. NOTE(review): name is a non-const char * with a length, so it may be an output buffer - confirm
  412. * @return RT_ERROR_NONE for ok
  413. * @return RT_ERROR_INVALID_VALUE for error input
  414. * @return RT_ERROR_DRV_ERR for driver error
  415. */
  416. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len);
  417. /**
  418. * @ingroup dvrt_mem
  419. * @brief destroy an interprocess shared memory
  420. * @param [in] name identification name
  421. * @return RT_ERROR_NONE for ok
  422. * @return RT_ERROR_INVALID_VALUE for error input
  423. * @return RT_ERROR_DRV_ERR for driver error
  424. */
  425. RTS_API rtError_t rtIpcDestroyMemoryName(const char *name);
  426. /**
  427. * @ingroup dvrt_mem
  428. * @brief open an interprocess shared memory
  429. * @param [in|out] ptr address of the device memory address pointer
  430. * @param [in] name identification name
  431. * @return RT_ERROR_NONE for ok
  432. * @return RT_ERROR_INVALID_VALUE for error input
  433. * @return RT_ERROR_DRV_ERR for driver error
  434. */
  435. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name);
  436. /**
  437. * @ingroup dvrt_mem
  438. * @brief close an interprocess shared memory
  439. * @param [in] ptr device memory address pointer
  440. * @note the signature takes only ptr; there is no name parameter
  441. * @return RT_ERROR_NONE for ok
  442. * @return RT_ERROR_INVALID_VALUE for error input
  443. * @return RT_ERROR_DRV_ERR for driver error
  444. */
  445. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  446. /**
  447. * @ingroup dvrt_mem
  448. * @brief HCCL async memory copy
  449. * @param [in] index sq index
  450. * @param [in] wqeIndex module index
  451. * @param [in] stream asynchronized task stream
  452. * @return RT_ERROR_NONE for ok
  453. * @return RT_ERROR_INVALID_VALUE for error input
  454. * @return RT_ERROR_DRV_ERR for driver error
  455. */
  456. RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t stream);
  457. /**
  458. * @ingroup dvrt_mem
  459. * @brief Ipc set mem pid
  460. * @param [in] name name to be queried
  461. * @param [in] pid array of process ids
  462. * @param [in] num length of pid[]
  463. * @return RT_ERROR_NONE for ok
  464. * @return RT_ERROR_INVALID_VALUE for error input
  465. * @return RT_ERROR_DRV_ERR for driver error
  466. */
  467. RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  468. /**
  469. * @ingroup dvrt_mem
  470. * @brief HCCL async memory copy (doorbell send; brief is shared with rtRDMASend - TODO confirm)
  471. * @param [in] dbIndex single device 0
  472. * @param [in] dbInfo doorbell info
  473. * @param [in] stream asynchronized task stream
  474. * @return RT_ERROR_NONE for ok
  475. * @return RT_ERROR_INVALID_VALUE for error input
  476. * @return RT_ERROR_DRV_ERR for driver error
  477. */
  478. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
  479. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  480. }
  481. #endif
  482. #endif // __CCE_RUNTIME_MEM_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示