You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

mem.h 21 kB

5 years ago
5 years ago
3 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
3 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
  3. * Description: mem.h
  4. * Create: 2020-01-01
  5. */
  6. #ifndef CCE_RUNTIME_MEM_H
  7. #define CCE_RUNTIME_MEM_H
  8. #include <stddef.h>
  9. #include "base.h"
  10. #include "config.h"
  11. #include "stream.h"
  12. #if defined(__cplusplus)
  13. extern "C" {
  14. #endif
  15. /**
  16. * @ingroup dvrt_mem
  17. * @brief memory type (combined with a memory policy in rtMemType_t)
  18. */
  19. #define RT_MEMORY_DEFAULT (0x0U) // default memory on device
  20. #define RT_MEMORY_HBM (0x2U) // HBM memory on device
  21. #define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device
  22. #define RT_MEMORY_DDR (0x4U) // DDR memory on device
  23. #define RT_MEMORY_SPM (0x8U) // shared physical memory on device
  24. #define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device
  25. #define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device
  26. #define RT_MEMORY_DDR_NC (0x20U) // non-cache DDR memory
  27. #define RT_MEMORY_TS (0x40U) // Used for Ts memory
  28. #define RT_MEMORY_TS_4G (0x40U) // Used for Ts memory(only 1951); same value as RT_MEMORY_TS
  29. #define RT_MEMORY_HOST (0x81U) // Memory on host
  30. #define RT_MEMORY_RESERVED (0x100U)
  31. #define RT_MEMORY_L1 (0x1U << 16U) // bit 16
  32. #define RT_MEMORY_L2 (0x1U << 17U) // bit 17
  33. /**
  34. * @ingroup dvrt_mem
  35. * @brief memory info type (macro constants starting at 0x1U; separate from enum rtMemInfoType_t)
  36. */
  37. #define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U)
  38. #define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U)
  39. #define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U)
  40. #define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U)
  41. /**
  42. * @ingroup dvrt_mem
  43. * @brief memory policy (page policy bits, 0x1U << 10U through 0x1U << 15U)
  44. */
  45. #define RT_MEMORY_POLICY_NONE (0x0U) // Prefer huge pages, then default pages (NOTE(review): same text as HUGE_PAGE_FIRST - confirm)
  46. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x400U) // Prefer huge pages, then fall back to default pages, 0x1U << 10U
  47. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x800U) // Allocate with huge pages only, 0x1U << 11U
  48. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1000U) // Allocate with default pages only, 0x1U << 12U
  49. // Prefer huge pages, then fall back to default pages, for p2p, 0x1U << 13U
  50. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x2000U)
  51. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Allocate with huge pages only, for p2p, 0x1U << 14U
  52. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Allocate with default pages only, for p2p, 0x1U << 15U
  53. /**
  54. * @ingroup dvrt_mem
  55. * @brief memory attribute
  56. */
  57. #define RT_MEMORY_ATTRIBUTE_DEFAULT (0x0U)
  58. // Read-only memory attribute; currently only supported for dvpp memory.
  59. #define RT_MEMORY_ATTRIBUTE_READONLY (0x100000U) // Allocate read-only, 0x1U << 20U.
  60. #define MEM_ALLOC_TYPE_BIT (0x3FFU) // mask of the memory-type bits, bits <0, 9>
  61. /**
  62. * @ingroup dvrt_mem
  63. * @brief memory type | memory policy, combined in one 32-bit value
  64. */
  65. typedef uint32_t rtMemType_t; // NOTE(review): <stdint.h> is not included directly here - presumably provided by base.h; confirm
  66. /**
  67. * @ingroup dvrt_mem
  68. * @brief memory advise type
  69. */
  70. #define RT_MEMORY_ADVISE_EXE (0x02U)
  71. #define RT_MEMORY_ADVISE_THP (0x04U)
  72. #define RT_MEMORY_ADVISE_PLE (0x08U)
  73. #define RT_MEMORY_ADVISE_PIN (0x16U) // NOTE(review): 0x16 == 0x10 | 0x04 | 0x02, so it overlaps EXE and THP; 0x10U may have been intended - confirm
  74. /**
  75. * @ingroup dvrt_mem
  76. * @brief memory copy type (enumerators take consecutive values starting at 0)
  77. */
  78. typedef enum tagRtMemcpyKind {
  79. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  80. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  81. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  82. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  83. RT_MEMCPY_MANAGED, // managed memory
  84. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, // implicit value 5
  85. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  86. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  87. RT_MEMCPY_RESERVED, // implicit value 8, last enumerator
  88. } rtMemcpyKind_t;
  89. typedef enum tagRtMemInfoType { // memory info kinds accepted by rtMemGetInfoEx
  90. RT_MEMORYINFO_DDR,
  91. RT_MEMORYINFO_HBM,
  92. RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
  93. RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
  94. RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
  95. RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
  96. RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR (P2P)
  97. RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR (P2P)
  98. RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM (P2P)
  99. RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM (P2P)
  100. } rtMemInfoType_t;
  101. typedef enum tagRtRecudeKind { // reduce kinds taken by rtReduceAsync / rtReduceAsyncV2; the "Recude" spelling is part of the public name
  102. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P
  103. RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11,
  104. RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12,
  105. RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13,
  106. RT_RECUDE_KIND_END = 14, // one past the last reduce kind
  107. } rtRecudeKind_t;
  108. typedef enum tagRtDataType { // data types taken by rtReduceAsync / rtReduceAsyncV2
  109. RT_DATA_TYPE_FP32 = 0, // fp32
  110. RT_DATA_TYPE_FP16 = 1, // fp16
  111. RT_DATA_TYPE_INT16 = 2, // int16
  112. RT_DATA_TYPE_INT4 = 3, // int4
  113. RT_DATA_TYPE_INT8 = 4, // int8
  114. RT_DATA_TYPE_INT32 = 5, // int32
  115. RT_DATA_TYPE_BFP16 = 6, // bfp16
  116. RT_DATA_TYPE_BFP32 = 7, // bfp32
  117. RT_DATA_TYPE_UINT8 = 8, // uint8
  118. RT_DATA_TYPE_UINT16 = 9, // uint16
  119. RT_DATA_TYPE_UINT32 = 10, // uint32
  120. RT_DATA_TYPE_END = 11, // one past the last data type
  121. } rtDataType_t;
  122. /**
  123. * @ingroup dvrt_mem
  124. * @brief memory copy channel type
  125. */
  126. typedef enum tagRtMemcpyChannelType {
  127. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  128. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  129. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not supported now
  130. RT_MEMCPY_CHANNEL_TYPE_RESERVED,
  131. } rtMemcpyChannelType_t;
  132. /**
  133. * @ingroup rt_kernel
  134. * @brief ai core memory size (parameter type of rtAiCoreMemorySizes and rtSetAiCoreMemorySizes)
  135. */
  136. typedef struct rtAiCoreMemorySize {
  137. uint32_t l0ASize;
  138. uint32_t l0BSize;
  139. uint32_t l0CSize;
  140. uint32_t l1Size;
  141. uint32_t ubSize;
  142. uint32_t l2Size;
  143. uint32_t l2PageNum;
  144. uint32_t blockSize;
  145. uint64_t bankSize;
  146. uint64_t bankNum;
  147. uint64_t burstInOneBlock;
  148. uint64_t bankGroupNum;
  149. } rtAiCoreMemorySize_t; // NOTE(review): units of the size fields are not stated in this header - confirm
  150. /**
  151. * @ingroup dvrt_mem
  152. * @brief memory type as reported in rtPointerAttributes_t (memoryType / locationType)
  153. */
  154. typedef enum tagRtMemoryType {
  155. RT_MEMORY_TYPE_HOST = 1,
  156. RT_MEMORY_TYPE_DEVICE = 2,
  157. RT_MEMORY_TYPE_SVM = 3,
  158. RT_MEMORY_TYPE_DVPP = 4
  159. } rtMemoryType_t;
  160. /**
  161. * @ingroup dvrt_mem
  162. * @brief pointer attributes, the output type of rtPointerGetAttributes
  163. */
  164. typedef struct tagRtPointerAttributes {
  165. rtMemoryType_t memoryType; // host memory or device memory
  166. rtMemoryType_t locationType;
  167. uint32_t deviceID; // device ID
  168. uint32_t pageSize;
  169. } rtPointerAttributes_t;
  170. typedef struct { // input parameters of rtMallocHostSharedMemory
  171. const char_t *name;
  172. const uint64_t size; // const member: can only be set when the struct is initialized
  173. uint32_t flag;
  174. } rtMallocHostSharedMemoryIn;
  175. typedef struct { // output information of rtMallocHostSharedMemory
  176. int32_t fd;
  177. void *ptr;
  178. void *devPtr;
  179. } rtMallocHostSharedMemoryOut;
  180. typedef struct { // input parameters of rtFreeHostSharedMemory
  181. const char_t *name;
  182. const uint64_t size; // const member: can only be set when the struct is initialized
  183. int32_t fd;
  184. void *ptr;
  185. void *devPtr;
  186. } rtFreeHostSharedMemoryIn;
  187. /**
  188. * @ingroup dvrt_mem
  189. * @brief allocate device memory
  190. * @param [in|out] devPtr address of the memory pointer
  191. * @param [in] size memory size
  192. * @param [in] type memory type (rtMemType_t: memory type | memory policy)
  193. * @return RT_ERROR_NONE for ok
  194. * @return RT_ERROR_INVALID_VALUE for error input
  195. */
  196. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  197. /**
  198. * @ingroup dvrt_mem
  199. * @brief free device memory
  200. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer variable is not modified)
  201. * @return RT_ERROR_NONE for ok
  202. * @return RT_ERROR_INVALID_VALUE for error input
  203. */
  204. RTS_API rtError_t rtFree(void *devPtr);
  205. /**
  206. * @ingroup dvrt_mem
  207. * @brief allocate device memory for dvpp
  208. * @param [in|out] devPtr address of the memory pointer
  209. * @param [in] size memory size
  210. * @return RT_ERROR_NONE for ok
  211. * @return RT_ERROR_INVALID_VALUE for error input
  212. */
  213. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  214. /**
  215. * @ingroup dvrt_mem
  216. * @brief allocate device memory for dvpp, with support for setting a flag
  217. * @param [in|out] devPtr address of the memory pointer
  218. * @param [in] size memory size
  219. * @param [in] flag memory flag; a memory attribute (e.g. RT_MEMORY_ATTRIBUTE_READONLY) can be used to set read only.
  220. * @return RT_ERROR_NONE for ok
  221. * @return RT_ERROR_INVALID_VALUE for error input
  222. * @return others for error
  223. */
  224. RTS_API rtError_t rtDvppMallocWithFlag(void **devPtr, uint64_t size, uint32_t flag);
  225. /**
  226. * @ingroup dvrt_mem
  227. * @brief free device memory for dvpp
  228. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer variable is not modified)
  229. * @return RT_ERROR_NONE for ok
  230. * @return RT_ERROR_INVALID_VALUE for error input
  231. */
  232. RTS_API rtError_t rtDvppFree(void *devPtr);
  233. /**
  234. * @ingroup dvrt_mem
  235. * @brief allocate host memory
  236. * @param [in|out] hostPtr address of the memory pointer
  237. * @param [in] size memory size
  238. * @return RT_ERROR_NONE for ok
  239. * @return RT_ERROR_INVALID_VALUE for error input
  240. */
  241. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  242. /**
  243. * @ingroup dvrt_mem
  244. * @brief free host memory
  245. * @param [in] hostPtr host memory pointer
  246. * @return RT_ERROR_NONE for ok
  247. * @return RT_ERROR_INVALID_VALUE for error input
  248. */
  249. RTS_API rtError_t rtFreeHost(void *hostPtr);
  250. /**
  251. * @ingroup dvrt_mem
  252. * @brief allocate host shared memory
  253. * @param [in] in pointer to the input parameters for allocating host shared memory
  254. * @param [out] out pointer to the output information for the allocated host shared memory
  255. * @return RT_ERROR_NONE for ok
  256. * @return RT_ERROR_INVALID_VALUE for error input
  257. */
  258. RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in,
  259. rtMallocHostSharedMemoryOut *out);
  260. /**
  261. * @ingroup dvrt_mem
  262. * @brief free host shared memory
  263. * @param [in] in pointer to the input parameters for freeing host shared memory
  264. * @return RT_ERROR_NONE for ok
  265. * @return RT_ERROR_INVALID_VALUE for error input
  266. */
  267. RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in);
  268. /**
  269. * @ingroup dvrt_mem
  270. * @brief allocate managed memory
  271. * @param [in|out] ptr address of the memory pointer
  272. * @param [in] size memory size
  273. * @param [in] flag reserved, set to 0.
  274. * @return RT_ERROR_NONE for ok
  275. * @return RT_ERROR_INVALID_VALUE for error input
  276. */
  277. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  278. /**
  279. * @ingroup dvrt_mem
  280. * @brief free managed memory
  281. * @param [in] ptr managed memory pointer
  282. * @return RT_ERROR_NONE for ok
  283. * @return RT_ERROR_INVALID_VALUE for error input
  284. */
  285. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  286. /**
  287. * @ingroup dvrt_mem
  288. * @brief allocate cached device memory
  289. * @param [in|out] devPtr address of the memory pointer
  290. * @param [in] size memory size
  291. * @param [in] type memory type
  292. * @return RT_ERROR_NONE for ok
  293. */
  294. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  295. /**
  296. * @ingroup dvrt_mem
  297. * @brief flush device memory
  298. * @param [in] base virtual base address
  299. * @param [in] len memory size
  300. * @return RT_ERROR_NONE for ok, errno for failed
  301. */
  302. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  303. /**
  304. * @ingroup dvrt_mem
  305. * @brief invalidate device memory
  306. * @param [in] base virtual base address
  307. * @param [in] len memory size
  308. * @return RT_ERROR_NONE for ok, errno for failed
  309. */
  310. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  311. /**
  312. * @ingroup dvrt_mem
  313. * @brief synchronized memcpy
  314. * @param [in] dst destination address pointer
  315. * @param [in] destMax Max length of destination address memory
  316. * @param [in] src source address pointer
  317. * @param [in] cnt the number of bytes to copy
  318. * @param [in] kind memcpy type
  319. * @return RT_ERROR_NONE for ok
  320. * @return RT_ERROR_INVALID_VALUE for error input
  321. */
  322. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind);
  323. /**
  324. * @ingroup dvrt_mem
  325. * @brief host task memcpy
  326. * @param [in] dst destination address pointer
  327. * @param [in] destMax Max length of destination address memory
  328. * @param [in] src source address pointer
  329. * @param [in] cnt the number of bytes to copy
  330. * @param [in] kind memcpy type
  331. * @param [in] stm task stream
  332. * @return RT_ERROR_NONE for ok, errno for failed
  333. */
  334. RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src,
  335. const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm);
  336. /**
  337. * @ingroup dvrt_mem
  338. * @brief asynchronized memcpy
  339. * @param [in] dst destination address pointer
  340. * @param [in] destMax Max length of destination address memory
  341. * @param [in] src source address pointer
  342. * @param [in] cnt the number of bytes to copy
  343. * @param [in] kind memcpy type
  344. * @param [in] stm asynchronized task stream
  345. * @return RT_ERROR_NONE for ok
  346. * @return RT_ERROR_INVALID_VALUE for error input
  347. */
  348. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind,
  349. rtStream_t stm);
  350. typedef struct rtMemcpyAddrInfo { // NOTE(review): presumably the layout behind rtMemcpyAsyncPtr's memcpyAddrInfo argument - confirm
  351. uint32_t resv0;
  352. uint32_t resv1;
  353. uint32_t resv2;
  354. uint32_t len;
  355. uint64_t src; // 64-bit integer, not a pointer type
  356. uint64_t dst; // 64-bit integer, not a pointer type
  357. } rtMemcpyAddrInfo;
  358. RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint64_t count,
  359. rtMemcpyKind_t kind, rtStream_t stream); // NOTE(review): undocumented; memcpyAddrInfo presumably points to an rtMemcpyAddrInfo - confirm
  360. /**
  361. * @ingroup dvrt_mem
  362. * @brief asynchronized reduce memcpy
  363. * @param [in] dst destination address pointer
  364. * @param [in] destMax Max length of destination address memory
  365. * @param [in] src source address pointer
  366. * @param [in] cnt the number of bytes to copy
  367. * @param [in] kind reduce kind (rtRecudeKind_t)
  368. * @param [in] type data type
  369. * @param [in] stm asynchronized task stream
  370. * @return RT_ERROR_NONE for ok
  371. * @return RT_ERROR_INVALID_VALUE for error input
  372. */
  373. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind,
  374. rtDataType_t type, rtStream_t stm);
  375. /**
  376. * @ingroup dvrt_mem
  377. * @brief asynchronized reduce memcpy, with an overflow flag address
  378. * @param [in] dst destination address pointer
  379. * @param [in] destMax Max length of destination address memory
  380. * @param [in] src source address pointer
  381. * @param [in] count the number of bytes to copy
  382. * @param [in] kind reduce kind (rtRecudeKind_t)
  383. * @param [in] type data type
  384. * @param [in] stm asynchronized task stream
  385. * @param [in] overflowAddr addr of overflow flag
  386. * @return RT_ERROR_NONE for ok
  387. * @return RT_ERROR_INVALID_VALUE for error input
  388. */
  389. RTS_API rtError_t rtReduceAsyncV2(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  390. rtDataType_t type, rtStream_t stm, void *overflowAddr);
  391. /**
  392. * @ingroup dvrt_mem
  393. * @brief synchronized memcpy2D
  394. * @param [in] dst destination address pointer
  395. * @param [in] dstPitch pitch of destination memory
  396. * @param [in] src source address pointer
  397. * @param [in] srcPitch pitch of source memory
  398. * @param [in] width width of the matrix to transfer
  399. * @param [in] height height of the matrix to transfer
  400. * @param [in] kind memcpy type
  401. * @return RT_ERROR_NONE for ok
  402. * @return RT_ERROR_INVALID_VALUE for error input
  403. */
  404. RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
  405. uint64_t height, rtMemcpyKind_t kind);
  406. /**
  407. * @ingroup dvrt_mem
  408. * @brief asynchronized memcpy2D
  409. * @param [in] dst destination address pointer
  410. * @param [in] dstPitch pitch of destination memory
  411. * @param [in] src source address pointer
  412. * @param [in] srcPitch pitch of source memory
  413. * @param [in] width width of the matrix to transfer
  414. * @param [in] height height of the matrix to transfer
  415. * @param [in] kind memcpy type
  416. * @param [in] stm asynchronized task stream
  417. * @return RT_ERROR_NONE for ok
  418. * @return RT_ERROR_INVALID_VALUE for error input
  419. */
  420. RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
  421. uint64_t height, rtMemcpyKind_t kind, rtStream_t stm);
  422. /**
  423. * @ingroup dvrt_mem
  424. * @brief query ai core memory sizes
  425. * @param [in] aiCoreMemorySize (NOTE(review): non-const pointer on a query function - presumably an output; confirm)
  426. * @return RT_ERROR_NONE for ok, errno for failed
  427. * @return RT_ERROR_INVALID_VALUE for error input
  428. */
  429. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  430. /**
  431. * @ingroup dvrt_mem
  432. * @brief set ai core memory sizes. Set before model inference ("Bright screen" in the original text) to prevent the
  433. * network from failing to be fully integrated due to memory limitations. Requirement comes from JiaMinHu. Only used for Tiny.
  434. * @param [in] aiCoreMemorySize
  435. * @return RT_ERROR_NONE for ok, errno for failed
  436. * @return RT_ERROR_INVALID_VALUE for error input
  437. */
  438. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  439. /**
  440. * @ingroup dvrt_mem
  441. * @brief Specifies how memory is used
  442. * @param [in] devPtr memory pointer
  443. * @param [in] count memory count
  444. * @param [in] advise reserved, set to 1 (NOTE(review): RT_MEMORY_ADVISE_* macros exist in this header - confirm which applies)
  445. * @return RT_ERROR_NONE for ok
  446. * @return others for error
  447. */
  448. RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise);
  449. /**
  450. * @ingroup dvrt_mem
  451. * @brief set memory with uint32_t value
  452. * @param [in] devPtr memory pointer
  453. * @param [in] destMax Max length of destination address memory
  454. * @param [in] val value to set
  455. * @param [in] cnt byte num
  456. * @return RT_ERROR_NONE for ok, errno for failed
  457. * @return RT_ERROR_INVALID_VALUE for error input
  458. */
  459. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t val, uint64_t cnt);
  460. /**
  461. * @ingroup dvrt_mem
  462. * @brief set memory with uint32_t value async
  463. * @param [in] ptr memory pointer
  464. * @param [in] destMax Max length of destination address memory
  465. * @param [in] val value to set
  466. * @param [in] cnt byte num
  467. * @param [in] stm task stream
  468. * @return RT_ERROR_NONE for ok, errno for failed
  469. * @return RT_ERROR_INVALID_VALUE for error input
  470. */
  471. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t val, uint64_t cnt, rtStream_t stm);
  472. /**
  473. * @ingroup dvrt_mem
  474. * @brief get the total and free memory of the current device
  475. * @param [out] freeSize free memory size
  476. * @param [out] totalSize total memory size
  477. * @return RT_ERROR_NONE for ok, errno for failed
  478. * @return RT_ERROR_INVALID_VALUE for error input
  479. */
  480. RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize);
  481. /**
  482. * @ingroup dvrt_mem
  483. * @brief get the total and free memory of the current device for the given memory info type
  484. * @param [in] memInfoType memory info type (rtMemInfoType_t)
  485. * @param [out] freeSize free memory size
  486. * @param [out] totalSize total memory size
  487. * @return RT_ERROR_NONE for ok, errno for failed
  488. */
  489. RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize);
  490. /**
  491. * @ingroup dvrt_mem
  492. * @brief prefetch memory to a device (NOTE(review): inferred from the function name; the original brief duplicated rtMemset's - confirm)
  493. * @param [in] devPtr
  494. * @param [in] len
  495. * @param [in] devId
  496. * @return RT_ERROR_NONE for ok, errno for failed
  497. * @return RT_ERROR_INVALID_VALUE for error input
  498. */
  499. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t devId);
  500. /**
  501. * @ingroup dvrt_mem
  502. * @brief get memory attribute:Host or Device
  503. * @param [out] attributes pointer attributes of ptr
  504. * @param [in] ptr memory pointer to query
  505. * @return RT_ERROR_NONE for ok, errno for failed
  506. * @return RT_ERROR_INVALID_VALUE for error input
  507. */
  508. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  509. /**
  510. * @ingroup dvrt_mem
  511. * @brief make memory shared interprocess and assign it a name
  512. * @param [in] ptr device memory address pointer
  513. * @param [in] byteCount identification byteCount
  514. * @param [in] name identification name (non-const char_t *; NOTE(review): may be written by the call - confirm)
  515. * @return RT_ERROR_NONE for ok
  516. * @return RT_ERROR_INVALID_VALUE for error input
  517. * @return RT_ERROR_DRV_ERR for driver error
  518. */
  519. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char_t *name, uint32_t len); // NOTE(review): len is undocumented - presumably the length of the name buffer; confirm
  520. /**
  521. * @ingroup dvrt_mem
  522. * @brief destroy an interprocess shared memory
  523. * @param [in] name identification name
  524. * @return RT_ERROR_NONE for ok
  525. * @return RT_ERROR_INVALID_VALUE for error input
  526. * @return RT_ERROR_DRV_ERR for driver error
  527. */
  528. RTS_API rtError_t rtIpcDestroyMemoryName(const char_t *name);
  529. /**
  530. * @ingroup dvrt_mem
  531. * @brief open an interprocess shared memory
  532. * @param [in|out] ptr address of the device memory pointer
  533. * @param [in] name identification name
  534. * @return RT_ERROR_NONE for ok
  535. * @return RT_ERROR_INVALID_VALUE for error input
  536. * @return RT_ERROR_DRV_ERR for driver error
  537. */
  538. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char_t *name);
  539. /**
  540. * @ingroup dvrt_mem
  541. * @brief close an interprocess shared memory
  542. * @param [in] ptr device memory address pointer
  543. * @note the function takes no name parameter; only ptr is passed
  544. * @return RT_ERROR_NONE for ok
  545. * @return RT_ERROR_INVALID_VALUE for error input
  546. * @return RT_ERROR_DRV_ERR for driver error
  547. */
  548. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  549. /**
  550. * @ingroup dvrt_mem
  551. * @brief HCCL Async memory cpy
  552. * @param [in] sqIndex sq index
  553. * @param [in] wqeIndex module index
  554. * @param [in] stm asynchronized task stream
  555. * @return RT_ERROR_NONE for ok
  556. * @return RT_ERROR_INVALID_VALUE for error input
  557. * @return RT_ERROR_DRV_ERR for driver error
  558. */
  559. RTS_API rtError_t rtRDMASend(uint32_t sqIndex, uint32_t wqeIndex, rtStream_t stm);
  560. /**
  561. * @ingroup dvrt_mem
  562. * @brief Ipc set mem pid
  563. * @param [in] name name to be queried
  564. * @param [in] pid array of process ids
  565. * @param [in] num length of pid[]
  566. * @return RT_ERROR_NONE for ok
  567. * @return RT_ERROR_INVALID_VALUE for error input
  568. * @return RT_ERROR_DRV_ERR for driver error
  569. */
  570. RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num);
  571. /**
  572. * @ingroup dvrt_mem
  573. * @brief HCCL Async memory cpy
  574. * @param [in] dbIndex single device 0
  575. * @param [in] dbInfo doorbell info
  576. * @param [in] stm asynchronized task stream
  577. * @return RT_ERROR_NONE for ok
  578. * @return RT_ERROR_INVALID_VALUE for error input
  579. * @return RT_ERROR_DRV_ERR for driver error
  580. */
  581. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stm);
  582. #if defined(__cplusplus)
  583. }
  584. #endif
  585. #endif // CCE_RUNTIME_MEM_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。