You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 23 kB

5 years ago
5 years ago
3 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
3 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
  3. * Description: mem.h
  4. * Create: 2020-01-01
  5. */
  6. #ifndef CCE_RUNTIME_MEM_H
  7. #define CCE_RUNTIME_MEM_H
  8. #include <stddef.h>
  9. #include "base.h"
  10. #include "config.h"
  11. #include "stream.h"
  12. #if defined(__cplusplus)
  13. extern "C" {
  14. #endif
  15. /**
  16. * @ingroup dvrt_mem
  17. * @brief memory type values; OR-ed with a memory policy to form an rtMemType_t
  18. */
  19. #define RT_MEMORY_DEFAULT (0x0U) // default memory on device
  20. #define RT_MEMORY_HBM (0x2U) // HBM memory on device
  21. #define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device
  22. #define RT_MEMORY_DDR (0x4U) // DDR memory on device
  23. #define RT_MEMORY_SPM (0x8U) // shared physical memory on device
  24. #define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device
  25. #define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device
  26. #define RT_MEMORY_DDR_NC (0x20U) // non-cached DDR memory
  27. #define RT_MEMORY_TS (0x40U) // Used for Ts memory
  28. #define RT_MEMORY_TS_4G (0x40U) // Used for Ts memory (only 51); same value as RT_MEMORY_TS
  29. #define RT_MEMORY_HOST (0x81U) // Memory on host
  30. #define RT_MEMORY_SVM (0x90U) // Memory for SVM
  31. #define RT_MEMORY_HOST_SVM (0x90U) // Memory for host SVM; same value as RT_MEMORY_SVM
  32. #define RT_MEMORY_RESERVED (0x100U) // reserved
  33. #define RT_MEMORY_L1 (0x1U << 16U) // bit 16 (0x10000U)
  34. #define RT_MEMORY_L2 (0x1U << 17U) // bit 17 (0x20000U)
  35. /**
  36. * @ingroup dvrt_mem
  37. * @brief memory info type: identifiers for DDR/HBM size queries (their consumer is not shown in this header)
  38. */
  39. #define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U)
  40. #define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U)
  41. #define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U)
  42. #define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U)
  43. /**
  44. * @ingroup dvrt_mem
  45. * @brief memory policy bits (bits 10..15); OR-ed with a memory type to form an rtMemType_t
  46. */
  47. #define RT_MEMORY_POLICY_NONE (0x0U) // No policy bit set; documented as: prefer huge pages, then default pages
  48. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x400U) // Prefer huge pages, then fall back to default pages, 0x1U << 10U
  49. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x800U) // Use huge pages only, 0x1U << 11U
  50. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1000U) // Use default pages only, 0x1U << 12U
  51. // Prefer huge pages, then fall back to default pages, for p2p, 0x1U << 13U
  52. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x2000U)
  53. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Use huge pages only, for p2p, 0x1U << 14U
  54. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Use default pages only, for p2p, 0x1U << 15U
  55. /**
  56. * @ingroup dvrt_mem
  57. * @brief memory attribute bits
  58. */
  59. #define RT_MEMORY_ATTRIBUTE_DEFAULT (0x0U) // no attribute bit set
  60. // Read-only memory attribute; currently only supported for dvpp memory.
  61. #define RT_MEMORY_ATTRIBUTE_READONLY (0x100000U) // Malloc read-only, 0x1U << 20U
  62. #define MEM_ALLOC_TYPE_BIT (0x3FFU) // mask covering the mem type bits <0, 9>
  63. /**
  64. * @ingroup dvrt_mem
  65. * @brief bitwise OR of a memory type (RT_MEMORY_*) and a memory policy (RT_MEMORY_POLICY_*)
  66. */
  67. typedef uint32_t rtMemType_t;
  68. /**
  69. * @ingroup dvrt_mem
  70. * @brief memory advise type values
  71. */
  72. #define RT_MEMORY_ADVISE_EXE (0x02U)
  73. #define RT_MEMORY_ADVISE_THP (0x04U)
  74. #define RT_MEMORY_ADVISE_PLE (0x08U)
  75. #define RT_MEMORY_ADVISE_PIN (0x16U) // NOTE(review): 0x16U == 0x10U | 0x04U | 0x02U, so it shares bits with EXE and THP; confirm 0x10U was not intended
  76. /**
  77. * @ingroup dvrt_mem
  78. * @brief memory copy kind (direction), passed as the kind argument of the rtMemcpy* functions
  79. */
  80. typedef enum tagRtMemcpyKind {
  81. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  82. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  83. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  84. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  85. RT_MEMCPY_MANAGED, // managed memory
  86. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, // not described in this header
  87. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  88. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  89. RT_MEMCPY_RESERVED, // last enumerator
  90. } rtMemcpyKind_t;
  91. typedef enum tagRtMemInfoType { // memory kind selector taken by rtMemGetInfoEx
  92. RT_MEMORYINFO_DDR,
  93. RT_MEMORYINFO_HBM,
  94. RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
  95. RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
  96. RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
  97. RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
  98. RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR (P2P)
  99. RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR (P2P)
  100. RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM (P2P)
  101. RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM (P2P)
  102. } rtMemInfoType_t;
  103. typedef enum tagRtRecudeKind { // reduce kind for rtReduceAsync*; the "Recude" spelling is part of the public names
  104. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, includes 1P and P2P
  105. RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11,
  106. RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12,
  107. RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13,
  108. RT_RECUDE_KIND_END = 14, // one past the last reduce kind
  109. } rtRecudeKind_t;
  110. typedef enum tagRtDataType { // data type argument of the rtReduceAsync* functions
  111. RT_DATA_TYPE_FP32 = 0, // fp32
  112. RT_DATA_TYPE_FP16 = 1, // fp16
  113. RT_DATA_TYPE_INT16 = 2, // int16
  114. RT_DATA_TYPE_INT4 = 3, // int4
  115. RT_DATA_TYPE_INT8 = 4, // int8
  116. RT_DATA_TYPE_INT32 = 5, // int32
  117. RT_DATA_TYPE_BFP16 = 6, // bfp16
  118. RT_DATA_TYPE_BFP32 = 7, // bfp32
  119. RT_DATA_TYPE_UINT8 = 8, // uint8
  120. RT_DATA_TYPE_UINT16 = 9, // uint16
  121. RT_DATA_TYPE_UINT32 = 10, // uint32
  122. RT_DATA_TYPE_END = 11, // one past the last data type
  123. } rtDataType_t;
  124. /**
  125. * @ingroup dvrt_mem
  126. * @brief memory copy channel type (not referenced by any prototype in this header)
  127. */
  128. typedef enum tagRtMemcpyChannelType {
  129. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  130. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  131. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not supported now
  132. RT_MEMCPY_CHANNEL_TYPE_RESERVED,
  133. } rtMemcpyChannelType_t;
  134. /**
  135. * @ingroup rt_kernel
  136. * @brief AI core memory sizes; passed to rtAiCoreMemorySizes (query) and rtSetAiCoreMemorySizes (set). Units are not stated in this header.
  137. */
  138. typedef struct rtAiCoreMemorySize {
  139. uint32_t l0ASize;
  140. uint32_t l0BSize;
  141. uint32_t l0CSize;
  142. uint32_t l1Size;
  143. uint32_t ubSize;
  144. uint32_t l2Size;
  145. uint32_t l2PageNum;
  146. uint32_t blockSize;
  147. uint64_t bankSize;
  148. uint64_t bankNum;
  149. uint64_t burstInOneBlock;
  150. uint64_t bankGroupNum;
  151. } rtAiCoreMemorySize_t;
  152. /**
  153. * @ingroup dvrt_mem
  154. * @brief memory kind stored in rtPointerAttributes_t (memoryType / locationType); distinct from the RT_MEMORY_* macros
  155. */
  156. typedef enum tagRtMemoryType {
  157. RT_MEMORY_TYPE_HOST = 1,
  158. RT_MEMORY_TYPE_DEVICE = 2,
  159. RT_MEMORY_TYPE_SVM = 3,
  160. RT_MEMORY_TYPE_DVPP = 4
  161. } rtMemoryType_t;
  162. /**
  163. * @ingroup dvrt_mem
  164. * @brief attributes of a pointer, the output structure of rtPointerGetAttributes
  165. */
  166. typedef struct tagRtPointerAttributes {
  167. rtMemoryType_t memoryType; // host memory or device memory
  168. rtMemoryType_t locationType; // not described in this header
  169. uint32_t deviceID; // device ID
  170. uint32_t pageSize; // page size (unit not stated in this header)
  171. } rtPointerAttributes_t;
  172. typedef struct { // input parameters of rtMallocHostSharedMemory
  173. const char_t *name; // shared memory name
  174. const uint64_t size; // const member: can only be set when the struct is initialized
  175. uint32_t flag;
  176. } rtMallocHostSharedMemoryIn;
  177. typedef struct { // output information of rtMallocHostSharedMemory
  178. int32_t fd;
  179. void *ptr;
  180. void *devPtr;
  181. } rtMallocHostSharedMemoryOut;
  182. typedef struct { // input parameters of rtFreeHostSharedMemory; field names match the In/Out structs of rtMallocHostSharedMemory
  183. const char_t *name;
  184. const uint64_t size; // const member: can only be set when the struct is initialized
  185. int32_t fd;
  186. void *ptr;
  187. void *devPtr;
  188. } rtFreeHostSharedMemoryIn;
  189. /**
  190. * @ingroup dvrt_mem
  191. * @brief alloc device memory
  192. * @param [in|out] devPtr address of the memory pointer
  193. * @param [in] size memory size
  194. * @param [in] type memory type | memory policy (see rtMemType_t)
  195. * @return RT_ERROR_NONE for ok
  196. * @return RT_ERROR_INVALID_VALUE for error input
  197. */
  198. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  199. RTS_API rtError_t rtMallocV2(void **devPtr, uint64_t size, rtMemType_t type, const uint16_t moduleId); // same parameters plus moduleId (not described in this header)
  200. /**
  201. * @ingroup dvrt_mem
  202. * @brief free device memory
  203. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer is not modified)
  204. * @return RT_ERROR_NONE for ok
  205. * @return RT_ERROR_INVALID_VALUE for error input
  206. */
  207. RTS_API rtError_t rtFree(void *devPtr);
  208. /**
  209. * @ingroup dvrt_mem
  210. * @brief alloc device memory for dvpp; rtDvppMallocV2 takes the same parameters plus moduleId (not described in this header)
  211. * @param [in|out] devPtr address of the memory pointer
  212. * @param [in] size memory size
  213. * @return RT_ERROR_NONE for ok
  214. * @return RT_ERROR_INVALID_VALUE for error input
  215. */
  216. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  217. RTS_API rtError_t rtDvppMallocV2(void **devPtr, uint64_t size, uint16_t moduleId);
  218. /**
  219. * @ingroup dvrt_mem
  220. * @brief alloc device memory for dvpp, with support for setting a flag
  221. * @param [in|out] devPtr address of the memory pointer
  222. * @param [in] size memory size
  223. * @param [in] flag mem flag; a memory attribute (e.g. RT_MEMORY_ATTRIBUTE_READONLY) can be used to request read-only memory
  224. * @return RT_ERROR_NONE for ok
  225. * @return RT_ERROR_INVALID_VALUE for error input
  226. * @return other values indicate an error
  227. */
  228. RTS_API rtError_t rtDvppMallocWithFlag(void **devPtr, uint64_t size, uint32_t flag);
  229. RTS_API rtError_t rtDvppMallocWithFlagV2(void **devPtr, uint64_t size, uint32_t flag, const uint16_t moduleId); // same parameters plus moduleId (not described in this header)
  230. /**
  231. * @ingroup dvrt_mem
  232. * @brief free device memory for dvpp
  233. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer is not modified)
  234. * @return RT_ERROR_NONE for ok
  235. * @return RT_ERROR_INVALID_VALUE for error input
  236. */
  237. RTS_API rtError_t rtDvppFree(void *devPtr);
  238. /**
  239. * @ingroup dvrt_mem
  240. * @brief alloc host memory
  241. * @param [in|out] hostPtr address of the memory pointer
  242. * @param [in] size memory size
  243. * @return RT_ERROR_NONE for ok
  244. * @return RT_ERROR_INVALID_VALUE for error input
  245. */
  246. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  247. RTS_API rtError_t rtMallocHostV2(void **hostPtr, uint64_t size, uint16_t moduleId); // same parameters plus moduleId (not described in this header)
  248. /**
  249. * @ingroup dvrt_mem
  250. * @brief free host memory
  251. * @param [in] hostPtr host memory pointer to free
  252. * @return RT_ERROR_NONE for ok
  253. * @return RT_ERROR_INVALID_VALUE for error input
  254. */
  255. RTS_API rtError_t rtFreeHost(void *hostPtr);
  256. /**
  257. * @ingroup dvrt_mem
  258. * @brief alloc host shared memory
  259. * @param [in] in pointer to the input parameters (rtMallocHostSharedMemoryIn)
  260. * @param [out] out pointer to the output information (rtMallocHostSharedMemoryOut)
  261. * @return RT_ERROR_NONE for ok
  262. * @return RT_ERROR_INVALID_VALUE for error input
  263. */
  264. RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in,
  265. rtMallocHostSharedMemoryOut *out);
  266. /**
  267. * @ingroup dvrt_mem
  268. * @brief free host shared memory
  269. * @param [in] in pointer to the input parameters (rtFreeHostSharedMemoryIn)
  270. * @return RT_ERROR_NONE for ok
  271. * @return RT_ERROR_INVALID_VALUE for error input
  272. */
  273. RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in);
  274. /**
  275. * @ingroup dvrt_mem
  276. * @brief alloc managed memory
  277. * @param [in|out] ptr address of the memory pointer
  278. * @param [in] size memory size
  279. * @param [in] flag reserved, set to 0.
  280. * @return RT_ERROR_NONE for ok
  281. * @return RT_ERROR_INVALID_VALUE for error input
  282. */
  283. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  284. RTS_API rtError_t rtMemAllocManagedV2(void **ptr, uint64_t size, uint32_t flag, uint16_t moduleId); // same parameters plus moduleId (not described in this header)
  285. /**
  286. * @ingroup dvrt_mem
  287. * @brief free managed memory
  288. * @param [in] ptr managed memory pointer to free
  289. * @return RT_ERROR_NONE for ok
  290. * @return RT_ERROR_INVALID_VALUE for error input
  291. */
  292. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  293. /**
  294. * @ingroup dvrt_mem
  295. * @brief alloc cached device memory
  296. * @param [in|out] devPtr address of the memory pointer
  297. * @param [in] size memory size
  298. * @param [in] type memory type | memory policy (see rtMemType_t)
  299. * @return RT_ERROR_NONE for ok
  300. */
  301. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  302. RTS_API rtError_t rtMallocCachedV2(void **devPtr, uint64_t size, rtMemType_t type, uint16_t moduleId); // same parameters plus moduleId (not described in this header)
  303. /**
  304. * @ingroup dvrt_mem
  305. * @brief flush device memory
  306. * @param [in] base virtual base addr
  307. * @param [in] len memory size
  308. * @return RT_ERROR_NONE for ok, errno for failed
  309. */
  310. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  311. /**
  312. * @ingroup dvrt_mem
  313. * @brief invalidate device memory
  314. * @param [in] base virtual base addr
  315. * @param [in] len memory size
  316. * @return RT_ERROR_NONE for ok, errno for failed
  317. */
  318. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  319. /**
  320. * @ingroup dvrt_mem
  321. * @brief synchronous memcpy
  322. * @param [in] dst destination address pointer
  323. * @param [in] destMax max length of destination address memory
  324. * @param [in] src source address pointer
  325. * @param [in] cnt the number of bytes to copy
  326. * @param [in] kind memcpy type
  327. * @return RT_ERROR_NONE for ok
  328. * @return RT_ERROR_INVALID_VALUE for error input
  329. */
  330. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind);
  331. /**
  332. * @ingroup dvrt_mem
  333. * @brief host task memcpy
  334. * @param [in] dst destination address pointer
  335. * @param [in] destMax max length of destination address memory
  336. * @param [in] src source address pointer
  337. * @param [in] cnt the number of bytes to copy
  338. * @param [in] kind memcpy type
  339. * @param [in] stm task stream
  340. * @return RT_ERROR_NONE for ok, errno for failed
  341. */
  342. RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src,
  343. const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm);
  344. /**
  345. * @ingroup dvrt_mem
  346. * @brief asynchronous memcpy
  347. * @param [in] dst destination address pointer
  348. * @param [in] destMax max length of destination address memory
  349. * @param [in] src source address pointer
  350. * @param [in] cnt the number of bytes to copy
  351. * @param [in] kind memcpy type
  352. * @param [in] stm asynchronous task stream
  353. * @return RT_ERROR_NONE for ok
  354. * @return RT_ERROR_INVALID_VALUE for error input
  355. */
  356. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind,
  357. rtStream_t stm);
  358. /**
  359. * @ingroup dvrt_mem
  360. * @brief asynchronous memcpy with a qos configuration
  361. * @param [in] dst destination address pointer
  362. * @param [in] destMax max length of destination address memory
  363. * @param [in] src source address pointer
  364. * @param [in] cnt the number of bytes to copy
  365. * @param [in] kind memcpy type
  366. * @param [in] stm asynchronous task stream
  367. * @param [in] qosCfg qos configuration of the asynchronous task
  368. * @return RT_ERROR_NONE for ok
  369. * @return RT_ERROR_INVALID_VALUE for error input
  370. */
  371. RTS_API rtError_t rtMemcpyAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t cnt,
  372. rtMemcpyKind_t kind, rtStream_t stm, uint32_t qosCfg);
  373. typedef struct { // NOTE(review): presumably the layout that rtMemcpyAsyncPtr's memcpyAddrInfo points to — confirm
  374. uint32_t resv0; // reserved
  375. uint32_t resv1; // reserved
  376. uint32_t resv2; // reserved
  377. uint32_t len; // length (unit not stated in this header)
  378. uint64_t src; // source, stored as a 64-bit integer
  379. uint64_t dst; // destination, stored as a 64-bit integer
  380. } rtMemcpyAddrInfo;
  381. RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint64_t count,
  382. rtMemcpyKind_t kind, rtStream_t stream); // NOTE(review): undocumented; memcpyAddrInfo presumably points to an rtMemcpyAddrInfo — confirm
  383. /**
  384. * @ingroup dvrt_mem
  385. * @brief asynchronous reduce memcpy
  386. * @param [in] dst destination address pointer
  387. * @param [in] destMax max length of destination address memory
  388. * @param [in] src source address pointer
  389. * @param [in] cnt the number of bytes to copy
  390. * @param [in] kind reduce kind (rtRecudeKind_t)
  391. * @param [in] type data type
  392. * @param [in] stm asynchronous task stream
  393. * @return RT_ERROR_NONE for ok
  394. * @return RT_ERROR_INVALID_VALUE for error input
  395. */
  396. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind,
  397. rtDataType_t type, rtStream_t stm);
  398. /**
  399. * @ingroup dvrt_mem
  400. * @brief asynchronous reduce memcpy with a qos configuration
  401. * @param [in] dst destination address pointer
  402. * @param [in] destMax max length of destination address memory
  403. * @param [in] src source address pointer
  404. * @param [in] cnt the number of bytes to copy
  405. * @param [in] kind reduce kind (rtRecudeKind_t)
  406. * @param [in] type data type
  407. * @param [in] stm asynchronous task stream
  408. * @param [in] qosCfg qos configuration of the asynchronous task
  409. * @return RT_ERROR_NONE for ok
  410. * @return RT_ERROR_INVALID_VALUE for error input
  411. */
  412. RTS_API rtError_t rtReduceAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind,
  413. rtDataType_t type, rtStream_t stm, uint32_t qosCfg);
  414. /**
  415. * @ingroup dvrt_mem
  416. * @brief asynchronous reduce memcpy with an overflow flag address
  417. * @param [in] dst destination address pointer
  418. * @param [in] destMax max length of destination address memory
  419. * @param [in] src source address pointer
  420. * @param [in] count the number of bytes to copy
  421. * @param [in] kind reduce kind (rtRecudeKind_t)
  422. * @param [in] type data type
  423. * @param [in] stm asynchronous task stream
  424. * @param [in] overflowAddr addr of overflow flag
  425. * @return RT_ERROR_NONE for ok
  426. * @return RT_ERROR_INVALID_VALUE for error input
  427. */
  428. RTS_API rtError_t rtReduceAsyncV2(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  429. rtDataType_t type, rtStream_t stm, void *overflowAddr);
  430. /**
  431. * @ingroup dvrt_mem
  432. * @brief synchronous memcpy2D
  433. * @param [in] dst destination address pointer
  434. * @param [in] dstPitch pitch of destination memory
  435. * @param [in] src source address pointer
  436. * @param [in] srcPitch pitch of source memory
  437. * @param [in] width width of matrix transfer
  438. * @param [in] height height of matrix transfer
  439. * @param [in] kind memcpy type
  440. * @return RT_ERROR_NONE for ok
  441. * @return RT_ERROR_INVALID_VALUE for error input
  442. */
  443. RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
  444. uint64_t height, rtMemcpyKind_t kind);
  445. /**
  446. * @ingroup dvrt_mem
  447. * @brief asynchronous memcpy2D
  448. * @param [in] dst destination address pointer
  449. * @param [in] dstPitch pitch of destination memory
  450. * @param [in] src source address pointer
  451. * @param [in] srcPitch pitch of source memory
  452. * @param [in] width width of matrix transfer
  453. * @param [in] height height of matrix transfer
  454. * @param [in] kind memcpy type
  455. * @param [in] stm asynchronous task stream
  456. * @return RT_ERROR_NONE for ok
  457. * @return RT_ERROR_INVALID_VALUE for error input
  458. */
  459. RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
  460. uint64_t height, rtMemcpyKind_t kind, rtStream_t stm);
  461. /**
  462. * @ingroup dvrt_mem
  463. * @brief query AI core memory sizes
  464. * @param [out] aiCoreMemorySize structure for the queried sizes (non-const pointer; marked [in] previously — TODO confirm direction)
  465. * @return RT_ERROR_NONE for ok, errno for failed
  466. * @return RT_ERROR_INVALID_VALUE for error input
  467. */
  468. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  469. /**
  470. * @ingroup dvrt_mem
  471. * @brief set AI core memory sizes. Set before model inference ("bright screen" case) so that memory
  472. limitations do not prevent the model from being fully integrated into the network. Requirement from JiaMinHu. Only used for Tiny.
  473. * @param [in] aiCoreMemorySize sizes to set
  474. * @return RT_ERROR_NONE for ok, errno for failed
  475. * @return RT_ERROR_INVALID_VALUE for error input
  476. */
  477. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  478. /**
  479. * @ingroup dvrt_mem
  480. * @brief specifies how memory is used
  481. * @param [in] devPtr memory pointer
  482. * @param [in] count memory count
  483. * @param [in] advise reserved, set to 1. NOTE(review): RT_MEMORY_ADVISE_* values are also defined in this header — confirm whether they apply
  484. * @return RT_ERROR_NONE for ok
  485. * @return others for error
  486. */
  487. RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise);
  488. /**
  489. * @ingroup dvrt_mem
  490. * @brief set memory with uint32_t value
  491. * @param [in] devPtr memory pointer to set
  492. * @param [in] destMax max length of destination address memory
  493. * @param [in] val value to set
  494. * @param [in] cnt byte num
  495. * @return RT_ERROR_NONE for ok, errno for failed
  496. * @return RT_ERROR_INVALID_VALUE for error input
  497. */
  498. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t val, uint64_t cnt);
  499. /**
  500. * @ingroup dvrt_mem
  501. * @brief set memory with uint32_t value, asynchronously
  502. * @param [in] ptr memory pointer to set
  503. * @param [in] destMax max length of destination address memory
  504. * @param [in] val value to set
  505. * @param [in] cnt byte num
  506. * @param [in] stm task stream
  507. * @return RT_ERROR_NONE for ok, errno for failed
  508. * @return RT_ERROR_INVALID_VALUE for error input
  509. */
  510. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t val, uint64_t cnt, rtStream_t stm);
  511. /**
  512. * @ingroup dvrt_mem
  513. * @brief get the free and total memory of the current device
  514. * @param [out] freeSize free memory size
  515. * @param [out] totalSize total memory size
  516. * @return RT_ERROR_NONE for ok, errno for failed
  517. * @return RT_ERROR_INVALID_VALUE for error input
  518. */
  519. RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize);
  520. /**
  521. * @ingroup dvrt_mem
  522. * @brief get the free and total memory of the current device for one memory info type
  523. * @param [in] memInfoType memory info type to query (rtMemInfoType_t)
  524. * @param [out] freeSize free memory size
  525. * @param [out] totalSize total memory size
  526. * @return RT_ERROR_NONE for ok, errno for failed
  527. */
  528. RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize);
  529. /**
  530. * @ingroup dvrt_mem
  531. * @brief prefetch memory to a device
  532. * @param [in] devPtr memory pointer
  533. * @param [in] len memory length
  534. * @param [in] devId device id
  535. * @return RT_ERROR_NONE for ok, errno for failed
  536. * @return RT_ERROR_INVALID_VALUE for error input
  537. */
  538. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t devId);
  539. /**
  540. * @ingroup dvrt_mem
  541. * @brief get memory attribute: Host or Device
  542. * @param [out] attributes attributes of ptr (rtPointerAttributes_t)
  543. * @param [in] ptr pointer to query
  544. * @return RT_ERROR_NONE for ok, errno for failed
  545. * @return RT_ERROR_INVALID_VALUE for error input
  546. */
  547. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  548. /**
  549. * @ingroup dvrt_mem
  550. * @brief make memory shared between processes and assign it a name
  551. * @param [in] ptr device memory address pointer
  552. * @param [in] byteCount identification byteCount
  553. * @param [in] name identification name, with length len. NOTE(review): name is non-const, so it may be an output buffer — confirm
  554. * @return RT_ERROR_NONE for ok
  555. * @return RT_ERROR_INVALID_VALUE for error input
  556. * @return RT_ERROR_DRV_ERR for driver error
  557. */
  558. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char_t *name, uint32_t len);
  559. /**
  560. * @ingroup dvrt_mem
  561. * @brief destroy an interprocess shared memory
  562. * @param [in] name identification name
  563. * @return RT_ERROR_NONE for ok
  564. * @return RT_ERROR_INVALID_VALUE for error input
  565. * @return RT_ERROR_DRV_ERR for driver error
  566. */
  567. RTS_API rtError_t rtIpcDestroyMemoryName(const char_t *name);
  568. /**
  569. * @ingroup dvrt_mem
  570. * @brief open an interprocess shared memory
  571. * @param [in|out] ptr address of the device memory address pointer
  572. * @param [in] name identification name
  573. * @return RT_ERROR_NONE for ok
  574. * @return RT_ERROR_INVALID_VALUE for error input
  575. * @return RT_ERROR_DRV_ERR for driver error
  576. */
  577. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char_t *name);
  578. /**
  579. * @ingroup dvrt_mem
  580. * @brief close an interprocess shared memory
  581. * @param [in] ptr device memory address pointer
  582. * (ptr is the only parameter; this function takes no name argument)
  583. * @return RT_ERROR_NONE for ok
  584. * @return RT_ERROR_INVALID_VALUE for error input
  585. * @return RT_ERROR_DRV_ERR for driver error
  586. */
  587. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  588. /**
  589. * @ingroup dvrt_mem
  590. * @brief HCCL async memory copy
  591. * @param [in] sqIndex sq index
  592. * @param [in] wqeIndex module index
  593. * @param [in] stm asynchronous task stream
  594. * @return RT_ERROR_NONE for ok
  595. * @return RT_ERROR_INVALID_VALUE for error input
  596. * @return RT_ERROR_DRV_ERR for driver error
  597. */
  598. RTS_API rtError_t rtRDMASend(uint32_t sqIndex, uint32_t wqeIndex, rtStream_t stm);
  599. /**
  600. * @ingroup dvrt_mem
  601. * @brief Ipc set mem pid
  602. * @param [in] name name to be queried
  603. * @param [in] pid array of process ids
  604. * @param [in] num length of pid[]
  605. * @return RT_ERROR_NONE for ok
  606. * @return RT_ERROR_INVALID_VALUE for error input
  607. * @return RT_ERROR_DRV_ERR for driver error
  608. */
  609. RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num);
  610. /**
  611. * @ingroup dvrt_mem
  612. * @brief HCCL async memory copy
  613. * @param [in] dbIndex single device 0
  614. * @param [in] dbInfo doorbell info
  615. * @param [in] stm asynchronous task stream
  616. * @return RT_ERROR_NONE for ok
  617. * @return RT_ERROR_INVALID_VALUE for error input
  618. * @return RT_ERROR_DRV_ERR for driver error
  619. */
  620. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stm);
  621. #if defined(__cplusplus)
  622. }
  623. #endif
  624. #endif // CCE_RUNTIME_MEM_H

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。