You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 22 kB

5 years ago
5 years ago
3 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
3 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
  3. * Description: mem.h
  4. * Create: 2020-01-01
  5. */
  6. #ifndef CCE_RUNTIME_MEM_H
  7. #define CCE_RUNTIME_MEM_H
  8. #include <stddef.h>
  9. #include "base.h"
  10. #include "config.h"
  11. #include "stream.h"
  12. #if defined(__cplusplus)
  13. extern "C" {
  14. #endif
  15. /**
  16. * @ingroup dvrt_mem
  17. * @brief memory type
  18. */
  19. #define RT_MEMORY_DEFAULT (0x0U) // default memory on device
  20. #define RT_MEMORY_HBM (0x2U) // HBM memory on device
  21. #define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device
  22. #define RT_MEMORY_DDR (0x4U) // DDR memory on device
  23. #define RT_MEMORY_SPM (0x8U) // shared physical memory on device
  24. #define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device
  25. #define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device
  26. #define RT_MEMORY_DDR_NC (0x20U) // DDR memory of non-cache
  27. #define RT_MEMORY_TS (0x40U) // Used for Ts memory
  28. #define RT_MEMORY_TS_4G (0x40U) // Used for Ts memory(only 1951)
  29. #define RT_MEMORY_HOST (0x81U) // Memory on host
  30. #define RT_MEMORY_SVM (0x90U) // Memory for SVM
  31. #define RT_MEMORY_RESERVED (0x100U)
  32. #define RT_MEMORY_L1 (0x1U << 16U)
  33. #define RT_MEMORY_L2 (0x1U << 17U)
  34. /**
  35. * @ingroup dvrt_mem
  36. * @brief memory info type
  37. */
  38. #define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U)
  39. #define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U)
  40. #define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U)
  41. #define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U)
  42. /**
  43. * @ingroup dvrt_mem
  44. * @brief memory Policy
  45. */
  46. #define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page
  47. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x400U) // Malloc mem prior huge page, then default page, 0x1U << 10U
  48. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x800U) // Malloc mem only use huge page, 0x1U << 11U
  49. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1000U) // Malloc mem only use default page, 0x1U << 12U
  50. // Malloc mem prior huge page, then default page, for p2p, 0x1U << 13U
  51. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x2000U)
  52. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Malloc mem only use huge page, use for p2p, 0x1U << 14U
  53. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Malloc mem only use default page, use for p2p, 0x1U << 15U
  54. /**
  55. * @ingroup dvrt_mem
  56. * @brief memory attribute
  57. */
  58. #define RT_MEMORY_ATTRIBUTE_DEFAULT (0x0U)
  59. // memory read only attribute, now only dvpp memory support.
  60. #define RT_MEMORY_ATTRIBUTE_READONLY (0x100000U) // Malloc readonly, 1<<20.
  61. #define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9>
  62. /**
  63. * @ingroup dvrt_mem
  64. * @brief memory type | memory Policy
  65. */
  66. typedef uint32_t rtMemType_t;
  67. /**
  68. * @ingroup dvrt_mem
  69. * @brief memory advise type
  70. */
  71. #define RT_MEMORY_ADVISE_EXE (0x02U)
  72. #define RT_MEMORY_ADVISE_THP (0x04U)
  73. #define RT_MEMORY_ADVISE_PLE (0x08U)
  74. #define RT_MEMORY_ADVISE_PIN (0x16U) // NOTE(review): 0x16 overlaps the EXE (0x02) and THP (0x04) bits; 0x10U may have been intended -- confirm before changing
  75. /**
  76. * @ingroup dvrt_mem
  77. * @brief memory copy type
  78. */
  79. typedef enum tagRtMemcpyKind {
  80. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  81. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  82. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  83. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  84. RT_MEMCPY_MANAGED, // managed memory
  85. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE,
  86. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  87. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  88. RT_MEMCPY_RESERVED,
  89. } rtMemcpyKind_t;
  90. typedef enum tagRtMemInfoType {
  91. RT_MEMORYINFO_DDR,
  92. RT_MEMORYINFO_HBM,
  93. RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
  94. RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
  95. RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
  96. RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
  97. RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage P2P memory of DDR
  98. RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal P2P memory of DDR
  99. RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage P2P memory of HBM
  100. RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal P2P memory of HBM
  101. } rtMemInfoType_t;
  102. typedef enum tagRtRecudeKind { // NOTE(review): "Recude" is presumably a misspelling of "Reduce"; the name is public, so rename only together with a compatibility alias
  103. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, includes 1P and P2P
  104. RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11,
  105. RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12,
  106. RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13,
  107. RT_RECUDE_KIND_END = 14,
  108. } rtRecudeKind_t;
  109. typedef enum tagRtDataType {
  110. RT_DATA_TYPE_FP32 = 0, // fp32
  111. RT_DATA_TYPE_FP16 = 1, // fp16
  112. RT_DATA_TYPE_INT16 = 2, // int16
  113. RT_DATA_TYPE_INT4 = 3, // int4
  114. RT_DATA_TYPE_INT8 = 4, // int8
  115. RT_DATA_TYPE_INT32 = 5, // int32
  116. RT_DATA_TYPE_BFP16 = 6, // bfp16
  117. RT_DATA_TYPE_BFP32 = 7, // bfp32
  118. RT_DATA_TYPE_UINT8 = 8, // uint8
  119. RT_DATA_TYPE_UINT16 = 9, // uint16
  120. RT_DATA_TYPE_UINT32 = 10, // uint32
  121. RT_DATA_TYPE_END = 11,
  122. } rtDataType_t;
  123. /**
  124. * @ingroup dvrt_mem
  125. * @brief memory copy channel type
  126. */
  127. typedef enum tagRtMemcpyChannelType {
  128. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  129. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  130. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now
  131. RT_MEMCPY_CHANNEL_TYPE_RESERVED,
  132. } rtMemcpyChannelType_t;
  133. /**
  134. * @ingroup rt_kernel
  135. * @brief ai core memory size
  136. */
  137. typedef struct rtAiCoreMemorySize {
  138. uint32_t l0ASize;
  139. uint32_t l0BSize;
  140. uint32_t l0CSize;
  141. uint32_t l1Size;
  142. uint32_t ubSize;
  143. uint32_t l2Size;
  144. uint32_t l2PageNum;
  145. uint32_t blockSize;
  146. uint64_t bankSize;
  147. uint64_t bankNum;
  148. uint64_t burstInOneBlock;
  149. uint64_t bankGroupNum;
  150. } rtAiCoreMemorySize_t;
  151. /**
  152. * @ingroup dvrt_mem
  153. * @brief memory type
  154. */
  155. typedef enum tagRtMemoryType {
  156. RT_MEMORY_TYPE_HOST = 1,
  157. RT_MEMORY_TYPE_DEVICE = 2,
  158. RT_MEMORY_TYPE_SVM = 3,
  159. RT_MEMORY_TYPE_DVPP = 4
  160. } rtMemoryType_t;
  161. /**
  162. * @ingroup dvrt_mem
  163. * @brief memory attribute
  164. */
  165. typedef struct tagRtPointerAttributes {
  166. rtMemoryType_t memoryType; // host memory or device memory
  167. rtMemoryType_t locationType;
  168. uint32_t deviceID; // device ID
  169. uint32_t pageSize;
  170. } rtPointerAttributes_t;
  171. typedef struct {
  172. const char_t *name;
  173. const uint64_t size;
  174. uint32_t flag;
  175. } rtMallocHostSharedMemoryIn;
  176. typedef struct {
  177. int32_t fd;
  178. void *ptr;
  179. void *devPtr;
  180. } rtMallocHostSharedMemoryOut;
  181. typedef struct {
  182. const char_t *name;
  183. const uint64_t size;
  184. int32_t fd;
  185. void *ptr;
  186. void *devPtr;
  187. } rtFreeHostSharedMemoryIn;
  188. /**
  189. * @ingroup dvrt_mem
  190. * @brief alloc device memory
  191. * @param [in|out] devPtr memory pointer
  192. * @param [in] size memory size
  193. * @param [in] type memory type
  194. * @return RT_ERROR_NONE for ok
  195. * @return RT_ERROR_INVALID_VALUE for error input
  196. */
  197. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  198. /**
  199. * @ingroup dvrt_mem
  200. * @brief free device memory
  201. * @param [in] devPtr memory pointer
  202. * @return RT_ERROR_NONE for ok
  203. * @return RT_ERROR_INVALID_VALUE for error input
  204. */
  205. RTS_API rtError_t rtFree(void *devPtr);
  206. /**
  207. * @ingroup dvrt_mem
  208. * @brief alloc device memory for dvpp
  209. * @param [in|out] devPtr memory pointer
  210. * @param [in] size memory size
  211. * @return RT_ERROR_NONE for ok
  212. * @return RT_ERROR_INVALID_VALUE for error input
  213. */
  214. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  215. /**
  216. * @ingroup dvrt_mem
  217. * @brief alloc device memory for dvpp, support set flag
  218. * @param [in|out] devPtr memory pointer
  219. * @param [in] size memory size
  220. * @param [in] flag mem flag, can use mem attribute set read only.
  221. * @return RT_ERROR_NONE for ok
  222. * @return RT_ERROR_INVALID_VALUE for error input
  223. * @return others is error
  224. */
  225. RTS_API rtError_t rtDvppMallocWithFlag(void **devPtr, uint64_t size, uint32_t flag);
  226. /**
  227. * @ingroup dvrt_mem
  228. * @brief free device memory for dvpp
  229. * @param [in] devPtr memory pointer
  230. * @return RT_ERROR_NONE for ok
  231. * @return RT_ERROR_INVALID_VALUE for error input
  232. */
  233. RTS_API rtError_t rtDvppFree(void *devPtr);
  234. /**
  235. * @ingroup dvrt_mem
  236. * @brief alloc host memory
  237. * @param [in|out] hostPtr memory pointer
  238. * @param [in] size memory size
  239. * @return RT_ERROR_NONE for ok
  240. * @return RT_ERROR_INVALID_VALUE for error input
  241. */
  242. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  243. /**
  244. * @ingroup dvrt_mem
  245. * @brief free host memory
  246. * @param [in] hostPtr memory pointer
  247. * @return RT_ERROR_NONE for ok
  248. * @return RT_ERROR_INVALID_VALUE for error input
  249. */
  250. RTS_API rtError_t rtFreeHost(void *hostPtr);
  251. /**
  252. * @ingroup dvrt_mem
  253. * @brief alloc host shared memory
  254. * @param [in] in alloc host shared memory inputPara pointer
  255. * @param [out] out alloc host shared memory outputInfo pointer
  256. * @return RT_ERROR_NONE for ok
  257. * @return RT_ERROR_INVALID_VALUE for error input
  258. */
  259. RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in,
  260. rtMallocHostSharedMemoryOut *out);
  261. /**
  262. * @ingroup dvrt_mem
  263. * @brief free host shared memory
  264. * @param [in] in free host shared memory inputPara pointer
  265. * @return RT_ERROR_NONE for ok
  266. * @return RT_ERROR_INVALID_VALUE for error input
  267. */
  268. RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in);
  269. /**
  270. * @ingroup dvrt_mem
  271. * @brief alloc managed memory
  272. * @param [in|out] ptr memory pointer
  273. * @param [in] size memory size
  274. * @param [in] flag reserved, set to 0.
  275. * @return RT_ERROR_NONE for ok
  276. * @return RT_ERROR_INVALID_VALUE for error input
  277. */
  278. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  279. /**
  280. * @ingroup dvrt_mem
  281. * @brief free managed memory
  282. * @param [in] ptr memory pointer
  283. * @return RT_ERROR_NONE for ok
  284. * @return RT_ERROR_INVALID_VALUE for error input
  285. */
  286. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  287. /**
  288. * @ingroup dvrt_mem
  289. * @brief alloc cached device memory
  290. * @param [in| devPtr memory pointer
  291. * @param [in] size memory size
  292. * @param [in] type memory type
  293. * @return RT_ERROR_NONE for ok
  294. */
  295. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  296. /**
  297. * @ingroup dvrt_mem
  298. * @brief flush device mempory
  299. * @param [in] base virtal base address
  300. * @param [in] len memory size
  301. * @return RT_ERROR_NONE for ok, errno for failed
  302. */
  303. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  304. /**
  305. * @ingroup dvrt_mem
  306. * @brief invalid device mempory
  307. * @param [in] base virtal base address
  308. * @param [in] len memory size
  309. * @return RT_ERROR_NONE for ok, errno for failed
  310. */
  311. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  312. /**
  313. * @ingroup dvrt_mem
  314. * @brief synchronized memcpy
  315. * @param [in] dst destination address pointer
  316. * @param [in] Max length of destination address memory
  317. * @param [in] src source address pointer
  318. * @param [in] cnt the number of byte to copy
  319. * @param [in] kind memcpy type
  320. * @return RT_ERROR_NONE for ok
  321. * @return RT_ERROR_INVALID_VALUE for error input
  322. */
  323. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind);
  324. /**
  325. * @ingroup dvrt_mem
  326. * @brief host task memcpy
  327. * @param [in] dst destination address pointer
  328. * @param [in] destMax length of destination address memory
  329. * @param [in] src source address pointer
  330. * @param [in] cnt the number of byte to copy
  331. * @param [in] kind memcpy type
  332. * @param [in] stm task stream
  333. * @return RT_ERROR_NONE for ok, errno for failed
  334. */
  335. RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src,
  336. const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm);
  337. /**
  338. * @ingroup dvrt_mem
  339. * @brief asynchronized memcpy
  340. * @param [in] dst destination address pointer
  341. * @param [in] Max length of destination address memory
  342. * @param [in] src source address pointer
  343. * @param [in] cnt the number of byte to copy
  344. * @param [in] kind memcpy type
  345. * @param [in] stm asynchronized task stream
  346. * @return RT_ERROR_NONE for ok
  347. * @return RT_ERROR_INVALID_VALUE for error input
  348. */
  349. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind,
  350. rtStream_t stm);
  351. /**
  352. * @ingroup dvrt_mem
  353. * @brief asynchronized memcpy
  354. * @param [in] dst destination address pointer
  355. * @param [in] Max length of destination address memory
  356. * @param [in] src source address pointer
  357. * @param [in] count the number of byte to copy
  358. * @param [in] kind memcpy type
  359. * @param [in] stream asynchronized task stream
  360. * @param [in] qosCfg asynchronized task qosCfg
  361. * @return RT_ERROR_NONE for ok
  362. * @return RT_ERROR_INVALID_VALUE for error input
  363. */
  364. RTS_API rtError_t rtMemcpyAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t count,
  365. rtMemcpyKind_t kind, rtStream_t stream, uint32_t qosCfg);
  366. typedef struct {
  367. uint32_t resv0;
  368. uint32_t resv1;
  369. uint32_t resv2;
  370. uint32_t len;
  371. uint64_t src;
  372. uint64_t dst;
  373. } rtMemcpyAddrInfo;
  374. RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint64_t count,
  375. rtMemcpyKind_t kind, rtStream_t stream);
  376. /**
  377. * @ingroup dvrt_mem
  378. * @brief asynchronized reduce memcpy
  379. * @param [in] dst destination address pointer
  380. * @param [in] Max length of destination address memory
  381. * @param [in] src source address pointer
  382. * @param [in] cnt the number of byte to copy
  383. * @param [in] kind memcpy type
  384. * @param [in] type data type
  385. * @param [in] stm asynchronized task stream
  386. * @return RT_ERROR_NONE for ok
  387. * @return RT_ERROR_INVALID_VALUE for error input
  388. */
  389. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind,
  390. rtDataType_t type, rtStream_t stm);
  391. /**
  392. * @ingroup dvrt_mem
  393. * @brief asynchronized reduce memcpy
  394. * @param [in] dst destination address pointer
  395. * @param [in] Max length of destination address memory
  396. * @param [in] src source address pointer
  397. * @param [in] count the number of byte to copy
  398. * @param [in] kind memcpy type
  399. * @param [in] type data type
  400. * @param [in] stm asynchronized task stream
  401. * @param [in] qosCfg asynchronized task qosCfg
  402. * @return RT_ERROR_NONE for ok
  403. * @return RT_ERROR_INVALID_VALUE for error input
  404. */
  405. RTS_API rtError_t rtReduceAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind,
  406. rtDataType_t type, rtStream_t stm, uint32_t qosCfg);
  407. /**
  408. * @ingroup dvrt_mem
  409. * @brief asynchronized reduce memcpy
  410. * @param [in] dst destination address pointer
  411. * @param [in] Max length of destination address memory
  412. * @param [in] src source address pointer
  413. * @param [in] count the number of byte to copy
  414. * @param [in] kind memcpy type
  415. * @param [in] type data type
  416. * @param [in] stm asynchronized task stream
  417. * @param [in] overflowAddr addr of overflow flag
  418. * @return RT_ERROR_NONE for ok
  419. * @return RT_ERROR_INVALID_VALUE for error input
  420. */
  421. RTS_API rtError_t rtReduceAsyncV2(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  422. rtDataType_t type, rtStream_t stm, void *overflowAddr);
  423. /**
  424. * @ingroup dvrt_mem
  425. * @brief synchronized memcpy2D
  426. * @param [in] dst destination address pointer
  427. * @param [in] dstPitch pitch of destination memory
  428. * @param [in] src source address pointer
  429. * @param [in] srcPitch pitch of source memory
  430. * @param [in] width width of matrix transfer
  431. * @param [in] height height of matrix transfer
  432. * @param [in] kind memcpy type
  433. * @return RT_ERROR_NONE for ok
  434. * @return RT_ERROR_INVALID_VALUE for error input
  435. */
  436. RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
  437. uint64_t height, rtMemcpyKind_t kind);
  438. /**
  439. * @ingroup dvrt_mem
  440. * @brief asynchronized memcpy2D
  441. * @param [in] dst destination address pointer
  442. * @param [in] dstPitch length of destination address memory
  443. * @param [in] src source address pointer
  444. * @param [in] srcPitch length of destination address memory
  445. * @param [in] width width of matrix transfer
  446. * @param [in] height height of matrix transfer
  447. * @param [in] kind memcpy type
  448. * @param [in] stm asynchronized task stream
  449. * @return RT_ERROR_NONE for ok
  450. * @return RT_ERROR_INVALID_VALUE for error input
  451. */
  452. RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
  453. uint64_t height, rtMemcpyKind_t kind, rtStream_t stm);
  454. /**
  455. * @ingroup dvrt_mem
  456. * @brief query memory size
  457. * @param [in] aiCoreMemorySize
  458. * @return RT_ERROR_NONE for ok, errno for failed
  459. * @return RT_ERROR_INVALID_VALUE for error input
  460. */
  461. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  462. /**
  463. * @ingroup dvrt_mem
  464. * @brief set memory size, Setting before model reasoning, Bright screen to prevent model can not be fully
  465. integrated network due to memory limitations.Requirement come from JiaMinHu.Only use for Tiny.
  466. * @param [in] aiCoreMemorySize
  467. * @return RT_ERROR_NONE for ok, errno for failed
  468. * @return RT_ERROR_INVALID_VALUE for error input
  469. */
  470. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  471. /**
  472. * @ingroup dvrt_mem
  473. * @brief Specifies how memory is used
  474. * @param [in] devPtr memory pointer
  475. * @param [in] count memory count
  476. * @param [in] advise reserved, set to 1
  477. * @return RT_ERROR_NONE for ok
  478. * @return others for error
  479. */
  480. RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise);
  481. /**
  482. * @ingroup dvrt_mem
  483. * @brief set memory with uint32_t value
  484. * @param [in] devPtr
  485. * @param [in] destMax Max length of destination address memory
  486. * @param [in] val
  487. * @param [in] cnt byte num
  488. * @return RT_ERROR_NONE for ok, errno for failed
  489. * @return RT_ERROR_INVALID_VALUE for error input
  490. */
  491. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t val, uint64_t cnt);
  492. /**
  493. * @ingroup dvrt_mem
  494. * @brief set memory with uint32_t value async
  495. * @param [in] ptr
  496. * @param [in] destMax Max length of destination address memory
  497. * @param [in] val
  498. * @param [in] cnt byte num
  499. * @param [in] stm
  500. * @return RT_ERROR_NONE for ok, errno for failed
  501. * @return RT_ERROR_INVALID_VALUE for error input
  502. */
  503. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t val, uint64_t cnt, rtStream_t stm);
  504. /**
  505. * @ingroup dvrt_mem
  506. * @brief get current device memory total and free
  507. * @param [out] freeSize
  508. * @param [out] totalSize
  509. * @return RT_ERROR_NONE for ok, errno for failed
  510. * @return RT_ERROR_INVALID_VALUE for error input
  511. */
  512. RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize);
  513. /**
  514. * @ingroup dvrt_mem
  515. * @brief get current device memory total and free
  516. * @param [in] memInfoType
  517. * @param [out] freeSize
  518. * @param [out] totalSize
  519. * @return RT_ERROR_NONE for ok, errno for failed
  520. */
  521. RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize);
  522. /**
  523. * @ingroup dvrt_mem
  524. * @brief prefetch memory to device
  525. * @param [in] devPtr
  526. * @param [in] len
  527. * @param [in] devId
  528. * @return RT_ERROR_NONE for ok, errno for failed
  529. * @return RT_ERROR_INVALID_VALUE for error input
  530. */
  531. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t devId);
  532. /**
  533. * @ingroup dvrt_mem
  534. * @brief get memory attribute: Host or Device
  535. * @param [out] attributes pointer attributes of ptr
  536. * @param [in] ptr memory pointer to query
  537. * @return RT_ERROR_NONE for ok, errno for failed
  538. * @return RT_ERROR_INVALID_VALUE for error input
  539. */
  540. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  541. /**
  542. * @ingroup dvrt_mem
  543. * @brief make memory shared interprocess and assigned a name
  544. * @param [in] ptr device memory address pointer
  545. * @param [in] byteCount identification byteCount
  546. * @param [in] name identification name; len is presumably the length of this buffer (NOTE(review): name is non-const, so it may be an output -- confirm)
  547. * @return RT_ERROR_NONE for ok
  548. * @return RT_ERROR_INVALID_VALUE for error input
  549. * @return RT_ERROR_DRV_ERR for driver error
  550. */
  551. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char_t *name, uint32_t len);
  552. /**
  553. * @ingroup dvrt_mem
  554. * @brief destroy a interprocess shared memory
  555. * @param [in] name identification name
  556. * @return RT_ERROR_NONE for ok
  557. * @return RT_ERROR_INVALID_VALUE for error input
  558. * @return RT_ERROR_DRV_ERR for driver error
  559. */
  560. RTS_API rtError_t rtIpcDestroyMemoryName(const char_t *name);
  561. /**
  562. * @ingroup dvrt_mem
  563. * @brief open a interprocess shared memory
  564. * @param [in|out] ptr device memory address pointer
  565. * @param [in] name identification name
  566. * @return RT_ERROR_NONE for ok
  567. * @return RT_ERROR_INVALID_VALUE for error input
  568. * @return RT_ERROR_DRV_ERR for driver error
  569. */
  570. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char_t *name);
  571. /**
  572. * @ingroup dvrt_mem
  573. * @brief close a interprocess shared memory
  574. * @param [in] ptr device memory address pointer
  576. * @return RT_ERROR_NONE for ok
  577. * @return RT_ERROR_INVALID_VALUE for error input
  578. * @return RT_ERROR_DRV_ERR for driver error
  579. */
  580. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  581. /**
  582. * @ingroup dvrt_mem
  583. * @brief HCCL Async memory cpy
  584. * @param [in] sqIndex sq index
  585. * @param [in] wqeIndex module index
  586. * @param [in] stm asynchronized task stream
  587. * @return RT_ERROR_NONE for ok
  588. * @return RT_ERROR_INVALID_VALUE for error input
  589. * @return RT_ERROR_DRV_ERR for driver error
  590. */
  591. RTS_API rtError_t rtRDMASend(uint32_t sqIndex, uint32_t wqeIndex, rtStream_t stm);
  592. /**
  593. * @ingroup dvrt_mem
  594. * @brief Ipc set mem pid
  595. * @param [in] name name to be queried
  596. * @param [in] pid process id
  597. * @param [in] num length of pid[]
  598. * @return RT_ERROR_NONE for ok
  599. * @return RT_ERROR_INVALID_VALUE for error input
  600. * @return RT_ERROR_DRV_ERR for driver error
  601. */
  602. RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num);
  603. /**
  604. * @ingroup dvrt_mem
  605. * @brief HCCL Async memory cpy
  606. * @param [in] dbIndex single device 0
  607. * @param [in] dbInfo doorbell info
  608. * @param [in] stm asynchronized task stream
  609. * @return RT_ERROR_NONE for ok
  610. * @return RT_ERROR_INVALID_VALUE for error input
  611. * @return RT_ERROR_DRV_ERR for driver error
  612. */
  613. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stm);
  614. #if defined(__cplusplus)
  615. }
  616. #endif
  617. #endif // CCE_RUNTIME_MEM_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示