You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 22 kB

5 years ago
5 years ago
3 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
3 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
  3. * Description: mem.h
  4. * Create: 2020-01-01
  5. */
  6. #ifndef CCE_RUNTIME_MEM_H
  7. #define CCE_RUNTIME_MEM_H
  8. #include <stddef.h>
  9. #include "base.h"
  10. #include "config.h"
  11. #include "stream.h"
  12. #if defined(__cplusplus)
  13. extern "C" {
  14. #endif
  15. /**
  16. * @ingroup dvrt_mem
  17. * @brief memory type
  18. */
  19. #define RT_MEMORY_DEFAULT (0x0U) // default memory on device
  20. #define RT_MEMORY_HBM (0x2U) // HBM memory on device
  21. #define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device
  22. #define RT_MEMORY_DDR (0x4U) // DDR memory on device
  23. #define RT_MEMORY_SPM (0x8U) // shared physical memory on device
  24. #define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device
  25. #define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device
  26. #define RT_MEMORY_DDR_NC (0x20U) // DDR memory of non-cache
  27. #define RT_MEMORY_TS (0x40U) // Used for Ts memory
  28. #define RT_MEMORY_TS_4G (0x40U) // Used for Ts memory(only 1951)
  29. #define RT_MEMORY_HOST (0x81U) // Memory on host
  30. #define RT_MEMORY_SVM (0x90U) // Memory for SVM
  31. #define RT_MEMORY_HOST_SVM (0x90U) // Memory for host SVM
  32. #define RT_MEMORY_RESERVED (0x100U)
  33. #define RT_MEMORY_L1 (0x1U << 16U)
  34. #define RT_MEMORY_L2 (0x1U << 17U)
  35. /**
  36. * @ingroup dvrt_mem
  37. * @brief memory info type
  38. */
  39. #define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U)
  40. #define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U)
  41. #define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U)
  42. #define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U)
  43. /**
  44. * @ingroup dvrt_mem
  45. * @brief memory Policy
  46. */
  47. #define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page
  48. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x400U) // Malloc mem prior huge page, then default page, 0x1U << 10U
  49. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x800U) // Malloc mem only use huge page, 0x1U << 11U
  50. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1000U) // Malloc mem only use default page, 0x1U << 12U
  51. // Malloc mem prior huge page, then default page, for p2p, 0x1U << 13U
  52. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x2000U)
  53. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Malloc mem only use huge page, use for p2p, 0x1U << 14U
  54. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Malloc mem only use default page, use for p2p, 0x1U << 15U
  55. /**
  56. * @ingroup dvrt_mem
  57. * @brief memory attribute
  58. */
  59. #define RT_MEMORY_ATTRIBUTE_DEFAULT (0x0U)
  60. // memory read only attribute, now only dvpp memory support.
  61. #define RT_MEMORY_ATTRIBUTE_READONLY (0x100000U) // Malloc readonly, 1<<20.
  62. #define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9>
  63. /**
  64. * @ingroup dvrt_mem
  65. * @brief memory type | memory Policy
  66. */
  67. typedef uint32_t rtMemType_t;
  68. /**
  69. * @ingroup dvrt_mem
  70. * @brief memory advise type
  71. */
  72. #define RT_MEMORY_ADVISE_EXE (0x02U)
  73. #define RT_MEMORY_ADVISE_THP (0x04U)
  74. #define RT_MEMORY_ADVISE_PLE (0x08U)
  75. #define RT_MEMORY_ADVISE_PIN (0x16U) // NOTE(review): 0x16 == 0x10 | 0x04 | 0x02, so it shares bits with EXE and THP; if these are bit flags, 0x10U may have been intended - confirm before changing
  76. /**
  77. * @ingroup dvrt_mem
  78. * @brief memory copy type
  79. */
  80. typedef enum tagRtMemcpyKind {
  81. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  82. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  83. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  84. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  85. RT_MEMCPY_MANAGED, // managed memory
  86. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE,
  87. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  88. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  89. RT_MEMCPY_RESERVED,
  90. } rtMemcpyKind_t;
  91. typedef enum tagRtMemInfoType {
  92. RT_MEMORYINFO_DDR,
  93. RT_MEMORYINFO_HBM,
  94. RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
  95. RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
  96. RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
  97. RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
  98. RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR (P2P)
  99. RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR (P2P)
  100. RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM (P2P)
  101. RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM (P2P)
  102. } rtMemInfoType_t;
  103. typedef enum tagRtRecudeKind { // NOTE(review): "Recude" looks like a misspelling of "Reduce"; names are kept as-is because they are public API
  104. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P
  105. RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11,
  106. RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12,
  107. RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13,
  108. RT_RECUDE_KIND_END = 14,
  109. } rtRecudeKind_t;
  110. typedef enum tagRtDataType {
  111. RT_DATA_TYPE_FP32 = 0, // fp32
  112. RT_DATA_TYPE_FP16 = 1, // fp16
  113. RT_DATA_TYPE_INT16 = 2, // int16
  114. RT_DATA_TYPE_INT4 = 3, // int4
  115. RT_DATA_TYPE_INT8 = 4, // int8
  116. RT_DATA_TYPE_INT32 = 5, // int32
  117. RT_DATA_TYPE_BFP16 = 6, // bfp16
  118. RT_DATA_TYPE_BFP32 = 7, // bfp32
  119. RT_DATA_TYPE_UINT8 = 8, // uint8
  120. RT_DATA_TYPE_UINT16 = 9, // uint16
  121. RT_DATA_TYPE_UINT32 = 10, // uint32
  122. RT_DATA_TYPE_END = 11,
  123. } rtDataType_t;
  124. /**
  125. * @ingroup dvrt_mem
  126. * @brief memory copy channel type
  127. */
  128. typedef enum tagRtMemcpyChannelType {
  129. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  130. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  131. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now
  132. RT_MEMCPY_CHANNEL_TYPE_RESERVED,
  133. } rtMemcpyChannelType_t;
  134. /**
  135. * @ingroup rt_kernel
  136. * @brief ai core memory size
  137. */
  138. typedef struct rtAiCoreMemorySize {
  139. uint32_t l0ASize;
  140. uint32_t l0BSize;
  141. uint32_t l0CSize;
  142. uint32_t l1Size;
  143. uint32_t ubSize;
  144. uint32_t l2Size;
  145. uint32_t l2PageNum;
  146. uint32_t blockSize;
  147. uint64_t bankSize;
  148. uint64_t bankNum;
  149. uint64_t burstInOneBlock;
  150. uint64_t bankGroupNum;
  151. } rtAiCoreMemorySize_t;
  152. /**
  153. * @ingroup dvrt_mem
  154. * @brief memory type
  155. */
  156. typedef enum tagRtMemoryType {
  157. RT_MEMORY_TYPE_HOST = 1,
  158. RT_MEMORY_TYPE_DEVICE = 2,
  159. RT_MEMORY_TYPE_SVM = 3,
  160. RT_MEMORY_TYPE_DVPP = 4
  161. } rtMemoryType_t;
  162. /**
  163. * @ingroup dvrt_mem
  164. * @brief memory attribute
  165. */
  166. typedef struct tagRtPointerAttributes {
  167. rtMemoryType_t memoryType; // host memory or device memory
  168. rtMemoryType_t locationType;
  169. uint32_t deviceID; // device ID
  170. uint32_t pageSize;
  171. } rtPointerAttributes_t;
  172. typedef struct {
  173. const char_t *name;
  174. const uint64_t size;
  175. uint32_t flag;
  176. } rtMallocHostSharedMemoryIn;
  177. typedef struct {
  178. int32_t fd;
  179. void *ptr;
  180. void *devPtr;
  181. } rtMallocHostSharedMemoryOut;
  182. typedef struct {
  183. const char_t *name;
  184. const uint64_t size;
  185. int32_t fd;
  186. void *ptr;
  187. void *devPtr;
  188. } rtFreeHostSharedMemoryIn;
  189. /**
  190. * @ingroup dvrt_mem
  191. * @brief alloc device memory
  192. * @param [in|out] devPtr memory pointer
  193. * @param [in] size memory size
  194. * @param [in] type memory type
  195. * @return RT_ERROR_NONE for ok
  196. * @return RT_ERROR_INVALID_VALUE for error input
  197. */
  198. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  199. /**
  200. * @ingroup dvrt_mem
  201. * @brief free device memory
  202. * @param [in|out] devPtr memory pointer
  203. * @return RT_ERROR_NONE for ok
  204. * @return RT_ERROR_INVALID_VALUE for error input
  205. */
  206. RTS_API rtError_t rtFree(void *devPtr);
  207. /**
  208. * @ingroup dvrt_mem
  209. * @brief alloc device memory for dvpp
  210. * @param [in|out] devPtr memory pointer
  211. * @param [in] size memory size
  212. * @return RT_ERROR_NONE for ok
  213. * @return RT_ERROR_INVALID_VALUE for error input
  214. */
  215. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  216. /**
  217. * @ingroup dvrt_mem
  218. * @brief alloc device memory for dvpp, support set flag
  219. * @param [in|out] devPtr memory pointer
  220. * @param [in] size memory size
  221. * @param [in] flag mem flag, can use mem attribute set read only.
  222. * @return RT_ERROR_NONE for ok
  223. * @return RT_ERROR_INVALID_VALUE for error input
  224. * @return others is error
  225. */
  226. RTS_API rtError_t rtDvppMallocWithFlag(void **devPtr, uint64_t size, uint32_t flag);
  227. /**
  228. * @ingroup dvrt_mem
  229. * @brief free device memory for dvpp
  230. * @param [in|out] devPtr memory pointer
  231. * @return RT_ERROR_NONE for ok
  232. * @return RT_ERROR_INVALID_VALUE for error input
  233. */
  234. RTS_API rtError_t rtDvppFree(void *devPtr);
  235. /**
  236. * @ingroup dvrt_mem
  237. * @brief alloc host memory
  238. * @param [in|out] hostPtr memory pointer
  239. * @param [in] size memory size
  240. * @return RT_ERROR_NONE for ok
  241. * @return RT_ERROR_INVALID_VALUE for error input
  242. */
  243. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  244. /**
  245. * @ingroup dvrt_mem
  246. * @brief free host memory
  247. * @param [in] hostPtr memory pointer
  248. * @return RT_ERROR_NONE for ok
  249. * @return RT_ERROR_INVALID_VALUE for error input
  250. */
  251. RTS_API rtError_t rtFreeHost(void *hostPtr);
  252. /**
  253. * @ingroup dvrt_mem
  254. * @brief alloc host shared memory
  255. * @param [in] in alloc host shared memory inputPara pointer
  256. * @param [out] out alloc host shared memory outputInfo pointer
  257. * @return RT_ERROR_NONE for ok
  258. * @return RT_ERROR_INVALID_VALUE for error input
  259. */
  260. RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in,
  261. rtMallocHostSharedMemoryOut *out);
  262. /**
  263. * @ingroup dvrt_mem
  264. * @brief free host shared memory
  265. * @param [in] in free host shared memory inputPara pointer
  266. * @return RT_ERROR_NONE for ok
  267. * @return RT_ERROR_INVALID_VALUE for error input
  268. */
  269. RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in);
  270. /**
  271. * @ingroup dvrt_mem
  272. * @brief alloc managed memory
  273. * @param [in|out] ptr memory pointer
  274. * @param [in] size memory size
  275. * @param [in] flag reserved, set to 0.
  276. * @return RT_ERROR_NONE for ok
  277. * @return RT_ERROR_INVALID_VALUE for error input
  278. */
  279. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  280. /**
  281. * @ingroup dvrt_mem
  282. * @brief free managed memory
  283. * @param [in] ptr memory pointer
  284. * @return RT_ERROR_NONE for ok
  285. * @return RT_ERROR_INVALID_VALUE for error input
  286. */
  287. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  288. /**
  289. * @ingroup dvrt_mem
  290. * @brief alloc cached device memory
  291. * @param [in|out] devPtr memory pointer
  292. * @param [in] size memory size
  293. * @param [in] type memory type
  294. * @return RT_ERROR_NONE for ok
  295. */
  296. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  297. /**
  298. * @ingroup dvrt_mem
  299. * @brief flush device memory cache
  300. * @param [in] base virtual base address
  301. * @param [in] len memory size
  302. * @return RT_ERROR_NONE for ok, errno for failed
  303. */
  304. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  305. /**
  306. * @ingroup dvrt_mem
  307. * @brief invalidate device memory cache
  308. * @param [in] base virtual base address
  309. * @param [in] len memory size
  310. * @return RT_ERROR_NONE for ok, errno for failed
  311. */
  312. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  313. /**
  314. * @ingroup dvrt_mem
  315. * @brief synchronized memcpy
  316. * @param [in] dst destination address pointer
  317. * @param [in] destMax Max length of destination address memory
  318. * @param [in] src source address pointer
  319. * @param [in] cnt the number of byte to copy
  320. * @param [in] kind memcpy type
  321. * @return RT_ERROR_NONE for ok
  322. * @return RT_ERROR_INVALID_VALUE for error input
  323. */
  324. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind);
  325. /**
  326. * @ingroup dvrt_mem
  327. * @brief host task memcpy
  328. * @param [in] dst destination address pointer
  329. * @param [in] destMax length of destination address memory
  330. * @param [in] src source address pointer
  331. * @param [in] cnt the number of byte to copy
  332. * @param [in] kind memcpy type
  333. * @param [in] stm task stream
  334. * @return RT_ERROR_NONE for ok, errno for failed
  335. */
  336. RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src,
  337. const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm);
  338. /**
  339. * @ingroup dvrt_mem
  340. * @brief asynchronized memcpy
  341. * @param [in] dst destination address pointer
  342. * @param [in] destMax Max length of destination address memory
  343. * @param [in] src source address pointer
  344. * @param [in] cnt the number of byte to copy
  345. * @param [in] kind memcpy type
  346. * @param [in] stm asynchronized task stream
  347. * @return RT_ERROR_NONE for ok
  348. * @return RT_ERROR_INVALID_VALUE for error input
  349. */
  350. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind,
  351. rtStream_t stm);
  352. /**
  353. * @ingroup dvrt_mem
  354. * @brief asynchronized memcpy with QoS configuration
  355. * @param [in] dst destination address pointer
  356. * @param [in] destMax Max length of destination address memory
  357. * @param [in] src source address pointer
  358. * @param [in] count the number of byte to copy
  359. * @param [in] kind memcpy type
  360. * @param [in] stream asynchronized task stream
  361. * @param [in] qosCfg asynchronized task qosCfg
  362. * @return RT_ERROR_NONE for ok
  363. * @return RT_ERROR_INVALID_VALUE for error input
  364. */
  365. RTS_API rtError_t rtMemcpyAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t count,
  366. rtMemcpyKind_t kind, rtStream_t stream, uint32_t qosCfg);
  367. typedef struct {
  368. uint32_t resv0;
  369. uint32_t resv1;
  370. uint32_t resv2;
  371. uint32_t len;
  372. uint64_t src;
  373. uint64_t dst;
  374. } rtMemcpyAddrInfo;
  375. RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint64_t count,
  376. rtMemcpyKind_t kind, rtStream_t stream);
  377. /**
  378. * @ingroup dvrt_mem
  379. * @brief asynchronized reduce memcpy
  380. * @param [in] dst destination address pointer
  381. * @param [in] destMax Max length of destination address memory
  382. * @param [in] src source address pointer
  383. * @param [in] cnt the number of byte to copy
  384. * @param [in] kind reduce kind (rtRecudeKind_t)
  385. * @param [in] type data type
  386. * @param [in] stm asynchronized task stream
  387. * @return RT_ERROR_NONE for ok
  388. * @return RT_ERROR_INVALID_VALUE for error input
  389. */
  390. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind,
  391. rtDataType_t type, rtStream_t stm);
  392. /**
  393. * @ingroup dvrt_mem
  394. * @brief asynchronized reduce memcpy with QoS configuration
  395. * @param [in] dst destination address pointer
  396. * @param [in] destMax Max length of destination address memory
  397. * @param [in] src source address pointer
  398. * @param [in] cnt the number of byte to copy
  399. * @param [in] kind reduce kind (rtRecudeKind_t)
  400. * @param [in] type data type
  401. * @param [in] stm asynchronized task stream
  402. * @param [in] qosCfg asynchronized task qosCfg
  403. * @return RT_ERROR_NONE for ok
  404. * @return RT_ERROR_INVALID_VALUE for error input
  405. */
  406. RTS_API rtError_t rtReduceAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind,
  407. rtDataType_t type, rtStream_t stm, uint32_t qosCfg);
  408. /**
  409. * @ingroup dvrt_mem
  410. * @brief asynchronized reduce memcpy with overflow flag address
  411. * @param [in] dst destination address pointer
  412. * @param [in] destMax Max length of destination address memory
  413. * @param [in] src source address pointer
  414. * @param [in] count the number of byte to copy
  415. * @param [in] kind reduce kind (rtRecudeKind_t)
  416. * @param [in] type data type
  417. * @param [in] stm asynchronized task stream
  418. * @param [in] overflowAddr addr of overflow flag
  419. * @return RT_ERROR_NONE for ok
  420. * @return RT_ERROR_INVALID_VALUE for error input
  421. */
  422. RTS_API rtError_t rtReduceAsyncV2(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  423. rtDataType_t type, rtStream_t stm, void *overflowAddr);
  424. /**
  425. * @ingroup dvrt_mem
  426. * @brief synchronized memcpy2D
  427. * @param [in] dst destination address pointer
  428. * @param [in] dstPitch pitch of destination memory
  429. * @param [in] src source address pointer
  430. * @param [in] srcPitch pitch of source memory
  431. * @param [in] width width of matrix transfer
  432. * @param [in] height height of matrix transfer
  433. * @param [in] kind memcpy type
  434. * @return RT_ERROR_NONE for ok
  435. * @return RT_ERROR_INVALID_VALUE for error input
  436. */
  437. RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
  438. uint64_t height, rtMemcpyKind_t kind);
  439. /**
  440. * @ingroup dvrt_mem
  441. * @brief asynchronized memcpy2D
  442. * @param [in] dst destination address pointer
  443. * @param [in] dstPitch pitch of destination memory
  444. * @param [in] src source address pointer
  445. * @param [in] srcPitch pitch of source memory
  446. * @param [in] width width of matrix transfer
  447. * @param [in] height height of matrix transfer
  448. * @param [in] kind memcpy type
  449. * @param [in] stm asynchronized task stream
  450. * @return RT_ERROR_NONE for ok
  451. * @return RT_ERROR_INVALID_VALUE for error input
  452. */
  453. RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
  454. uint64_t height, rtMemcpyKind_t kind, rtStream_t stm);
  455. /**
  456. * @ingroup dvrt_mem
  457. * @brief query memory size
  458. * @param [in] aiCoreMemorySize
  459. * @return RT_ERROR_NONE for ok, errno for failed
  460. * @return RT_ERROR_INVALID_VALUE for error input
  461. */
  462. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  463. /**
  464. * @ingroup dvrt_mem
  465. * @brief set memory size, Setting before model reasoning, Bright screen to prevent model can not be fully
  466. * integrated network due to memory limitations. Requirement comes from JiaMinHu. Only used for Tiny.
  467. * @param [in] aiCoreMemorySize
  468. * @return RT_ERROR_NONE for ok, errno for failed
  469. * @return RT_ERROR_INVALID_VALUE for error input
  470. */
  471. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  472. /**
  473. * @ingroup dvrt_mem
  474. * @brief Specifies how memory is use
  475. * @param [in] devPtr memory pointer
  476. * @param [in] count memory count
  477. * @param [in] advise reserved, set to 1
  478. * @return RT_ERROR_NONE for ok
  479. * @return others for error
  480. */
  481. RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise);
  482. /**
  483. * @ingroup dvrt_mem
  484. * @brief set memory with uint32_t value
  485. * @param [in] devPtr
  486. * @param [in] destMax Max length of destination address memory
  487. * @param [in] val
  488. * @param [in] cnt byte num
  489. * @return RT_ERROR_NONE for ok, errno for failed
  490. * @return RT_ERROR_INVALID_VALUE for error input
  491. */
  492. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t val, uint64_t cnt);
  493. /**
  494. * @ingroup dvrt_mem
  495. * @brief set memory with uint32_t value async
  496. * @param [in] ptr
  497. * @param [in] destMax Max length of destination address memory
  498. * @param [in] val
  499. * @param [in] cnt byte num
  500. * @param [in] stm
  501. * @return RT_ERROR_NONE for ok, errno for failed
  502. * @return RT_ERROR_INVALID_VALUE for error input
  503. */
  504. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t val, uint64_t cnt, rtStream_t stm);
  505. /**
  506. * @ingroup dvrt_mem
  507. * @brief get current device memory total and free
  508. * @param [out] freeSize
  509. * @param [out] totalSize
  510. * @return RT_ERROR_NONE for ok, errno for failed
  511. * @return RT_ERROR_INVALID_VALUE for error input
  512. */
  513. RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize);
  514. /**
  515. * @ingroup dvrt_mem
  516. * @brief get current device memory total and free
  517. * @param [in] memInfoType
  518. * @param [out] freeSize
  519. * @param [out] totalSize
  520. * @return RT_ERROR_NONE for ok, errno for failed
  521. */
  522. RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize);
  523. /**
  524. * @ingroup dvrt_mem
  525. * @brief prefetch memory to device
  526. * @param [in] devPtr
  527. * @param [in] len
  528. * @param [in] devId
  529. * @return RT_ERROR_NONE for ok, errno for failed
  530. * @return RT_ERROR_INVALID_VALUE for error input
  531. */
  532. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t devId);
  533. /**
  534. * @ingroup dvrt_mem
  535. * @brief get memory attribute: Host or Device
  536. * @param [out] attributes
  537. * @param [in] ptr
  538. * @return RT_ERROR_NONE for ok, errno for failed
  539. * @return RT_ERROR_INVALID_VALUE for error input
  540. */
  541. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  542. /**
  543. * @ingroup dvrt_mem
  544. * @brief make memory shared interprocess and assigned a name
  545. * @param [in] ptr device memory address pointer
  546. * @param [in] byteCount identification byteCount
  547. * @param [in] name identification name
  * @param [in] len NOTE(review): undocumented in the original; presumably the length of the name buffer - confirm
  548. * @return RT_ERROR_NONE for ok
  549. * @return RT_ERROR_INVALID_VALUE for error input
  550. * @return RT_ERROR_DRV_ERR for driver error
  551. */
  552. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char_t *name, uint32_t len);
  553. /**
  554. * @ingroup dvrt_mem
  555. * @brief destroy a interprocess shared memory
  556. * @param [in] name identification name
  557. * @return RT_ERROR_NONE for ok
  558. * @return RT_ERROR_INVALID_VALUE for error input
  559. * @return RT_ERROR_DRV_ERR for driver error
  560. */
  561. RTS_API rtError_t rtIpcDestroyMemoryName(const char_t *name);
  562. /**
  563. * @ingroup dvrt_mem
  564. * @brief open a interprocess shared memory
  565. * @param [in|out] ptr device memory address pointer
  566. * @param [in] name identification name
  567. * @return RT_ERROR_NONE for ok
  568. * @return RT_ERROR_INVALID_VALUE for error input
  569. * @return RT_ERROR_DRV_ERR for driver error
  570. */
  571. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char_t *name);
  572. /**
  573. * @ingroup dvrt_mem
  574. * @brief close a interprocess shared memory
  575. * @param [in] ptr device memory address pointer
  577. * @return RT_ERROR_NONE for ok
  578. * @return RT_ERROR_INVALID_VALUE for error input
  579. * @return RT_ERROR_DRV_ERR for driver error
  580. */
  581. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  582. /**
  583. * @ingroup dvrt_mem
  584. * @brief HCCL Async memory cpy
  585. * @param [in] sqIndex sq index
  586. * @param [in] wqeIndex module index
  587. * @param [in] stm asynchronized task stream
  588. * @return RT_ERROR_NONE for ok
  589. * @return RT_ERROR_INVALID_VALUE for error input
  590. * @return RT_ERROR_DRV_ERR for driver error
  591. */
  592. RTS_API rtError_t rtRDMASend(uint32_t sqIndex, uint32_t wqeIndex, rtStream_t stm);
  593. /**
  594. * @ingroup dvrt_mem
  595. * @brief Ipc set mem pid
  596. * @param [in] name name to be queried
  597. * @param [in] pid process id
  598. * @param [in] num length of pid[]
  599. * @return RT_ERROR_NONE for ok
  600. * @return RT_ERROR_INVALID_VALUE for error input
  601. * @return RT_ERROR_DRV_ERR for driver error
  602. */
  603. RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num);
  604. /**
  605. * @ingroup dvrt_mem
  606. * @brief HCCL Async memory cpy
  607. * @param [in] dbIndex single device 0
  608. * @param [in] dbInfo doorbell info
  609. * @param [in] stm asynchronized task stream
  610. * @return RT_ERROR_NONE for ok
  611. * @return RT_ERROR_INVALID_VALUE for error input
  612. * @return RT_ERROR_DRV_ERR for driver error
  613. */
  614. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stm);
  615. #if defined(__cplusplus)
  616. }
  617. #endif
  618. #endif // CCE_RUNTIME_MEM_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示