You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 16 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago

  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __CCE_RUNTIME_MEM_H__
  17. #define __CCE_RUNTIME_MEM_H__
  18. #include <stddef.h>
  19. #include "base.h"
  20. #include "config.h"
  21. #include "stream.h"
  22. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  23. extern "C" {
  24. #endif
  25. /**
  26. * @ingroup dvrt_mem
  27. * @brief memory type (combined with a memory policy in rtMemType_t; see MEM_ALLOC_TYPE_BIT for the type mask)
  28. */
  29. #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
  30. #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
  31. #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
  32. #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
  33. #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
  34. #define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device; NOTE(review): 0x11 == 0x10 | 0x1, so it shares the RT_MEMORY_P2P_HBM bit - confirm intended
  35. #define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache
  36. #define RT_MEMORY_TS_4G ((uint32_t)0x40)
  37. #define RT_MEMORY_TS ((uint32_t)0x80)
  38. #define RT_MEMORY_RESERVED ((uint32_t)0x100)
  39. #define RT_MEMORY_L1 ((uint32_t)0x1<<16) // bit 16: outside the MEM_ALLOC_TYPE_BIT mask (0x3FF)
  40. #define RT_MEMORY_L2 ((uint32_t)0x1<<17) // bit 17: outside the MEM_ALLOC_TYPE_BIT mask (0x3FF)
  41. /**
  42. * @ingroup dvrt_mem
  43. * @brief memory info type
  44. */
  45. #define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1)
  46. #define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2)
  47. #define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3)
  48. #define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4)
  49. /**
  50. * @ingroup dvrt_mem
  51. * @brief memory policy (bits 10..15; OR-ed with a memory type to form rtMemType_t)
  52. */
  53. #define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // no policy bit set; NOTE(review): original comment said "prefer huge page, then default page" - confirm the default behavior
  54. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prefers huge page, then default page
  55. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only uses huge page
  56. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only uses default page
  57. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prefers huge page, then default page, used for p2p
  58. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only uses huge page, used for p2p
  59. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only uses default page, used for p2p
  60. #define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mask of the mem type bits <0, 9>
  61. /**
  62. * @ingroup dvrt_mem
  63. * @brief memory type | memory Policy
  64. */
  65. typedef uint32_t rtMemType_t;
  66. /**
  67. * @ingroup dvrt_mem
  68. * @brief memory advise type
  69. */
  70. #define RT_MEMORY_ADVISE_EXE (0x02)
  71. #define RT_MEMORY_ADVISE_THP (0x04)
  72. #define RT_MEMORY_ADVISE_PLE (0x08)
  73. #define RT_MEMORY_ADVISE_PIN (0x16) // NOTE(review): 0x16 == 0x10 | 0x04 | 0x02, overlapping EXE and THP; 0x10 may have been intended - confirm before changing (ABI value)
  74. /**
  75. * @ingroup dvrt_mem
  76. * @brief memory copy type
  77. */
  78. typedef enum tagRtMemcpyKind {
  79. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  80. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  81. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  82. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  83. RT_MEMCPY_MANAGED, // managed memory
  84. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE,
  85. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  86. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  87. RT_MEMCPY_RESERVED,
  88. } rtMemcpyKind_t;
  89. typedef enum tagRtMemInfoType { // memory category selector taken by rtMemGetInfoEx
  90. RT_MEMORYINFO_DDR,
  91. RT_MEMORYINFO_HBM,
  92. RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
  93. RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
  94. RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
  95. RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
  96. RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR (P2P variant, per the enumerator name)
  97. RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR (P2P variant, per the enumerator name)
  98. RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM (P2P variant, per the enumerator name)
  99. RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM (P2P variant, per the enumerator name)
  100. } rtMemInfoType_t;
  101. typedef enum tagRtRecudeKind { // reduce kind taken by rtReduceAsync; "Recude" is a misspelling of "Reduce" that is part of the public API
  102. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, includes 1P and P2P
  103. RT_RECUDE_KIND_END // end marker (value 11, one past RT_MEMCPY_SDMA_AUTOMATIC_ADD)
  104. } rtRecudeKind_t;
  105. typedef enum tagRtDataType {
  106. RT_DATA_TYPE_FP32 = 0, // fp32
  107. RT_DATA_TYPE_FP16 = 1, // fp16
  108. RT_DATA_TYPE_INT16 = 2, // int16
  109. RT_DATA_TYPE_END
  110. } rtDataType_t;
  111. /**
  112. * @ingroup dvrt_mem
  113. * @brief memory copy channel type
  114. */
  115. typedef enum tagRtMemcpyChannelType {
  116. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  117. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  118. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now
  119. RT_MEMCPY_CHANNEL_TYPE_RESERVED,
  120. } rtMemcpyChannelType_t;
  121. /**
  122. * @ingroup rt_kernel
  123. * @brief ai core memory size
  124. */
  125. typedef struct rtAiCoreMemorySize {
  126. uint32_t l0ASize;
  127. uint32_t l0BSize;
  128. uint32_t l0CSize;
  129. uint32_t l1Size;
  130. uint32_t ubSize;
  131. uint32_t l2Size;
  132. uint32_t l2PageNum;
  133. uint32_t blockSize;
  134. uint64_t bankSize;
  135. uint64_t bankNum;
  136. uint64_t burstInOneBlock;
  137. uint64_t bankGroupNum;
  138. } rtAiCoreMemorySize_t;
  139. /**
  140. * @ingroup dvrt_mem
  141. * @brief memory type
  142. */
  143. typedef enum tagRtMemoryType {
  144. RT_MEMORY_TYPE_HOST = 1,
  145. RT_MEMORY_TYPE_DEVICE = 2 ,
  146. RT_MEMORY_TYPE_SVM = 3,
  147. RT_MEMORY_TYPE_DVPP = 4
  148. } rtMemoryType_t;
  149. /**
  150. * @ingroup dvrt_mem
  151. * @brief memory attribute
  152. */
  153. typedef struct tagRtPointerAttributes {
  154. rtMemoryType_t memoryType; // host memory or device memory
  155. rtMemoryType_t locationType;
  156. uint32_t deviceID; // device ID
  157. uint32_t pageSize;
  158. } rtPointerAttributes_t;
  159. /**
  160. * @ingroup dvrt_mem
  161. * @brief alloc device memory
  162. * @param [in|out] devPtr memory pointer
  163. * @param [in] size memory size
  164. * @param [in] type memory type | memory policy (rtMemType_t)
  165. * @return RT_ERROR_NONE for ok
  166. * @return RT_ERROR_INVALID_VALUE for error input
  167. */
  168. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  169. /**
  170. * @ingroup dvrt_mem
  171. * @brief free device memory
  172. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer variable is not modified)
  173. * @return RT_ERROR_NONE for ok
  174. * @return RT_ERROR_INVALID_VALUE for error input
  175. */
  176. RTS_API rtError_t rtFree(void *devPtr);
  177. /**
  178. * @ingroup dvrt_mem
  179. * @brief alloc device memory for dvpp
  180. * @param [in|out] devPtr memory pointer
  181. * @param [in] size memory size
  182. * @return RT_ERROR_NONE for ok
  183. * @return RT_ERROR_INVALID_VALUE for error input
  184. */
  185. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  186. /**
  187. * @ingroup dvrt_mem
  188. * @brief free device memory for dvpp
  189. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer variable is not modified)
  190. * @return RT_ERROR_NONE for ok
  191. * @return RT_ERROR_INVALID_VALUE for error input
  192. */
  193. RTS_API rtError_t rtDvppFree(void *devPtr);
  194. /**
  195. * @ingroup dvrt_mem
  196. * @brief alloc host memory
  197. * @param [in|out] hostPtr memory pointer
  198. * @param [in] size memory size
  199. * @return RT_ERROR_NONE for ok
  200. * @return RT_ERROR_INVALID_VALUE for error input
  201. */
  202. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  203. /**
  204. * @ingroup dvrt_mem
  205. * @brief free host memory
  206. * @param [in] hostPtr memory pointer
  207. * @return RT_ERROR_NONE for ok
  208. * @return RT_ERROR_INVALID_VALUE for error input
  209. */
  210. RTS_API rtError_t rtFreeHost(void *hostPtr);
  211. /**
  212. * @ingroup dvrt_mem
  213. * @brief alloc managed memory
  214. * @param [in|out] ptr memory pointer
  215. * @param [in] size memory size
  216. * @param [in] flag reserved, set to 0.
  217. * @return RT_ERROR_NONE for ok
  218. * @return RT_ERROR_INVALID_VALUE for error input
  219. */
  220. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  221. /**
  222. * @ingroup dvrt_mem
  223. * @brief free managed memory
  224. * @param [in] ptr memory pointer
  225. * @return RT_ERROR_NONE for ok
  226. * @return RT_ERROR_INVALID_VALUE for error input
  227. */
  228. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  229. /**
  230. * @ingroup dvrt_mem
  231. * @brief alloc cached device memory
  232. * @param [in|out] devPtr memory pointer
  233. * @param [in] size memory size
  234. * @param [in] type memory type | memory policy (rtMemType_t)
  235. * @return RT_ERROR_NONE for ok
  236. */
  237. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  238. /**
  239. * @ingroup dvrt_mem
  240. * @brief flush device memory
  241. * @param [in] base virtual base address
  242. * @param [in] len memory size
  243. * @return RT_ERROR_NONE for ok, errno for failed
  244. */
  245. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  246. /**
  247. * @ingroup dvrt_mem
  248. * @brief invalidate device memory
  249. * @param [in] base virtual base address
  250. * @param [in] len memory size
  251. * @return RT_ERROR_NONE for ok, errno for failed
  252. */
  253. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  254. /**
  255. * @ingroup dvrt_mem
  256. * @brief synchronous memcpy
  257. * @param [in] dst destination address pointer
  258. * @param [in] destMax Max length of destination address memory
  259. * @param [in] src source address pointer
  260. * @param [in] count the number of bytes to copy
  261. * @param [in] kind memcpy type
  262. * @return RT_ERROR_NONE for ok
  263. * @return RT_ERROR_INVALID_VALUE for error input
  264. */
  265. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind);
  266. /**
  267. * @ingroup dvrt_mem
  268. * @brief asynchronous memcpy
  269. * @param [in] dst destination address pointer
  270. * @param [in] destMax Max length of destination address memory
  271. * @param [in] src source address pointer
  272. * @param [in] count the number of bytes to copy
  273. * @param [in] kind memcpy type
  274. * @param [in] stream asynchronous task stream
  275. * @return RT_ERROR_NONE for ok
  276. * @return RT_ERROR_INVALID_VALUE for error input
  277. */
  278. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind,
  279. rtStream_t stream);
  280. /**
  281. * @ingroup dvrt_mem
  282. * @brief asynchronous reduce memcpy
  283. * @param [in] dst destination address pointer
  284. * @param [in] destMax Max length of destination address memory
  285. * @param [in] src source address pointer
  286. * @param [in] count the number of bytes to copy
  287. * @param [in] kind reduce kind (rtRecudeKind_t)
  288. * @param [in] type data type (rtDataType_t)
  289. * @param [in] stream asynchronous task stream
  290. * @return RT_ERROR_NONE for ok
  291. * @return RT_ERROR_INVALID_VALUE for error input
  292. */
  293. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  294. rtDataType_t type, rtStream_t stream);
  295. /**
  296. * @ingroup dvrt_mem
  297. * @brief query memory size
  298. * @param [in] aiCoreMemorySize NOTE(review): non-const pointer on a query call, presumably an [out] struct filled by the runtime - confirm
  299. * @return RT_ERROR_NONE for ok, errno for failed
  300. * @return RT_ERROR_INVALID_VALUE for error input
  301. */
  302. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  303. /**
  304. * @ingroup dvrt_mem
  305. * @brief set memory size. Set it before model inference (bright-screen scenario) to avoid the model failing to be
  306. fully integrated into the network due to memory limitations. Requirement comes from JiaMinHu. Only used for Tiny.
  307. * @param [in] aiCoreMemorySize
  308. * @return RT_ERROR_NONE for ok, errno for failed
  309. * @return RT_ERROR_INVALID_VALUE for error input
  310. */
  311. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  312. /**
  313. * @ingroup dvrt_mem
  314. * @brief set memory with uint32_t value
  315. * @param [in] devPtr
  316. * @param [in] destMax Max length of destination address memory
  317. * @param [in] value
  318. * @param [in] count byte num
  319. * @return RT_ERROR_NONE for ok, errno for failed
  320. * @return RT_ERROR_INVALID_VALUE for error input
  321. */
  322. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
  323. /**
  324. * @ingroup dvrt_mem
  325. * @brief set memory with uint32_t value async
  326. * @param [in] ptr
  327. * @param [in] destMax Max length of destination address memory
  328. * @param [in] value
  329. * @param [in] count byte num
  330. * @param [in] stream
  331. * @return RT_ERROR_NONE for ok, errno for failed
  332. * @return RT_ERROR_INVALID_VALUE for error input
  333. */
  334. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream);
  335. /**
  336. * @ingroup dvrt_mem
  337. * @brief get current device memory total and free
  338. * @param [out] free
  339. * @param [out] total
  340. * @return RT_ERROR_NONE for ok, errno for failed
  341. * @return RT_ERROR_INVALID_VALUE for error input
  342. */
  343. RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
  344. /**
  345. * @ingroup dvrt_mem
  346. * @brief get current device memory total and free for the given memory info type
  347. * @param [in] memInfoType memory category to query (rtMemInfoType_t)
  348. * @param [out] free
  349. * @param [out] total
  350. * @return RT_ERROR_NONE for ok, errno for failed
  351. */
  352. RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total);
  353. /**
  354. * @ingroup dvrt_mem
  355. * @brief prefetch memory to device; NOTE(review): brief previously read "set memory with uint32_t value" (copied from rtMemset) - confirm exact semantics
  356. * @param [in] devPtr
  357. * @param [in] len
  358. * @param [in] device
  359. * @return RT_ERROR_NONE for ok, errno for failed
  360. * @return RT_ERROR_INVALID_VALUE for error input
  361. */
  362. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device);
  363. /**
  364. * @ingroup dvrt_mem
  365. * @brief get memory attribute:Host or Device
  366. * @param [out] attributes
  367. * @param [in] ptr
  368. * @return RT_ERROR_NONE for ok, errno for failed
  369. * @return RT_ERROR_INVALID_VALUE for error input
  370. */
  371. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  372. /**
  373. * @ingroup dvrt_mem
  374. * @brief make memory shared interprocess and assign it a name
  375. * @param [in] ptr device memory address pointer
  376. * @param [in] byteCount identification byteCount
  377. * @param [in] name identification name; NOTE(review): non-const char * paired with len, so it may be an output buffer of len bytes - confirm
  378. * @return RT_ERROR_NONE for ok
  379. * @return RT_ERROR_INVALID_VALUE for error input
  380. * @return RT_ERROR_DRV_ERR for driver error
  381. */
  382. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len);
  383. /**
  384. * @ingroup dvrt_mem
  385. * @brief destroy an interprocess shared memory
  386. * @param [in] name identification name
  387. * @return RT_ERROR_NONE for ok
  388. * @return RT_ERROR_INVALID_VALUE for error input
  389. * @return RT_ERROR_DRV_ERR for driver error
  390. */
  391. RTS_API rtError_t rtIpcDestroyMemoryName(const char *name);
  392. /**
  393. * @ingroup dvrt_mem
  394. * @brief open a interprocess shared memory
  395. * @param [in|out] ptr device memory address pointer
  396. * @param [in] name identification name
  397. * @return RT_ERROR_NONE for ok
  398. * @return RT_ERROR_INVALID_VALUE for error input
  399. * @return RT_ERROR_DRV_ERR for driver error
  400. */
  401. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name);
  402. /**
  403. * @ingroup dvrt_mem
  404. * @brief close an interprocess shared memory
  405. * @param [in] ptr device memory address pointer
  406. * @note the function takes no name argument; only ptr is passed
  407. * @return RT_ERROR_NONE for ok
  408. * @return RT_ERROR_INVALID_VALUE for error input
  409. * @return RT_ERROR_DRV_ERR for driver error
  410. */
  411. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  412. /**
  413. * @ingroup dvrt_mem
  414. * @brief HCCL Async memory cpy
  415. * @param [in] index sq index
  416. * @param [in] wqe_index module index
  417. * @param [in] stream asynchronous task stream
  418. * @return RT_ERROR_NONE for ok
  419. * @return RT_ERROR_INVALID_VALUE for error input
  420. * @return RT_ERROR_DRV_ERR for driver error
  421. */
  422. RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqe_index, rtStream_t stream);
  423. /**
  424. * @ingroup dvrt_mem
  425. * @brief Ipc set mem pid
  426. * @param [in] name name to be queried
  427. * @param [in] pid process id
  428. * @param [in] num length of pid[]
  429. * @return RT_ERROR_NONE for ok
  430. * @return RT_ERROR_INVALID_VALUE for error input
  431. * @return RT_ERROR_DRV_ERR for driver error
  432. */
  433. RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  434. /**
  435. * @ingroup dvrt_mem
  436. * @brief HCCL Async memory cpy
  437. * @param [in] dbIndex single device 0
  438. * @param [in] dbInfo doorbell info
  439. * @param [in] stream asynchronous task stream
  440. * @return RT_ERROR_NONE for ok
  441. * @return RT_ERROR_INVALID_VALUE for error input
  442. * @return RT_ERROR_DRV_ERR for driver error
  443. */
  444. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
  445. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  446. }
  447. #endif
  448. #endif // __CCE_RUNTIME_MEM_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示