You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 14 kB

5 years ago
5 years ago

  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __CCE_RUNTIME_MEM_H__
  17. #define __CCE_RUNTIME_MEM_H__
  18. #include <stddef.h>
  19. #include "base.h"
  20. #include "config.h"
  21. #include "stream.h"
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif // __cplusplus
  25. /**
  26. * @ingroup dvrt_mem
  27. * @brief memory type values; all of them fit inside the MEM_ALLOC_TYPE_BIT mask (bits <0, 9>)
  28. */
  29. #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
  30. #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
  31. #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
  32. #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
  33. #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
  34. #define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device; NOTE(review): 0x11 shares bit 0x10 with RT_MEMORY_P2P_HBM - confirm this is intended
  35. #define RT_MEMORY_DDR_NC ((uint32_t)0x20) // non-cacheable DDR memory
  36. #define RT_MEMORY_RESERVED ((uint32_t)0x40) // reserved
  37. /**
  38. * @ingroup dvrt_mem
  39. * @brief memory policy flags (bits 10..12, above the memory type bits)
  40. */
  41. #define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // no policy bit set
  42. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prefers huge pages, then default pages
  43. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only uses huge pages
  44. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only uses default pages
  45. #define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mask of the mem type bits <0, 9>
  46. /**
  47. * @ingroup dvrt_mem
  48. * @brief bitwise OR of a memory type (RT_MEMORY_*) and a memory policy (RT_MEMORY_POLICY_*)
  49. */
  50. typedef uint32_t rtMemType_t;
  51. /**
  52. * @ingroup dvrt_mem
  53. * @brief memory advise type (presumably the values accepted by rtMemAdvise's advise argument - TODO confirm)
  54. */
  55. #define RT_MEMORY_ADVISE_EXE (0x02)
  56. #define RT_MEMORY_ADVISE_THP (0x04)
  57. #define RT_MEMORY_ADVISE_PLE (0x08)
  58. #define RT_MEMORY_ADVISE_PIN (0x16) // NOTE(review): 0x16 is not a single bit and overlaps 0x02 and 0x04; confirm whether 0x10 was intended
  59. /**
  60. * @ingroup dvrt_mem
  61. * @brief memory copy direction, passed as the kind argument of rtMemcpy / rtMemcpyAsync
  62. */
  63. typedef enum tagRtMemcpyKind {
  64. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  65. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  66. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  67. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  68. RT_MEMCPY_MANAGED, // managed memory
  69. RT_MEMCPY_RESERVED, // reserved
  70. } rtMemcpyKind_t;
  71. typedef enum tagRtRecudeKind { // reduce kind taken by rtReduceAsync; the "Recude" spelling is part of the public type name
  72. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, includes 1P and P2P
  73. RT_RECUDE_KIND_END // enumerator after the last kind (value 11)
  74. } rtRecudeKind_t;
  75. typedef enum tagRtDataType { // data type taken by rtReduceAsync's type argument
  76. RT_DATA_TYPE_FP32 = 0, // fp32
  77. RT_DATA_TYPE_END // enumerator after the last data type (value 1)
  78. } rtDataType_t;
  79. /**
  80. * @ingroup dvrt_mem
  81. * @brief memory copy channel type
  82. */
  83. typedef enum tagRtMemcpyChannelType {
  84. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  85. RT_MEMCPY_CHANNEL_TYPE_PCIe, // PCIe channel
  86. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not supported now
  87. RT_MEMCPY_CHANNEL_TYPE_RESERVED, // reserved
  88. } rtMemcpyChannelType_t;
  89. /**
  90. * @ingroup rt_kernel
  91. * @brief ai core memory sizes, exchanged through rtAiCoreMemorySizes / rtSetAiCoreMemorySizes (units are not stated in this header - TODO confirm)
  92. */
  93. typedef struct rtAiCoreMemorySize {
  94. uint32_t l0ASize;
  95. uint32_t l0BSize;
  96. uint32_t l0CSize;
  97. uint32_t l1Size;
  98. uint32_t ubSize;
  99. uint32_t l2Size;
  100. uint32_t l2PageNum;
  101. uint32_t blockSize;
  102. uint64_t bankSize; // from here on the fields are 64-bit
  103. uint64_t bankNum;
  104. uint64_t burstInOneBlock;
  105. uint64_t bankGroupNum;
  106. } rtAiCoreMemorySize_t;
  107. /**
  108. * @ingroup dvrt_mem
  109. * @brief memory location type: host memory (1) or device memory (2)
  110. */
  111. typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t;
  112. /**
  113. * @ingroup dvrt_mem
  114. * @brief memory attributes of a pointer, filled in by rtPointerGetAttributes
  115. */
  116. typedef struct tagRtPointerAttributes {
  117. rtMemoryType_t memoryType; // host memory or device memory
  118. uint32_t deviceID; // device ID
  119. uint32_t isManaged; // presumably non-zero for managed memory - TODO confirm
  120. uint32_t pageSize; // page size; units are not stated in this header
  121. } rtPointerAttributes_t;
  122. /**
  123. * @ingroup dvrt_mem
  124. * @brief alloc device memory
  125. * @param [in|out] devPtr address of the memory pointer
  126. * @param [in] size memory size
  127. * @param [in] type memory type, optionally OR-ed with a memory policy (see rtMemType_t)
  128. * @return RT_ERROR_NONE for ok
  129. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  130. */
  131. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  132. /**
  133. * @ingroup dvrt_mem
  134. * @brief free device memory
  135. * @param [in] devPtr memory pointer (passed by value, so it cannot be an output)
  136. * @return RT_ERROR_NONE for ok
  137. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  138. */
  139. RTS_API rtError_t rtFree(void *devPtr);
  140. /**
  141. * @ingroup dvrt_mem
  142. * @brief alloc device memory for dvpp
  143. * @param [in|out] devPtr address of the memory pointer
  144. * @param [in] size memory size
  145. * @return RT_ERROR_NONE for ok
  146. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  147. */
  148. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  149. /**
  150. * @ingroup dvrt_mem
  151. * @brief free device memory for dvpp
  152. * @param [in] devPtr memory pointer (passed by value, so it cannot be an output)
  153. * @return RT_ERROR_NONE for ok
  154. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  155. */
  156. RTS_API rtError_t rtDvppFree(void *devPtr);
  157. /**
  158. * @ingroup dvrt_mem
  159. * @brief alloc host memory
  160. * @param [in|out] hostPtr address of the memory pointer
  161. * @param [in] size memory size
  162. * @return RT_ERROR_NONE for ok
  163. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  164. */
  165. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  166. /**
  167. * @ingroup dvrt_mem
  168. * @brief free host memory
  169. * @param [in] hostPtr host memory pointer
  170. * @return RT_ERROR_NONE for ok
  171. * @return RT_ERROR_INVALID_DEVICE_POINTER for an invalid memory pointer
  172. */
  173. RTS_API rtError_t rtFreeHost(void *hostPtr);
  174. /**
  175. * @ingroup dvrt_mem
  176. * @brief alloc managed memory
  177. * @param [in|out] ptr address of the memory pointer
  178. * @param [in] size memory size
  179. * @param [in] flag reserved, set to 0.
  180. * @return RT_ERROR_NONE for ok
  181. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  182. */
  183. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  184. /**
  185. * @ingroup dvrt_mem
  186. * @brief free managed memory
  187. * @param [in] ptr managed memory pointer
  188. * @return RT_ERROR_NONE for ok
  189. * @return RT_ERROR_INVALID_DEVICE_POINTER for an invalid memory pointer
  190. */
  191. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  192. /**
  193. * @ingroup dvrt_mem
  194. * @brief advise memory
  195. * @param [in] ptr memory pointer
  196. * @param [in] size memory size
  197. * @param [in] advise memory advise (presumably one of RT_MEMORY_ADVISE_* - TODO confirm)
  198. * @return RT_ERROR_NONE for ok
  199. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  200. */
  201. RTS_API rtError_t rtMemAdvise(void *ptr, uint64_t size, uint32_t advise);
  202. /**
  203. * @ingroup dvrt_mem
  204. * @brief flush device memory
  205. * @param [in] base virtual base address
  206. * @param [in] len memory size
  207. * @return RT_ERROR_NONE for ok, errno for failed
  208. */
  209. RTS_API rtError_t rtFlushCache(uint64_t base, uint32_t len);
  210. /**
  211. * @ingroup dvrt_mem
  212. * @brief invalidate device memory
  213. * @param [in] base virtual base address
  214. * @param [in] len memory size
  215. * @return RT_ERROR_NONE for ok, errno for failed
  216. */
  217. RTS_API rtError_t rtInvalidCache(uint64_t base, uint32_t len);
  218. /**
  219. * @ingroup dvrt_mem
  220. * @brief synchronous memcpy
  221. * @param [in] dst destination address pointer
  222. * @param [in] destMax max length of destination address memory
  223. * @param [in] src source address pointer
  224. * @param [in] count the number of bytes to copy
  225. * @param [in] kind memcpy type
  226. * @return RT_ERROR_NONE for ok
  227. * @return RT_ERROR_INVALID_VALUE for error input of count
  228. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  229. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  230. */
  231. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind);
  232. /**
  233. * @ingroup dvrt_mem
  234. * @brief asynchronous memcpy
  235. * @param [in] dst destination address pointer
  236. * @param [in] destMax max length of destination address memory
  237. * @param [in] src source address pointer
  238. * @param [in] count the number of bytes to copy
  239. * @param [in] kind memcpy type
  240. * @param [in] stream asynchronous task stream
  241. * @return RT_ERROR_NONE for ok
  242. * @return RT_ERROR_INVALID_VALUE for error input of count,stream
  243. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  244. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  245. */
  246. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind,
  247. rtStream_t stream);
  248. /**
  249. * @ingroup dvrt_mem
  250. * @brief asynchronous reduce memcpy
  251. * @param [in] dst destination address pointer
  252. * @param [in] destMax max length of destination address memory
  253. * @param [in] src source address pointer
  254. * @param [in] count the number of bytes to copy
  255. * @param [in] kind reduce kind (rtRecudeKind_t)
  256. * @param [in] type data type (rtDataType_t)
  257. * @param [in] stream asynchronous task stream
  258. * @return RT_ERROR_NONE for ok
  259. * @return RT_ERROR_INVALID_VALUE for error input of count,stream
  260. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  261. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  262. */
  263. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  264. rtDataType_t type, rtStream_t stream);
  265. /**
  266. * @ingroup dvrt_mem
  267. * @brief query memory size
  268. * @param [in] aiCoreMemorySize ai core memory size struct; NOTE(review): marked [in], but a query presumably writes it - confirm
  269. * @return RT_ERROR_NONE for ok, errno for failed
  270. */
  271. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  272. /**
  273. * @ingroup dvrt_mem
  274. * @brief set memory size. Set before model reasoning ("bright screen" case) so that the model can be fully
  275. integrated into the network despite memory limitations. Requirement comes from JiaMinHu. Only used for Tiny.
  276. * @param [in] aiCoreMemorySize ai core memory sizes to set
  277. * @return RT_ERROR_NONE for ok, errno for failed
  278. */
  279. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  280. /**
  281. * @ingroup dvrt_mem
  282. * @brief set memory with uint32_t value
  283. * @param [in] devPtr memory pointer
  284. * @param [in] destMax max length of destination address memory
  285. * @param [in] value value to set
  286. * @param [in] count byte num
  287. * @return RT_ERROR_NONE for ok, errno for failed
  288. */
  289. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
  290. /**
  291. * @ingroup dvrt_mem
  292. * @brief set memory with uint32_t value async
  293. * @param [in] ptr memory pointer
  294. * @param [in] destMax max length of destination address memory
  295. * @param [in] value value to set
  296. * @param [in] count byte num
  297. * @param [in] stream task stream
  298. * @return RT_ERROR_NONE for ok, errno for failed
  299. */
  300. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream);
  301. /**
  302. * @ingroup dvrt_mem
  303. * @brief get current device memory total and free
  304. * @param [out] free free memory of the current device
  305. * @param [out] total total memory of the current device
  306. * @return RT_ERROR_NONE for ok, errno for failed
  307. */
  308. RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
  309. /**
  310. * @ingroup dvrt_mem
  311. * @brief prefetch memory to a device (inferred from the function name; the earlier text repeated rtMemset's brief - TODO confirm)
  312. * @param [in] devPtr memory pointer
  313. * @param [in] len memory length
  314. * @param [in] device device id
  315. * @return RT_ERROR_NONE for ok, errno for failed
  316. */
  317. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device);
  318. /**
  319. * @ingroup dvrt_mem
  320. * @brief get memory attribute:Host or Device
  321. * @param [out] attributes attributes of the memory ptr points to
  322. * @param [in] ptr memory pointer to query
  323. * @return RT_ERROR_NONE for ok, errno for failed
  324. */
  325. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  326. /**
  327. * @ingroup dvrt_mem
  328. * @brief make memory shared interprocess and assign it a name
  329. * @param [in] ptr device memory address pointer
  330. * @param [in] byteCount identification byteCount
  331. * @param [in] name identification name; len is presumably the length of the name buffer (name is non-const, so it may be an output - TODO confirm)
  332. * @return RT_ERROR_NONE for ok
  333. * @return RT_ERROR_INVALID_VALUE for error input of ptr, name, byteCount
  334. * @return RT_ERROR_DRV_ERR for driver error
  335. */
  336. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len);
  337. /**
  338. * @ingroup dvrt_mem
  339. * @brief destroy an interprocess shared memory
  340. * @param [in] name identification name
  341. * @return RT_ERROR_NONE for ok
  342. * @return RT_ERROR_INVALID_VALUE for error input of name
  343. * @return RT_ERROR_DRV_ERR for driver error
  344. */
  345. RTS_API rtError_t rtIpcDestroyMemoryName(const char *name);
  346. /**
  347. * @ingroup dvrt_mem
  348. * @brief open an interprocess shared memory
  349. * @param [in|out] ptr address of the device memory pointer
  350. * @param [in] name identification name
  351. * @return RT_ERROR_NONE for ok
  352. * @return RT_ERROR_INVALID_VALUE for error input of ptr, name
  353. * @return RT_ERROR_DRV_ERR for driver error
  354. */
  355. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name);
  356. /**
  357. * @ingroup dvrt_mem
  358. * @brief close an interprocess shared memory
  359. * @param [in] ptr device memory address pointer
  360. * @note this function takes no name parameter; ptr is its only argument
  361. * @return RT_ERROR_NONE for ok
  362. * @return RT_ERROR_INVALID_VALUE for error input of ptr
  363. * @return RT_ERROR_DRV_ERR for driver error
  364. */
  365. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  366. /**
  367. * @ingroup dvrt_mem
  368. * @brief HCCL async memory copy
  369. * @param [in] index sq index
  370. * @param [in] wqe_index module index
  371. * @param [in] stream asynchronous task stream
  372. * @return RT_ERROR_NONE for ok
  373. * @return RT_ERROR_INVALID_VALUE for error input
  374. * @return RT_ERROR_DRV_ERR for driver error
  375. */
  376. RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqe_index, rtStream_t stream);
  377. /**
  378. * @ingroup dvrt_mem
  379. * @brief set the readCount value of a memory range
  380. * @param [in] devPtr memory pointer
  381. * @param [in] size memory size
  382. * @param [in] readCount readCount value
  383. * @return RT_ERROR_NONE for ok
  384. * @return RT_ERROR_INVALID_VALUE for error input
  385. * @return RT_ERROR_INVALID_RESOURCE_HANDLE for invalid resource handle
  386. * @return RT_ERROR_DRV_ERR for driver error
  387. */
  388. RTS_API rtError_t rtMemSetRC(const void *devPtr, uint64_t size, uint32_t readCount);
  389. /**
  390. * @ingroup dvrt_mem
  391. * @brief set the process ids for a named Ipc memory
  392. * @param [in] name name to be queried
  393. * @param [in] pid array of process ids
  394. * @param [in] num length of pid[]
  395. * @return RT_ERROR_NONE for ok
  396. * @return RT_ERROR_INVALID_VALUE for error input
  397. * @return RT_ERROR_INVALID_RESOURCE_HANDLE for invalid resource handle
  398. * @return RT_ERROR_DRV_ERR for driver error
  399. */
  400. RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  401. #ifdef __cplusplus
  402. }
  403. #endif
  404. #endif // __CCE_RUNTIME_MEM_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示