You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

config.h 7.0 kB

5 years ago
5 years ago
3 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
3 years ago
3 years ago
3 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
3 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
  3. * Description: config.h
  4. * Create: 2020-01-01
  5. */
  6. #ifndef CCE_RUNTIME_CONFIG_H
  7. #define CCE_RUNTIME_CONFIG_H
  8. #include "base.h"
  9. #if defined(__cplusplus)
  10. extern "C" {
  11. #endif
  /*
   * Platform-type packing helpers.
   *
   * A platform type is one integer laid out as:
   *   bits 16 and up : arch    (PLAT_GET_ARCH reads 16 bits)
   *   bits  8..15    : chip    (PLAT_GET_CHIP reads 8 bits)
   *   bits  0..7     : version (PLAT_GET_VER reads 8 bits)
   *
   * PLAT_COMBINE does not mask its arguments: a chip or ver value wider than
   * 8 bits overlaps the neighbouring field. Each macro evaluates every
   * argument exactly once.
   */
  #define PLAT_COMBINE(arch, chip, ver) (((arch) << 16U) | ((chip) << 8U) | (ver))
  #define PLAT_GET_ARCH(type) (((type) >> 16U) & 0xffffU)
  #define PLAT_GET_CHIP(type) (((type) >> 8U) & 0xffU)
  #define PLAT_GET_VER(type) ((type) & 0xffU)
  /*
   * Architecture identifiers.
   * ARCH_BEGIN aliases the first value and ARCH_END is one past the last
   * defined value.
   */
  typedef enum tagRtArchType {
      ARCH_BEGIN = 0,
      ARCH_V100 = ARCH_BEGIN,
      ARCH_V200 = 1,
      ARCH_V300 = 2,
      ARCH_END = 3,
  } rtArchType_t;
  /*
   * Chip identifiers.
   * CHIP_BEGIN aliases the first value and CHIP_END is one past the last
   * defined value.
   */
  typedef enum tagRtChipType {
      CHIP_BEGIN = 0,
      CHIP_MINI = CHIP_BEGIN,
      CHIP_CLOUD = 1,
      CHIP_MDC = 2,
      CHIP_LHISI = 3,
      CHIP_DC = 4,
      CHIP_CLOUD_V2 = 5,
      CHIP_NO_DEVICE = 6,
      CHIP_MINI_V3 = 7,
      CHIP_5612 = 8, /* 1911T */
      CHIP_END = 9,
  } rtChipType_t;
  /*
   * AICPU scheduling modes.
   * Values are written out explicitly so the numeric encoding stays fixed
   * if enumerators are ever reordered.
   */
  typedef enum tagRtAicpuScheType {
      SCHEDULE_SOFTWARE = 0,     /* Software Schedule */
      SCHEDULE_SOFTWARE_OPT = 1,
      SCHEDULE_HARDWARE = 2,     /* HWTS Schedule */
  } rtAicpuScheType;
  /*
   * Device capability values.
   * The first three enumerators carry the same numeric values as
   * rtAicpuScheType. Values are written out explicitly so the encoding stays
   * fixed if enumerators are ever reordered.
   */
  typedef enum tagRtDeviceCapabilityType {
      RT_SCHEDULE_SOFTWARE = 0,             /* Software Schedule */
      RT_SCHEDULE_SOFTWARE_OPT = 1,
      RT_SCHEDULE_HARDWARE = 2,             /* HWTS Schedule */
      RT_AICPU_BLOCKING_OP_NOT_SUPPORT = 3,
      RT_AICPU_BLOCKING_OP_SUPPORT = 4,     /* 1910/1980/51 ts support AICPU blocking operation */
      RT_MODE_NO_FFTS = 5,                  /* no ffts */
      RT_MODE_FFTS = 6,                     /* 81 get ffts work mode, ffts */
      RT_MODE_FFTS_PLUS = 7,                /* 81 get ffts work mode, ffts plus */
  } rtDeviceCapabilityType;
  /*
   * Version identifiers.
   * VER_BEGIN aliases the first value and VER_END is one past the last
   * defined value.
   */
  typedef enum tagRtVersion {
      VER_BEGIN = 0,
      VER_NA = VER_BEGIN,
      VER_ES = 1,
      VER_CS = 2,
      VER_SD3403 = 3,
      VER_END = 4,
  } rtVersion_t;
  /*
   * Platform identifiers.
   * PLATFORM_BEGIN aliases the first value and PLATFORM_END is one past the
   * last defined value.
   *
   * The original comment here read "match rtChipType_t".
   * NOTE(review): the numeric values do not line up one-to-one with
   * rtChipType_t (e.g. PLATFORM_DC = 5 while CHIP_DC = 4) -- confirm what
   * relation between the two enums is intended.
   */
  typedef enum tagRtPlatformType {
      PLATFORM_BEGIN = 0,
      PLATFORM_MINI_V1 = PLATFORM_BEGIN,
      PLATFORM_CLOUD_V1 = 1,
      PLATFORM_MINI_V2 = 2,
      PLATFORM_LHISI_ES = 3,
      PLATFORM_LHISI_CS = 4,
      PLATFORM_DC = 5,
      PLATFORM_CLOUD_V2 = 6,
      PLATFORM_LHISI_SD3403 = 7,
      PLATFORM_MINI_V3 = 8,
      PLATFORM_MINI_5612 = 9,
      PLATFORM_CLOUD_V2_910B1 = 10,
      PLATFORM_CLOUD_V2_910B2 = 11,
      PLATFORM_CLOUD_V2_910B3 = 12,
      PLATFORM_CLOUD_V2_910C1 = 13,
      PLATFORM_CLOUD_V2_910C2 = 14,
      PLATFORM_CLOUD_V2_910C3 = 15,
      PLATFORM_MDC_BS9SX1A = 16,
      PLATFORM_END = 17,
  } rtPlatformType_t;
  /*
   * Cube fractal M/K/N selector for FP16.
   * Enumerator names carry the three dimensions as a suffix, presumably in
   * M_K_N order -- TODO confirm. Values are written out explicitly so the
   * encoding stays fixed.
   */
  typedef enum tagRtCubeFracMKNFp16 {
      RT_CUBE_MKN_FP16_2_16_16 = 0,
      RT_CUBE_MKN_FP16_4_16_16 = 1,
      RT_CUBE_MKN_FP16_16_16_16 = 2,
      RT_CUBE_MKN_FP16_Default = 3,
  } rtCubeFracMKNFp16_t;
  /*
   * Cube fractal M/K/N selector for INT8.
   * Enumerator names carry the three dimensions as a suffix, presumably in
   * M_K_N order -- TODO confirm. Values are written out explicitly so the
   * encoding stays fixed.
   */
  typedef enum tagRtCubeFracMKNInt8 {
      RT_CUBE_MKN_INT8_2_32_16 = 0,
      RT_CUBE_MKN_INT8_4_32_4 = 1,
      RT_CUBE_MKN_INT8_4_32_16 = 2,
      RT_CUBE_MKN_INT8_16_32_16 = 3,
      RT_CUBE_MKN_INT8_Default = 4,
  } rtCubeFracMKNInt8_t;
  /*
   * Vector vmul fractal M/K/N selector for FP16.
   * Values are written out explicitly so the encoding stays fixed.
   */
  typedef enum tagRtVecFracVmulMKNFp16 {
      RT_VEC_VMUL_MKN_FP16_1_16_16 = 0,
      RT_VEC_VMUL_MKN_FP16_Default = 1,
  } rtVecFracVmulMKNFp16_t;
  /*
   * Vector vmul fractal M/K/N selector for INT8.
   * Values are written out explicitly so the encoding stays fixed.
   */
  typedef enum tagRtVecFracVmulMKNInt8 {
      RT_VEC_VMUL_MKN_INT8_1_32_16 = 0,
      RT_VEC_VMUL_MKN_INT8_Default = 1,
  } rtVecFracVmulMKNInt8_t;
  102. typedef struct tagRtAiCoreSpec {
  103. uint32_t cubeFreq;
  104. uint32_t cubeMSize;
  105. uint32_t cubeKSize;
  106. uint32_t cubeNSize;
  107. rtCubeFracMKNFp16_t cubeFracMKNFp16;
  108. rtCubeFracMKNInt8_t cubeFracMKNInt8;
  109. rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16;
  110. rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8;
  111. } rtAiCoreSpec_t;
  /*
   * AI core memory rate record, filled in through rtGetAiCoreMemoryRates().
   * Note that the struct tag (tagRtAiCoreRatesPara) and the typedef name
   * (rtAiCoreMemoryRates_t) differ; both are kept as-is for compatibility.
   * NOTE(review): the unit of the rate fields is not stated in this header.
   */
  typedef struct tagRtAiCoreRatesPara {
      uint32_t ddrRate;
      uint32_t l2Rate;
      uint32_t l2ReadRate;
      uint32_t l2WriteRate;
      uint32_t l1ToL0ARate;
      uint32_t l1ToL0BRate;
      uint32_t l0CToUBRate;
      uint32_t ubToL2;
      uint32_t ubToDDR;
      uint32_t ubToL1;
  } rtAiCoreMemoryRates_t;
  /*
   * Memory configuration record, filled in through rtGetMemoryConfig().
   * NOTE(review): the unit of both sizes is not stated in this header.
   */
  typedef struct tagRtMemoryConfig {
      uint32_t flowtableSize; /* flow table size */
      uint32_t compilerSize;  /* compiler size */
  } rtMemoryConfig_t;
  /*
   * Platform configuration wrapper around a single 32-bit value.
   * NOTE(review): presumably holds a value packed with PLAT_COMBINE --
   * confirm against the code that fills it.
   */
  typedef struct tagRtPlatformConfig {
      uint32_t platformConfig;
  } rtPlatformConfig_t;
  /*
   * Task timeout categories.
   * Values are written out explicitly so the encoding stays fixed.
   */
  typedef enum tagRTTaskTimeoutType {
      RT_TIMEOUT_TYPE_OP_WAIT = 0,
      RT_TIMEOUT_TYPE_OP_EXECUTE = 1,
  } rtTaskTimeoutType_t;
  135. /**
  136. * @ingroup
  137. * @brief get AI core count
  138. * @param [in] aiCoreCnt
  139. * @return aiCoreCnt
  140. */
  141. RTS_API rtError_t rtGetAiCoreCount(uint32_t *aiCoreCnt);
  142. /**
  143. * @ingroup
  144. * @brief get AI cpu count
  145. * @param [in] aiCpuCnt
  146. * @return aiCpuCnt
  147. */
  148. RTS_API rtError_t rtGetAiCpuCount(uint32_t *aiCpuCnt);
  149. /**
  150. * @ingroup
  151. * @brief get AI core frequency
  152. * @param [in] aiCoreSpec
  153. * @return aiCoreSpec
  154. */
  155. RTS_API rtError_t rtGetAiCoreSpec(rtAiCoreSpec_t *aiCoreSpec);
  156. /**
  157. * @ingroup
  158. * @brief AI get core band Info
  159. * @param [in] aiCoreMemoryRates
  160. * @return aiCoreMemoryRates
  161. */
  162. RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRates);
  163. /**
  164. * @ingroup
  165. * @brief AI get core buffer Info,FlowTable Size,Compiler Size
  166. * @param [in] memoryConfig
  167. * @return memoryConfig
  168. */
  169. RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig);
  170. /**
  171. * @ingroup
  172. * @brief get float overflow mode
  173. * @param [out] floatOverflowMode
  174. * @return RT_ERROR_NONE for ok
  175. * @return RT_ERROR_INVALID_VALUE for error input
  176. */
  177. RTS_API rtError_t rtGetFloatOverflowMode(rtFloatOverflowMode_t * const floatOverflowMode);
  178. /**
  179. * @ingroup
  180. * @brief get l2 buffer Info,virtual baseaddr,Size
  181. * @param [in] stm
  182. * @return RT_ERROR_NONE for ok, errno for failed
  183. */
  184. RTS_API rtError_t rtMemGetL2Info(rtStream_t stm, void **ptr, uint32_t *size);
  185. /**
  186. * @ingroup
  187. * @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be
  188. * represented by 9020.
  189. * @param [out] runtimeVersion
  190. * @return RT_ERROR_NONE for ok
  191. * @return RT_ERROR_INVALID_VALUE for error input
  192. */
  193. RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);
  194. /**
  195. * @ingroup
  196. * @brief get device feature ability by device id, such as task schedule ability.
  197. * @param [in] deviceId
  198. * @param [in] moduleType
  199. * @param [in] featureType
  200. * @param [out] val
  201. * @return RT_ERROR_NONE for ok
  202. * @return RT_ERROR_INVALID_VALUE for error input
  203. */
  204. RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *val);
  205. /**
  206. * @ingroup
  207. * @brief set event wait task timeout time.
  208. * @param [in] timeout
  209. * @return RT_ERROR_NONE for ok
  210. * @return RT_ERROR_INVALID_VALUE for error input
  211. */
  212. RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout);
  213. /**
  214. * @ingroup
  215. * @brief set op execute task timeout time.
  216. * @param [in] timeout
  217. * @return RT_ERROR_NONE for ok
  218. * @return RT_ERROR_INVALID_VALUE for error input
  219. */
  220. RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout);
  221. /**
  222. * @ingroup
  223. * @brief get is Heterogenous.
  224. * @param [out] heterogenous=1 Heterogenous Mode: read isHeterogenous=1 in ini file.
  225. * @param [out] heterogenous=0 NOT Heterogenous Mode:
  226. * 1:not found ini file, 2:error when reading ini, 3:Heterogenous value is not 1
  227. * @return RT_ERROR_NONE for ok
  228. */
  229. RTS_API rtError_t rtGetIsHeterogenous(int32_t *heterogenous);
  230. #if defined(__cplusplus)
  231. }
  232. #endif
  233. #endif // CCE_RUNTIME_CONFIG_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,用户无需感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。