You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

config.h 6.0 kB

5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. * http://www.apache.org/licenses/LICENSE-2.0
  7. * Unless required by applicable law or agreed to in writing, software
  8. * distributed under the License is distributed on an "AS IS" BASIS,
  9. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. * See the License for the specific language governing permissions and
  11. * limitations under the License.
  12. */
  13. #ifndef __CCE_RUNTIME_CONFIG_H__
  14. #define __CCE_RUNTIME_CONFIG_H__
  15. #include "base.h"
  16. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  17. extern "C" {
  18. #endif
  /*
   * Platform identifier packing helpers.
   *
   * Layout of a packed value: arch in bits 16 and above, chip in bits 8-15,
   * ver in bits 0-7.
   *
   * Every macro argument is parenthesized so that an expression argument
   * (for example `a | b`) is evaluated as a unit before it is shifted or
   * masked; without the parentheses `PLAT_GET_VER(a | b)` would expand to
   * `(a | b & 0xff)` and mask only `b`.
   *
   * PLAT_COMBINE does not mask its operands: chip and ver must each fit in
   * 8 bits or they will overlap the neighbouring field.
   */
  #define PLAT_COMBINE(arch, chip, ver) (((arch) << 16) | ((chip) << 8) | (ver))
  #define PLAT_GET_ARCH(type) (((type) >> 16) & 0xffff)
  #define PLAT_GET_CHIP(type) (((type) >> 8) & 0xff)
  #define PLAT_GET_VER(type) ((type) & 0xff)
  23. typedef enum tagRtArchType { /* architecture identifiers; presumably the "arch" operand of PLAT_COMBINE - TODO confirm */
  24. ARCH_BEGIN = 0, /* lower bound marker (0) */
  25. ARCH_V100 = ARCH_BEGIN, /* same value as ARCH_BEGIN (0) */
  26. ARCH_V200, /* 1 */
  27. ARCH_END, /* 2; end marker placed after the last listed architecture */
  28. } rtArchType_t;
  29. typedef enum tagRtChipType { /* chip identifiers; presumably the "chip" operand of PLAT_COMBINE - TODO confirm */
  30. CHIP_BEGIN = 0, /* lower bound marker (0) */
  31. CHIP_MINI = CHIP_BEGIN, /* same value as CHIP_BEGIN (0) */
  32. CHIP_CLOUD, /* 1 */
  33. CHIP_MDC, /* 2 */
  34. CHIP_LHISI, /* 3 */
  35. CHIP_DC, /* 4 */
  36. CHIP_CLOUD_V2, /* 5 */
  37. CHIP_END, /* 6; end marker placed after the last listed chip */
  38. } rtChipType_t;
  39. typedef enum tagRtAicpuScheType { /* AI CPU scheduling modes; NOTE(review): typedef name has no _t suffix, unlike the other types in this header */
  40. SCHEDULE_SOFTWARE = 0, /* Software Schedule (0) */
  41. SCHEDULE_SOFTWARE_OPT, /* 1; presumably an optimized software schedule - TODO confirm */
  42. SCHEDULE_HARDWARE, /* HWTS Schedule (2) */
  43. } rtAicpuScheType;
  44. typedef enum tagRtVersion { /* version identifiers; presumably the "ver" operand of PLAT_COMBINE - TODO confirm */
  45. VER_BEGIN = 0, /* lower bound marker (0) */
  46. VER_NA = VER_BEGIN, /* same value as VER_BEGIN (0) */
  47. VER_ES, /* 1 */
  48. VER_CS, /* 2 */
  49. VER_SD3403, /* 3 */
  50. VER_END, /* 4; end marker placed after the last listed version */
  51. } rtVersion_t;
  52. /* match rtChipType_t - NOTE(review): the numeric values below do not coincide with rtChipType_t (e.g. PLATFORM_DC is 5 while CHIP_DC is 4); confirm what "match" is meant to cover */
  53. typedef enum tagRtPlatformType {
  54. PLATFORM_BEGIN = 0, /* lower bound marker (0) */
  55. PLATFORM_MINI_V1 = PLATFORM_BEGIN, /* same value as PLATFORM_BEGIN (0) */
  56. PLATFORM_CLOUD_V1, /* 1 */
  57. PLATFORM_MINI_V2, /* 2 */
  58. PLATFORM_LHISI_ES, /* 3 */
  59. PLATFORM_LHISI_CS, /* 4 */
  60. PLATFORM_DC, /* 5 */
  61. PLATFORM_CLOUD_V2, /* 6 */
  62. PLATFORM_END, /* 7; end marker placed after the last listed platform */
  63. } rtPlatformType_t;
  64. typedef enum tagRtCubeFracMKNFp16 { /* selector stored in rtAiCoreSpec_t.cubeFracMKNFp16; names presumably encode M_K_N sizes for FP16 - TODO confirm */
  65. RT_CUBE_MKN_FP16_2_16_16 = 0, /* 0 */
  66. RT_CUBE_MKN_FP16_4_16_16, /* 1 */
  67. RT_CUBE_MKN_FP16_16_16_16, /* 2 */
  68. RT_CUBE_MKN_FP16_Default, /* 3; listed last */
  69. } rtCubeFracMKNFp16_t;
  70. typedef enum tagRtCubeFracMKNInt8 { /* selector stored in rtAiCoreSpec_t.cubeFracMKNInt8; names presumably encode M_K_N sizes for INT8 - TODO confirm */
  71. RT_CUBE_MKN_INT8_2_32_16 = 0, /* 0 */
  72. RT_CUBE_MKN_INT8_4_32_4, /* 1 */
  73. RT_CUBE_MKN_INT8_4_32_16, /* 2 */
  74. RT_CUBE_MKN_INT8_16_32_16, /* 3 */
  75. RT_CUBE_MKN_INT8_Default, /* 4; listed last */
  76. } rtCubeFracMKNInt8_t;
  77. typedef enum tagRtVecFracVmulMKNFp16 { /* selector stored in rtAiCoreSpec_t.vecFracVmulMKNFp16 */
  78. RT_VEC_VMUL_MKN_FP16_1_16_16 = 0, /* 0 */
  79. RT_VEC_VMUL_MKN_FP16_Default, /* 1; listed last */
  80. } rtVecFracVmulMKNFp16_t;
  81. typedef enum tagRtVecFracVmulMKNInt8 { /* selector stored in rtAiCoreSpec_t.vecFracVmulMKNInt8 */
  82. RT_VEC_VMUL_MKN_INT8_1_32_16 = 0, /* 0 */
  83. RT_VEC_VMUL_MKN_INT8_Default, /* 1; listed last */
  84. } rtVecFracVmulMKNInt8_t;
  85. typedef struct tagRtAiCoreSpec { /* AI core specification; the argument type of rtGetAiCoreSpec() */
  86. uint32_t cubeFreq; /* cube frequency; unit is not stated in this header */
  87. uint32_t cubeMSize; /* cube M size */
  88. uint32_t cubeKSize; /* cube K size */
  89. uint32_t cubeNSize; /* cube N size */
  90. rtCubeFracMKNFp16_t cubeFracMKNFp16; /* one of the RT_CUBE_MKN_FP16_* values */
  91. rtCubeFracMKNInt8_t cubeFracMKNInt8; /* one of the RT_CUBE_MKN_INT8_* values */
  92. rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16; /* one of the RT_VEC_VMUL_MKN_FP16_* values */
  93. rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8; /* one of the RT_VEC_VMUL_MKN_INT8_* values */
  94. } rtAiCoreSpec_t;
  95. typedef struct tagRtAiCoreRatesPara { /* argument type of rtGetAiCoreMemoryRates(); NOTE(review): struct tag (RatesPara) and typedef name (MemoryRates) differ */
  96. uint32_t ddrRate; /* units of all rate fields are not stated in this header */
  97. uint32_t l2Rate;
  98. uint32_t l2ReadRate;
  99. uint32_t l2WriteRate;
  100. uint32_t l1ToL0ARate;
  101. uint32_t l1ToL0BRate;
  102. uint32_t l0CToUBRate;
  103. uint32_t ubToL2; /* no "Rate" suffix, unlike the fields above; presumably still a rate - TODO confirm */
  104. uint32_t ubToDDR; /* presumably a rate as well - TODO confirm */
  105. uint32_t ubToL1; /* presumably a rate as well - TODO confirm */
  106. } rtAiCoreMemoryRates_t;
  107. typedef struct tagRtMemoryConfig { /* argument type of rtGetMemoryConfig() */
  108. uint32_t flowtableSize; /* flowtable size; unit is not stated in this header */
  109. uint32_t compilerSize; /* compiler size; unit is not stated in this header */
  110. } rtMemoryConfig_t;
  111. typedef struct tagRtPlatformConfig { /* wrapper around a single 32-bit platform value */
  112. uint32_t platformConfig; /* presumably a value packed with PLAT_COMBINE - TODO confirm */
  113. } rtPlatformConfig_t;
  114. typedef enum tagRTTaskTimeoutType { /* kinds of task timeout; presumably correspond to rtSetOpWaitTimeOut / rtSetOpExecuteTimeOut - TODO confirm */
  115. RT_TIMEOUT_TYPE_OP_WAIT = 0, /* 0 */
  116. RT_TIMEOUT_TYPE_OP_EXECUTE, /* 1 */
  117. } rtTaskTimeoutType_t;
  118. /**
  119. * @ingroup
  120. * @brief get AI core count
  121. * @param [out] aiCoreCnt pointer through which the AI core count is presumably delivered (previously tagged [in]; confirm)
  122. * @return rtError_t status code; the count itself is not the function's return value
  123. */
  124. RTS_API rtError_t rtGetAiCoreCount(uint32_t *aiCoreCnt);
  125. /**
  126. * @ingroup
  127. * @brief get AI cpu count
  128. * @param [out] aiCpuCnt pointer through which the AI cpu count is presumably delivered (previously tagged [in]; confirm)
  129. * @return rtError_t status code; the count itself is not the function's return value
  130. */
  131. RTS_API rtError_t rtGetAiCpuCount(uint32_t *aiCpuCnt);
  132. /**
  133. * @ingroup
  134. * @brief get AI core specification (rtAiCoreSpec_t holds the cube frequency, cube M/K/N sizes and the cubeFrac/vecFrac selectors)
  135. * @param [out] aiCoreSpec pointer to the rtAiCoreSpec_t that is presumably filled in (previously tagged [in]; confirm)
  136. * @return rtError_t status code; the specification itself is not the function's return value
  137. */
  138. RTS_API rtError_t rtGetAiCoreSpec(rtAiCoreSpec_t *aiCoreSpec);
  139. /**
  140. * @ingroup
  141. * @brief get AI core memory rate info (original wording: "band Info", presumably bandwidth)
  142. * @param [out] aiCoreMemoryRates pointer to the rtAiCoreMemoryRates_t that is presumably filled in (previously tagged [in]; confirm)
  143. * @return rtError_t status code; the rates themselves are not the function's return value
  144. */
  145. RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRates);
  146. /**
  147. * @ingroup
  148. * @brief get memory configuration: flowtable size and compiler size (the two fields of rtMemoryConfig_t)
  149. * @param [out] memoryConfig pointer to the rtMemoryConfig_t that is presumably filled in (previously tagged [in]; confirm)
  150. * @return rtError_t status code; the configuration itself is not the function's return value
  151. */
  152. RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig);
  153. /**
  154. * @ingroup
  155. * @brief get l2 buffer info: virtual base address and size
  156. * @param [in] stream
       * @param [out] ptr presumably receives the l2 buffer virtual base address - TODO confirm
       * @param [out] size presumably receives the l2 buffer size; unit is not stated in this header - TODO confirm
  157. * @return RT_ERROR_NONE for ok, errno for failed
  158. */
  159. RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);
  160. /**
  161. * @ingroup
  162. * @brief get runtime version. The version is returned as (1000 * major + 10 * minor). For example, RUNTIME 9.2 would be
  163. * represented by 9020 (1000 * 9 + 10 * 2).
  164. * @param [out] runtimeVersion
  165. * @return RT_ERROR_NONE for ok
  166. * @return RT_ERROR_INVALID_VALUE for error input
  167. */
  168. RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);
  169. /**
  170. * @ingroup
  171. * @brief get a device feature capability by device id, such as task schedule capability.
  172. * @param [in] deviceId
  173. * @param [in] moduleType valid values are not defined in this header
  174. * @param [in] featureType valid values are not defined in this header
  175. * @param [out] value
  176. * @return RT_ERROR_NONE for ok
  177. * @return RT_ERROR_INVALID_VALUE for error input
  178. */
  179. RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value);
  180. /**
  181. * @ingroup
  182. * @brief set the timeout of event wait tasks.
  183. * @param [in] timeout timeout value; NOTE(review): the unit is not stated in this header - confirm
  184. * @return RT_ERROR_NONE for ok
  185. * @return RT_ERROR_INVALID_VALUE for error input
  186. */
  187. RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout);
  188. /**
  189. * @ingroup
  190. * @brief set the timeout of op execute tasks.
  191. * @param [in] timeout timeout value; NOTE(review): the unit is not stated in this header - confirm
  192. * @return RT_ERROR_NONE for ok
  193. * @return RT_ERROR_INVALID_VALUE for error input
  194. */
  195. RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout);
  196. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  197. }
  198. #endif
  199. #endif // __CCE_RUNTIME_CONFIG_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示