You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

prof_acl_api.h 13 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MSPROFILER_API_PROF_ACL_API_H_
  17. #define MSPROFILER_API_PROF_ACL_API_H_
  18. #define MSVP_MAX_DEV_NUM 64 // capacity of ProfConfig::devIdList
  19. #ifndef OS_TYPE
  20. #define OS_TYPE 0 // fallback when the build does not define OS_TYPE
  21. #endif // OS_TYPE
  22. #if (OS_TYPE != LINUX) // NOTE(review): LINUX is not defined in the visible part of this header; an undefined identifier evaluates to 0 inside #if
  23. #define MSVP_PROF_API __declspec(dllexport) // export decoration used when OS_TYPE differs from LINUX
  24. #else
  25. #define MSVP_PROF_API __attribute__((visibility("default"))) // default symbol visibility when OS_TYPE equals LINUX
  26. #endif // OS_TYPE != LINUX
  27. // DataTypeConfig: single-bit flag values unless noted; presumably OR-ed into the uint64_t dataTypeConfig fields/params - confirm
  28. #define PROF_ACL_API 0x0001
  29. #define PROF_TASK_TIME 0x0002
  30. #define PROF_AICORE_METRICS 0x0004
  31. #define PROF_AICPU_TRACE 0x0008
  32. #define PROF_MODEL_EXECUTE 0x0010
  33. #define PROF_RUNTIME_API 0x0020
  34. #define PROF_RUNTIME_TRACE 0x0040
  35. #define PROF_SCHEDULE_TIMELINE 0x0080
  36. #define PROF_SCHEDULE_TRACE 0x0100
  37. #define PROF_AIVECTORCORE_METRICS 0x0200
  38. #define PROF_SUBTASK_TIME 0x0400
  39. #define PROF_TRAINING_TRACE 0x0800
  40. #define PROF_HCCL_TRACE 0x1000
  41. #define PROF_DATA_PROCESS 0x2000
  42. #define PROF_TASK_TRACE 0x3842 // composite, not a single bit: numerically equals PROF_TASK_TIME | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | PROF_HCCL_TRACE | PROF_DATA_PROCESS
  43. #define PROF_MODEL_LOAD 0x8000000000000000 // bit 63; only representable in a 64-bit integer type
  44. // DataTypeConfig MASK: each value below is identical to the flag of the same name above; PROF_TASK_TRACE has no mask
  45. #define PROF_ACL_API_MASK 0x0001
  46. #define PROF_TASK_TIME_MASK 0x0002
  47. #define PROF_AICORE_METRICS_MASK 0x0004
  48. #define PROF_AICPU_TRACE_MASK 0x0008
  49. #define PROF_MODEL_EXECUTE_MASK 0x0010
  50. #define PROF_RUNTIME_API_MASK 0x0020
  51. #define PROF_RUNTIME_TRACE_MASK 0x0040
  52. #define PROF_SCHEDULE_TIMELINE_MASK 0x0080
  53. #define PROF_SCHEDULE_TRACE_MASK 0x0100
  54. #define PROF_AIVECTORCORE_METRICS_MASK 0x0200
  55. #define PROF_SUBTASK_TIME_MASK 0x0400
  56. #define PROF_TRAINING_TRACE_MASK 0x0800
  57. #define PROF_HCCL_TRACE_MASK 0x1000
  58. #define PROF_DATA_PROCESS_MASK 0x2000
  59. #define PROF_MODEL_LOAD_MASK 0x8000000000000000 // bit 63, same value as PROF_MODEL_LOAD
  60. #include <cstdint>
  61. #include <string>
  62. /**
  63. * @name ProfErrorCode
  64. * @brief error codes returned (as int32_t) by the prof_acl_api functions; values are sequential from 0
  65. */
  66. enum ProfErrorCode {
  67. PROF_ERROR_NONE = 0, // ok
  68. PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr
  69. PROF_ERROR_REPEAT_INIT, // profiling has already been initialized
  70. PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string
  71. PROF_ERROR_DIR_NO_ACCESS, // dir is not accessible
  72. PROF_ERROR_FAILURE, // failed to init or start profiling
  73. PROF_ERROR_NOT_INITED, // profiling has not been initialized
  74. PROF_ERROR_DEVICE_INVALID, // device id invalid
  75. PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics
  76. PROF_ERROR_REPEAT_START, // profiling has already been started
  77. PROF_ERROR_NOT_STARTED, // profiling has not been started
  78. PROF_ERROR_REPEAT_SUBSCRIBE, // same model id has already been subscribed
  79. PROF_ERROR_MODEL_ID_INVALID, // model id does not exist or has not been subscribed
  80. PROF_ERROR_API_CONFLICT, // prof ctrl api mode conflicts with subscribe mode
  81. };
  82. /**
  83. * @brief transfer profiling config in acl.json to sample config
  84. * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
  85. * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
  86. * @return ProfErrorCode
  87. */
  88. MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);
  89. /**
  90. * @name ProfInit
  91. * @brief init profiling
  92. * @param profInitCfg [IN] config of init profiling of json format
  93. * @return ProfErrorCode
  94. */
  95. MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);
  96. /**
  97. * @name ProfAicoreMetrics
  98. * @brief kinds of aicore metrics; used by ProfConfig::aicoreMetrics and ProfSubscribeConfig::aicoreMetrics
  99. */
  100. enum ProfAicoreMetrics {
  101. PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, // sic: the spelling "ARITHMATIC" is part of the public identifier
  102. PROF_AICORE_PIPELINE = 1,
  103. PROF_AICORE_SYNCHRONIZATION = 2,
  104. PROF_AICORE_MEMORY = 3,
  105. PROF_AICORE_INTERNAL_MEMORY = 4,
  106. PROF_AICORE_STALL = 5,
  107. PROF_AICORE_METRICS_COUNT, // implicitly 6: number of metric kinds listed above
  108. PROF_AICORE_NONE = 0xff, // NOTE(review): presumably "no aicore metrics selected" - confirm against the implementation
  109. };
  110. /**
  111. * @name ProfConfig
  112. * @brief config struct passed to ProfStartProfiling and ProfStopProfiling
  113. */
  114. struct ProfConfig {
  115. uint32_t devNums; // length of device id list; NOTE(review): presumably must not exceed MSVP_MAX_DEV_NUM - confirm
  116. uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list (fixed capacity of MSVP_MAX_DEV_NUM entries)
  117. ProfAicoreMetrics aicoreMetrics; // aicore metric
  118. uint64_t dataTypeConfig; // data type to start profiling; NOTE(review): presumably a combination of the PROF_* flags - confirm
  119. };
  120. /**
  121. * @name ProfStartProfiling
  122. * @brief start profiling
  123. * @param profStartCfg [IN] config to start profiling
  124. * @return ProfErrorCode
  125. */
  126. MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);
  127. /**
  128. * @name ProfStopProfiling
  129. * @brief stop profiling
  130. * @param profStopCfg [IN] config to stop profiling
  131. * @return ProfErrorCode
  132. */
  133. MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);
  134. /**
  135. * @name ProfFinalize
  136. * @brief finalize profiling task
  137. * @return ProfErrorCode
  138. */
  139. MSVP_PROF_API int32_t ProfFinalize();
  140. /**
  141. * @name ProfGetDataTypeConfig
  142. * @brief get the dataTypeConfig that one device was started with
  143. * @param deviceId [IN] deviceId to get dataTypeConfig
  144. * @param dataTypeConfig [OUT] result get
  145. * @return ProfErrorCode
  146. */
  147. MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig);
  148. namespace Msprofiler {
  149. namespace Api {
  150. /**
  151. * @brief transfer profiling config in acl.json to sample config
  152. * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
  153. * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
  154. * @return ProfErrorCode
  155. */
  156. MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);
  157. /**
  158. * @name ProfInit
  159. * @brief init profiling
  160. * @param profInitCfg [IN] config of init profiling of json format
  161. * @return ProfErrorCode
  162. */
  163. MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);
  164. /**
  165. * @name ProfStartProfiling
  166. * @brief start profiling
  167. * @param profStartCfg [IN] config to start profiling
  168. * @return ProfErrorCode
  169. */
  170. MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);
  171. /**
  172. * @name ProfStopProfiling
  173. * @brief stop profiling
  174. * @param profStopCfg [IN] config to stop profiling
  175. * @return ProfErrorCode
  176. */
  177. MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);
  178. /**
  179. * @name ProfFinalize
  180. * @brief finalize profiling task
  181. * @return ProfErrorCode
  182. */
  183. MSVP_PROF_API int32_t ProfFinalize();
  184. /**
  185. * @name ProfGetDataTypeConfig
  186. * @brief get the dataTypeConfig that one device was started with
  187. * @param deviceId [IN] deviceId to get dataTypeConfig
  188. * @param dataTypeConfig [OUT] result get
  189. * @return ProfErrorCode
  190. */
  191. MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig);
  192. /**
  193. * @name WorkMode
  194. * @brief profiling api work mode, as returned by ProfGetApiWorkMode; values are implicitly 0, 1, 2
  195. */
  196. enum WorkMode {
  197. WORK_MODE_OFF, // profiling is not at work
  198. WORK_MODE_API_CTRL, // profiling works in api ctrl mode (ProfInit)
  199. WORK_MODE_SUBSCRIBE, // profiling works in subscribe mode
  200. };
  201. /**
  202. * @name ProfGetApiWorkMode
  203. * @brief get profiling api work mode
  204. * @return WorkMode
  205. */
  206. MSVP_PROF_API WorkMode ProfGetApiWorkMode();
  207. /**
  208. * @name ProfSubscribeConfig
  209. * @brief config of the subscribe api; taken by ProfModelSubscribe and by the ProfGetDataTypeConfig overload below
  210. */
  211. struct ProfSubscribeConfig {
  212. bool timeInfo; // subscribe op time
  213. ProfAicoreMetrics aicoreMetrics; // subscribe ai core metrics
  214. void* fd; // pipe fd; NOTE(review): declared as void*, the pointee type is not visible here - confirm what callers must pass
  215. };
  216. /**
  217. * @name ProfGetDataTypeConfig
  218. * @brief get DataTypeConfig of subscribe
  219. * @param profSubscribeConfig [IN] config to subscribe data
  220. * @return DataTypeConfig
  221. */
  222. MSVP_PROF_API uint64_t ProfGetDataTypeConfig(const ProfSubscribeConfig *profSubscribeConfig);
  223. /**
  224. * @name ProfModelSubscribe
  225. * @brief subscribe data of one model id
  226. * @param modelId [IN] model id to subscribe data
  227. * @param devId [IN] device id of model
  228. * @param profSubscribeConfig [IN] config to subscribe data
  229. * @return ProfErrorCode
  230. */
  231. MSVP_PROF_API int32_t ProfModelSubscribe(uint32_t modelId, uint32_t devId,
  232. const ProfSubscribeConfig *profSubscribeConfig);
  233. /**
  234. * @name ProfIsModelSubscribed
  235. * @brief check if a model id is subscribed
  236. * @param modelId [IN] model id to check
  237. * @return true: subscribed, false: not
  238. */
  239. MSVP_PROF_API bool ProfIsModelSubscribed(uint32_t modelId);
  240. /**
  241. * @name ProfModelUnSubscribe
  242. * @brief unsubscribe a model id
  243. * @param modelId [IN] model id to unsubscribe
  244. * @return ProfErrorCode
  245. */
  246. MSVP_PROF_API int32_t ProfModelUnSubscribe(uint32_t modelId);
  247. /**
  248. * @name ProfGetOpDescSize
  249. * @brief get profiling data struct size
  250. * @param opDescSize [OUT] bytes of profiling subscribe data struct
  251. * @return ProfErrorCode
  252. */
  253. MSVP_PROF_API int32_t ProfGetOpDescSize(uint32_t *opDescSize);
  254. /**
  255. * @name ProfGetOpNum
  256. * @brief get how many op data there are in data
  257. * @param data [IN] data read from pipe
  258. * @param len [IN] data length
  259. * @param opNum [OUT] number of op in data
  260. * @return ProfErrorCode
  261. */
  262. MSVP_PROF_API int32_t ProfGetOpNum(const void *data, uint32_t len, uint32_t *opNum);
  263. /**
  264. * @name ProfGetModelId
  265. * @brief get model id of specific part of data
  266. * @param data [IN] data read from pipe
  267. * @param len [IN] data length
  268. * @param index [IN] index of part(op)
  269. * @return model id
  270. */
  271. MSVP_PROF_API uint32_t ProfGetModelId(const void *data, uint32_t len, uint32_t index);
  272. /**
  273. * @name ProfGetOpType
  274. * @brief get op type of specific part of data
  275. * @param data [IN] data read from pipe
  276. * @param len [IN] data length
  277. * @param opType [OUT] op type buffer
  278. * @param opTypeLen [IN] buffer size of param opType
  279. * @param index [IN] index of part(op)
  280. * @return ProfErrorCode
  281. */
  282. MSVP_PROF_API int32_t ProfGetOpType(const void *data, uint32_t len, char *opType, uint32_t opTypeLen, uint32_t index);
  283. /**
  284. * @name ProfGetOpName
  285. * @brief get op name of specific part of data
  286. * @param data [IN] data read from pipe
  287. * @param len [IN] data length
  288. * @param opName [OUT] op name buffer
  289. * @param opNameLen [IN] buffer size of param opName
  290. * @param index [IN] index of part(op)
  291. * @return ProfErrorCode
  292. */
  293. MSVP_PROF_API int32_t ProfGetOpName(const void *data, uint32_t len, char *opName, uint32_t opNameLen, uint32_t index);
  294. /**
  295. * @name ProfGetOpStart
  296. * @brief get op start timestamp of specific part of data
  297. * @param data [IN] data read from pipe
  298. * @param len [IN] data length
  299. * @param index [IN] index of part(op)
  300. * @return op start timestamp (us)
  301. */
  302. MSVP_PROF_API uint64_t ProfGetOpStart(const void *data, uint32_t len, uint32_t index);
  303. /**
  304. * @name ProfGetOpEnd
  305. * @brief get op end timestamp of specific part of data
  306. * @param data [IN] data read from pipe
  307. * @param len [IN] data length
  308. * @param index [IN] index of part(op)
  309. * @return op end timestamp (us)
  310. */
  311. MSVP_PROF_API uint64_t ProfGetOpEnd(const void *data, uint32_t len, uint32_t index);
  312. /**
  313. * @name ProfGetOpDuration
  314. * @brief get op duration of specific part of data
  315. * @param data [IN] data read from pipe
  316. * @param len [IN] data length
  317. * @param index [IN] index of part(op)
  318. * @return op duration (us)
  319. */
  320. MSVP_PROF_API uint64_t ProfGetOpDuration(const void *data, uint32_t len, uint32_t index);
  321. /**
  322. * @name ProfGetOpExecutionTime
  323. * @brief get op execution time of specific part of data
  324. * @param data [IN] data read from pipe
  325. * @param len [IN] data length
  326. * @param index [IN] index of part(op)
  327. * @return op execution time (us)
  328. */
  329. MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);
  330. /**
  331. * @name ProfGetOpCubeOps
  332. * @brief get op cube fops of specific part of data
  333. * @param data [IN] data read from pipe
  334. * @param len [IN] data length
  335. * @param index [IN] index of part(op)
  336. * @return op cube fops
  337. */
  338. MSVP_PROF_API uint64_t ProfGetOpCubeOps(const void *data, uint32_t len, uint32_t index);
  339. /**
  340. * @name ProfGetOpVectorOps
  341. * @brief get op vector fops of specific part of data
  342. * @param data [IN] data read from pipe
  343. * @param len [IN] data length
  344. * @param index [IN] index of part(op)
  345. * @return op vector fops
  346. */
  347. MSVP_PROF_API uint64_t ProfGetOpVectorOps(const void *data, uint32_t len, uint32_t index);
  348. } // namespace Api
  349. } // namespace Msprofiler
  350. #endif // MSPROFILER_API_PROF_ACL_API_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。