You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

prof_acl_api.h 5.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MSPROF_ENGINE_PROF_ACL_API_H_
  17. #define MSPROF_ENGINE_PROF_ACL_API_H_
  18. #define MSVP_MAX_DEV_NUM 64 // capacity of ProfConfig::devIdList
  19. #define MSVP_PROF_API __attribute__((visibility("default"))) // gives the declaration default symbol visibility
  20. // DataTypeConfig: one bit per data type; presumably OR-ed into ProfConfig::dataTypeConfig (TODO confirm)
  21. #define PROF_ACL_API 0x0001
  22. #define PROF_TASK_TIME 0x0002
  23. #define PROF_AICORE_METRICS 0x0004
  24. #define PROF_AICPU_TRACE 0x0008
  25. #define PROF_MODEL_EXECUTE 0x0010
  26. #define PROF_RUNTIME_API 0x0020
  27. #define PROF_RUNTIME_TRACE 0x0040
  28. #define PROF_SCHEDULE_TIMELINE 0x0080
  29. #define PROF_SCHEDULE_TRACE 0x0100
  30. #define PROF_AIVECTORCORE_METRICS 0x0200
  31. #define PROF_SUBTASK_TIME 0x0400
  32. #define PROF_TRAINING_TRACE 0x0800
  33. #define PROF_HCCL_TRACE 0x1000
  34. #define PROF_DATA_PROCESS 0x2000
  35. #define PROF_TASK_TRACE 0x3842 // composite: PROF_TASK_TIME | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | PROF_HCCL_TRACE | PROF_DATA_PROCESS
  36. #define PROF_MODEL_LOAD 0x8000000000000000 // bit 63, the top bit of a 64-bit value
  37. // DataTypeConfig MASK: same values as the flags above; PROF_TASK_TRACE has no _MASK counterpart
  38. #define PROF_ACL_API_MASK 0x0001
  39. #define PROF_TASK_TIME_MASK 0x0002
  40. #define PROF_AICORE_METRICS_MASK 0x0004
  41. #define PROF_AICPU_TRACE_MASK 0x0008
  42. #define PROF_MODEL_EXECUTE_MASK 0x0010
  43. #define PROF_RUNTIME_API_MASK 0x0020
  44. #define PROF_RUNTIME_TRACE_MASK 0x0040
  45. #define PROF_SCHEDULE_TIMELINE_MASK 0x0080
  46. #define PROF_SCHEDULE_TRACE_MASK 0x0100
  47. #define PROF_AIVECTORCORE_METRICS_MASK 0x0200
  48. #define PROF_SUBTASK_TIME_MASK 0x0400
  49. #define PROF_TRAINING_TRACE_MASK 0x0800
  50. #define PROF_HCCL_TRACE_MASK 0x1000
  51. #define PROF_DATA_PROCESS_MASK 0x2000
  52. #define PROF_MODEL_LOAD_MASK 0x8000000000000000
  53. #include <cstdint>
  54. #include <string>
  55. /**
  56. * @name ProfErrorCode
  57. * @brief error code enum of the prof_acl_api functions
  58. */
  59. enum ProfErrorCode {
  60. PROF_ERROR_NONE = 0, // ok
  61. PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr
  62. PROF_ERROR_REPEAT_INIT, // profiling has already been initialized
  63. PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string
  64. PROF_ERROR_DIR_NO_ACCESS, // dir is not accessible
  65. PROF_ERROR_FAILURE, // failed to init or start profiling
  66. PROF_ERROR_NOT_INITED, // profiling has not been initialized
  67. PROF_ERROR_DEVICE_INVALID, // device id invalid
  68. PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics
  69. PROF_ERROR_REPEAT_START, // profiling has already been started
  70. PROF_ERROR_NOT_STARTED, // profiling has not been started
  71. };
  72. /**
  73. * @brief convert the profiling config from acl.json into a sample config
  74. * @param aclCfg [IN] profiling json string from acl.json, e.g. {"switch":"on", "result_path":"/home",...}
  75. * @param sampleCfg [OUT] json string for GE, e.g. {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
  76. * @return ProfErrorCode value, carried as int32_t
  77. */
  78. MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);
  79. /**
  80. * @name ProfInit
  81. * @brief init profiling
  82. * @param profInitCfg [IN] json-format configuration string used to init profiling
  83. * @return ProfErrorCode value, carried as int32_t
  84. */
  85. MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);
  86. /**
  87. * @name ProfAicoreMetrics
  88. * @brief aicore metrics enum; the type of ProfConfig::aicoreMetrics
  89. */
  90. enum ProfAicoreMetrics {
  91. PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, // NOTE(review): "ARITHMATIC" spelling is in the declared name; renaming would break users of this header
  92. PROF_AICORE_PIPELINE = 1,
  93. PROF_AICORE_SYNCHRONIZATION = 2,
  94. PROF_AICORE_MEMORY = 3,
  95. PROF_AICORE_INTERNAL_MEMORY = 4,
  96. PROF_AICORE_STALL = 5,
  97. PROF_AICORE_EVENT = 255 // not contiguous with the values above
  98. };
  99. /**
  100. * @name ProfConfig
  101. * @brief config struct taken by ProfStartProfiling (and, per its declaration, by ProfStopProfiling)
  102. */
  103. struct ProfConfig {
  104. uint32_t devNums; // length of device id list (devIdList holds at most MSVP_MAX_DEV_NUM entries)
  105. uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list
  106. ProfAicoreMetrics aicoreMetrics; // aicore metric
  107. uint64_t dataTypeConfig; // data type to start profiling; presumably a mask of PROF_* flags (TODO confirm)
  108. };
  109. /**
  110. * @name ProfStartProfiling
  111. * @brief start profiling
  112. * @param profStartCfg [IN] pointer to the ProfConfig used to start profiling
  113. * @return ProfErrorCode value, carried as int32_t
  114. */
  115. MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);
  116. /**
  117. * @name ProfStopConfig
  118. * @brief struct of ProfStop; NOTE(review): no declaration in this file takes this type, confirm whether it is still used
  119. */
  120. struct ProfStopConfig {
  121. uint64_t padding; // only member of the struct
  122. };
  123. /**
  124. * @name ProfStopProfiling
  125. * @brief stop profiling
  126. * @param profStopCfg [IN] config to stop profiling; declared as ProfConfig, not ProfStopConfig (NOTE(review): confirm this is intended)
  127. * @return ProfErrorCode value, carried as int32_t
  128. */
  129. MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);
  130. /**
  131. * @name ProfFinalize
  132. * @brief finalize profiling task; takes no arguments
  133. * @return ProfErrorCode value, carried as int32_t
  134. */
  135. MSVP_PROF_API int32_t ProfFinalize();
  136. #endif // MSPROF_ENGINE_PROF_ACL_API_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示