You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

profiling_definitions.h 4.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef AIR_CXX_PROFILING_DEFINITIONS_H
  17. #define AIR_CXX_PROFILING_DEFINITIONS_H
  18. #include <string>
  19. #include <iostream>
  20. #include <atomic>
  21. #include <mutex>
  22. #include <unordered_map>
  23. #include "graph/profiler.h"
  24. namespace ge {
  25. namespace profiling {
  // Built-in profiling indices. The enumerators are numbered consecutively
  // starting at 0; kProfilingIndexEnd is one past the last built-in index.
  // NOTE(review): presumably these are the indices pre-registered with the
  // profiler before any dynamically registered strings -- confirm in the .cc.
  enum {
    kInferShape = 0,            // 0
    kTiling,                    // 1
    kUpdateShape,               // 2
    kConstPrepare,              // 3
    kInitHybridExecuteArgs,     // 4
    kInitInferShapeContext,     // 5
    kDestroyInferShapeContext,  // 6
    kResetSubgraphExecutor,     // 7
    kCommitInferShapeTask,      // 8
    kDeviceToHost,              // 9
    kPrepareTask,               // 10
    kLaunchTask,                // 11
    kCommitTilingTask,          // 12
    kAtomic,                    // 13
    kKernelLaunchPrepare,       // 14
    kRtKernelLaunch,            // 15
    kOpExecute,                 // 16
    kAllocMem,                  // 17
    kCopyH2D,                   // 18
    kProfilingIndexEnd          // 19: count of built-in indices, not a real index
  };
  48. class ProfilingContext {
  49. public:
  50. static bool IsDumpToStdEnabled();
  51. static ProfilingContext &GetInstance();
  52. ProfilingContext();
  53. ~ProfilingContext();
  54. /*
  55. * 还有一种思路是`IsEnabled`只判断profiler_是否为空指针,不再设置单独的enabled标记位,这样可以少一个标记位。
  56. * 但是这么做就意味着,profiler_实例在未使能profiling时,必须是空指针状态。
  57. * 为了性能考虑,profiling机制在编译和加载时,就会调用`RegisterString`,向profiler_注册字符串,后续执行时,只会使用注册好的index了。
  58. * 因此存在一种场景:编译时并未使能profiling(因为编译时间很长,使能profiling也无法真实反应执行时的耗时状态),
  59. * 因此编译时注册字符串的动作并没有生效。在执行时,动态的打开了profiling,这种场景下,执行时无法拿到注册后字符串
  60. */
  61. bool IsEnabled() const noexcept {
  62. return enabled_ && profiler_ != nullptr;
  63. }
  64. void SetEnable() noexcept {
  65. enabled_ = true;
  66. }
  67. void SetDisable() noexcept {
  68. enabled_ = false;
  69. }
  70. void RecordCurrentThread(int64_t element, int64_t event, EventType et) {
  71. if (IsEnabled()) {
  72. profiler_->RecordCurrentThread(element, event, et);
  73. }
  74. }
  75. const Profiler *GetProfiler() const {
  76. return profiler_.get();
  77. }
  78. void Dump(std::ostream &out_stream) const {
  79. if (IsEnabled()) {
  80. profiler_->Dump(out_stream);
  81. } else {
  82. out_stream << "Profiling not enable, skip to dump" << std::endl;
  83. }
  84. }
  85. void DumpToStdOut() const {
  86. Dump(std::cout);
  87. }
  88. void Reset() {
  89. if (IsEnabled()) {
  90. profiler_->Reset();
  91. }
  92. }
  93. int64_t RegisterString(const std::string &str);
  94. private:
  95. void RegisterString(int64_t index, const std::string &str);
  96. void Init();
  97. private:
  98. bool enabled_;
  99. int64_t str_index_;
  100. std::unordered_map<std::string, int64_t> strings_to_index_;
  101. std::mutex strings_to_index_mutex_;
  102. std::unique_ptr<Profiler> profiler_;
  103. };
  104. class ScopeProfiler {
  105. public:
  106. ScopeProfiler(int64_t element, int64_t event) : element_(element), event_(event) {
  107. ProfilingContext::GetInstance().RecordCurrentThread(element_, event, kEventStart);
  108. }
  109. ~ScopeProfiler() {
  110. ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, kEventEnd);
  111. }
  112. private:
  113. int64_t element_;
  114. int64_t event_;
  115. };
  116. } // namespace profiling
  117. } // namespace ge
  // Convenience macros for recording profiling events on the current thread.
  //
  // All names are fully qualified (::ge::profiling::...) so the macros expand
  // correctly from any namespace, and the arguments are parenthesized so that
  // arbitrary expressions can be passed safely.

  // Token-pasting helpers: the extra level of indirection makes __LINE__ expand
  // before it is concatenated.
  #define PROFILING_CONCAT_IMPL_(a, b) a##b
  #define PROFILING_CONCAT_(a, b) PROFILING_CONCAT_IMPL_(a, b)

  // Records a start event for (element, event).
  #define PROFILING_START(element, event)                                                           \
    ::ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event),        \
                                                                         ::ge::profiling::kEventStart)

  // Records an end event for (element, event).
  #define PROFILING_END(element, event)                                                             \
    ::ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event),        \
                                                                         ::ge::profiling::kEventEnd)

  // Declares a ScopeProfiler that records start now and end when the enclosing
  // scope exits. The variable name embeds the line number so the macro can be
  // used more than once in the same scope (on different lines).
  #define PROFILING_SCOPE(element, event) \
    ::ge::profiling::ScopeProfiler PROFILING_CONCAT_(profiler_, __LINE__)((element), (event))
  123. #endif // AIR_CXX_PROFILING_DEFINITIONS_H

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，用户对此并无感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。