You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

profiler_cache.cpp 6.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. /**
  2. * \file src/gopt/impl/profiler_cache.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
#include "./opr_safe_dump.h"

#include <cstring>

#include "megbrain/comp_node_env.h"
#include "megbrain/gopt/profiler.h"
  15. using namespace mgb;
  16. using namespace gopt;
  17. using ReformatKey = ReformatManager::ReformatKey;
  18. // =================== ProfilerCache ======================
  19. void ProfilerCache::Key::build_blob_from_opr() {
  20. auto&& opr = m_key_impl.opr_key.opr;
  21. // process opr param
  22. auto data = intl::opr_safe_dump(opr);
  23. size_t param_size = data.size();
  24. size_t nr_inputs = opr->input().size();
  25. size_t nr_outputs = opr->usable_output().size();
  26. size_t nr_layouts = nr_inputs + nr_outputs;
  27. m_blob_storage.reserve(sizeof(TensorLayout) * 3 * nr_layouts + param_size);
  28. // serialize param
  29. const char* data_ptr = reinterpret_cast<const char*>(data.data());
  30. m_blob_storage.append(data_ptr, param_size);
  31. // serialize layouts
  32. auto append_layout = [this](const VarNode* v) {
  33. TensorLayout ly{v->shape(), v->dtype(), v->format()};
  34. for (size_t i = 0; i < ly.ndim; ++i) {
  35. if (i)
  36. m_blob_storage.push_back(',');
  37. m_blob_storage.append(std::to_string(ly.shape[i]));
  38. }
  39. if (!ly.is_contiguous()) {
  40. m_blob_storage.push_back(';');
  41. for (size_t i = 0; i < ly.ndim; ++i) {
  42. if (i)
  43. m_blob_storage.push_back(',');
  44. m_blob_storage.append(std::to_string(ly.stride[i]));
  45. }
  46. }
  47. m_blob_storage.push_back(';');
  48. m_blob_storage.append(ly.dtype.name());
  49. m_blob_storage.push_back('|');
  50. };
  51. for (size_t i = 0; i < nr_inputs; ++i) {
  52. append_layout(opr->input(i));
  53. }
  54. for (size_t i = 0; i < nr_outputs; ++i) {
  55. append_layout(opr->output(i));
  56. }
  57. // serialize opr_format
  58. m_blob_storage.append(
  59. std::to_string(static_cast<uint32_t>(m_key_impl.opr_key.opr_format)));
  60. // serialize extra_attribute
  61. m_blob_storage.append(
  62. std::to_string(static_cast<uint32_t>(m_key_impl.opr_key.extra_attribute)));
  63. }
/*!
 * \brief build the cache category string for comp node \p cn.
 *
 * The category encodes the platform (and optionally the concrete CUDA
 * device) so that profiling results cached on one platform are not reused
 * on another. Throws MegBrainError for unsupported device types.
 */
void ProfilerCache::Key::build_category(CompNode cn) {
    m_category = "layout_transform_profile:";
    auto&& env = CompNodeEnv::from_comp_node(cn);
    switch (env.property().type) {
#if MGB_CUDA
        case CompNode::DeviceType::CUDA: {
            m_category += "plat=cuda";
            // optionally bind the cache entry to the concrete device
            // (name + compute capability) instead of just "cuda"
            if (ProfilerCache::inst().enable_device_info()) {
                auto&& prop = env.cuda_env().device_prop;
                m_category += ssprintf(
                        ";dev=%s;cap=%d.%d", prop.name, prop.major, prop.minor);
            }
            break;
        }
#endif
        case CompNode::DeviceType::CPU:
            m_category += "plat=cpu";
            break;
        default:
            mgb_throw(
                    MegBrainError,
                    "unsupported comp node for global layout transform "
                    "profiler cache category");
    }
}
  89. void ProfilerCache::Key::build_blob_from_var() {
  90. auto v = m_key_impl.var_key.var;
  91. // serialize layouts
  92. auto append_layout = [this](const VarNode* v) {
  93. TensorLayout ly{v->shape(), v->dtype(), v->format()};
  94. for (size_t i = 0; i < ly.ndim; ++i) {
  95. if (i)
  96. m_blob_storage.push_back(',');
  97. m_blob_storage.append(std::to_string(ly.shape[i]));
  98. }
  99. if (!ly.is_contiguous()) {
  100. m_blob_storage.push_back(';');
  101. for (size_t i = 0; i < ly.ndim; ++i) {
  102. if (i)
  103. m_blob_storage.push_back(',');
  104. m_blob_storage.append(std::to_string(ly.stride[i]));
  105. }
  106. }
  107. m_blob_storage.push_back(';');
  108. m_blob_storage.append(ly.dtype.name());
  109. m_blob_storage.push_back('|');
  110. };
  111. append_layout(v);
  112. // serialze reformat key
  113. m_blob_storage.append(m_key_impl.var_key.key.to_string());
  114. }
  115. const std::string& ProfilerCache::Key::category() const {
  116. mgb_assert(!m_category.empty());
  117. return m_category;
  118. }
  119. PersistentCache::Blob ProfilerCache::Key::blob() const {
  120. mgb_assert(!m_blob_storage.empty());
  121. return {m_blob_storage.data(), m_blob_storage.size()};
  122. }
  123. ProfilerCache& ProfilerCache::inst() {
  124. static ProfilerCache inst;
  125. return inst;
  126. }
  127. ProfilerCache& ProfilerCache::set_impl(std::unique_ptr<PersistentCache> impl) {
  128. mgb_assert(impl != nullptr);
  129. m_impl.swap(impl);
  130. return *this;
  131. }
/*!
 * \brief dump the cached profiling data to the file at \p path.
 *
 * Only implementations reporting support_dump_cache() can be dumped; the
 * assert below guards the static_cast down to InFilePersistentCache.
 */
void ProfilerCache::dump_cache(const char* path) {
    mgb_assert(
            m_impl->support_dump_cache(),
            "current impl of ProfilerCache does not support dump cache to "
            "file.");
    auto cache = static_cast<InFilePersistentCache*>(m_impl.get());
    cache->dump_cache(path);
}
  140. Maybe<ProfilerCache::Result> ProfilerCache::get(const Key& key) {
  141. auto raw_buf = m_impl->get(key.category(), key.blob());
  142. if (!raw_buf.valid())
  143. return None;
  144. // data type of cost is float
  145. auto buf = static_cast<const uint8_t*>(raw_buf->ptr);
  146. auto size = raw_buf->size;
  147. mgb_assert(
  148. buf && size == sizeof(float),
  149. "ProfileCache invalid value: ptr=%p, size=%zu", buf, size);
  150. auto read_f32 = [&]() {
  151. auto ret = *reinterpret_cast<const float*>(buf);
  152. return ret;
  153. };
  154. auto cost = read_f32();
  155. return cost;
  156. }
  157. void ProfilerCache::put(const Key& key, Result& result) {
  158. std::string val;
  159. megdnn::Algorithm::serialize_write_pod(result, val);
  160. m_impl->put(key.category(), key.blob(), {val.data(), val.size()});
  161. }
  162. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台