You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

physical_tensor.cpp 7.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. /**
  2. * \file imperative/src/impl/physical_tensor.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/imperative.h"
  12. #include "megbrain/imperative/blob_manager.h"
  13. #include "megbrain/imperative/profiler.h"
  14. #include "./async_releaser.h"
  15. #include "./event_pool.h"
  16. #include "./profiler/events.h"
  17. #include <mutex>
  18. namespace mgb {
  19. namespace imperative {
  20. namespace {
  21. class CompNodeSyncManager : public CompNodeDepedentObject {
  22. ThinHashMap<Blob*, std::unique_ptr<CompNode::Event>> m_blob2event;
  23. std::mutex m_mtx;
  24. public:
  25. #if MGB_CUDA && defined(WIN32)
  26. //! FIXME: windows cuda driver shutdown before call atexit function even
  27. //! register atexit function after init cuda driver! as a workround
  28. //! recovery resource by OS temporarily, may need remove this after
  29. //! upgrade cuda runtime
  30. static bool is_into_atexit;
  31. #endif
  32. std::shared_ptr<void> on_comp_node_finalize() override {
  33. MGB_LOCK_GUARD(m_mtx);
  34. m_blob2event.clear();
  35. return {};
  36. }
  37. static CompNodeSyncManager& inst() {
  38. static CompNodeSyncManager* sl_inst = new CompNodeSyncManager();
  39. #if MGB_CUDA && defined(WIN32)
  40. //! FIXME: windows cuda driver shutdown before call atexit function even
  41. //! register atexit function after init cuda driver! as a workround
  42. //! recovery resource by OS temporarily, may need remove this after
  43. //! upgrade cuda runtime
  44. if (!is_into_atexit) {
  45. auto err = atexit([] { is_into_atexit = true; });
  46. mgb_assert(!err, "failed to register atexit function");
  47. }
  48. #endif
  49. return *sl_inst;
  50. }
  51. CompNode::Event* get_or_create_event(Blob* blob) {
  52. mgb_assert(!is_finalized());
  53. MGB_LOCK_GUARD(m_mtx);
  54. auto&& e = m_blob2event[blob];
  55. if (!e) {
  56. e = blob->comp_node().create_event();
  57. }
  58. return e.get();
  59. }
  60. void remove(Blob* blob) {
  61. MGB_LOCK_GUARD(m_mtx);
  62. m_blob2event.erase(blob);
  63. }
  64. };
#if MGB_CUDA && defined(WIN32)
//! FIXME: windows cuda driver shutdown before call atexit function even
//! register atexit function after init cuda driver! as a workround
//! recovery resource by OS temporarily, may need remove this after
//! upgrade cuda runtime
// Definition of the flag declared inside CompNodeSyncManager; the atexit hook
// registered in inst() flips it to true so ~Blob can skip comp-node cleanup
// after the CUDA driver has already been shut down by the OS.
bool CompNodeSyncManager::is_into_atexit = false;
#endif
  72. } // namespace
  73. void EventDeleter::operator()(CompNode::Event* event) {
  74. EventPool::without_timer().free(event);
  75. }
namespace {
// Monotonic, process-wide counter used to assign a unique id to every Blob.
std::atomic_uint64_t next_blob_id = 0;
}
  79. Blob::Blob(const DeviceTensorStorage& s)
  80. : m_comp_node{s.comp_node()},
  81. m_storage{s.raw_storage()},
  82. m_size{s.size() + s.offset()} {
  83. m_id = next_blob_id++;
  84. BlobManager::inst()->register_blob(this);
  85. }
  86. Blob::Blob(CompNode cn, size_t sz) : m_comp_node{cn}, m_storage{}, m_size{sz} {
  87. m_id = next_blob_id++;
  88. BlobManager::inst()->register_blob(this);
  89. }
Blob::~Blob() {
    // Remove this blob from global bookkeeping before the storage dies.
    BlobManager::inst()->unregister_blob(this);
#if MGB_CUDA && defined(WIN32)
    //! FIXME: windows cuda driver shutdown before call atexit function even
    //! register atexit function after init cuda driver! as a workround
    //! recovery resource by OS temporarily, may need remove this after
    //! upgrade cuda runtime
    if (CompNodeSyncManager::is_into_atexit)
        return;
#endif
    // Also discard any sync event that was recorded for this blob.
    CompNodeSyncManager::inst().remove(this);
}
  102. const Blob::RawStorage& Blob::storage() {
  103. if (!m_storage) {
  104. BlobManager::inst()->alloc_with_defrag(this, m_size);
  105. }
  106. return m_storage;
  107. }
// Build a tensor view over an existing blob, additionally caching a host
// value. NOTE(review): `offset` appears to be a byte offset into the blob's
// storage (see dev_tensor(), which applies it via DeviceTensorStorage::sub)
// — confirm against callers.
Tensor::Tensor(
        BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv)
        : m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) {}
// Construct a device tensor from a host tensor: allocate device storage via
// the delegated constructor, then launch an (asynchronous) host-to-device
// copy, with profiler events recorded around it.
Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) {
    // Keep the host value cached so get_value() needs no device readback.
    m_value = hv;
    MGB_RECORD_EVENT(
            profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(),
            dev_tensor().raw_ptr());
    dev_tensor().copy_from_fixlayout(hv);
    // even though hv is saved in m_value, Tensor itself could be
    // released before copy completes
    MGB_RECORD_EVENT(
            profiler::HostToDeviceFinishEvent, hv.layout(), hv.comp_node(),
            hv.raw_ptr(), dev_tensor().raw_ptr());
    // Hand hv to the async releaser so its buffer outlives the pending copy.
    AsyncReleaser::inst()->add(hv);
}
  124. Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) {
  125. if (!hv.empty()) {
  126. mgb_assert(dv.comp_node() == hv.comp_node());
  127. mgb_assert(dv.dtype() == hv.dtype());
  128. mgb_assert(dv.shape().eq_shape(hv.shape()));
  129. m_value = hv;
  130. }
  131. m_layout = dv.layout();
  132. m_blob = Blob::make(dv.storage());
  133. m_offset = dv.storage().offset();
  134. }
// Allocate a fresh blob on `cn` sized to hold `layout` (span in bytes);
// the device memory itself is allocated lazily by Blob::storage().
Tensor::Tensor(const TensorLayout& layout, const CompNode& cn)
        : m_layout{layout},
          m_blob{Blob::make(cn, layout.span().dist_byte())},
          m_offset{0} {}
// View constructor: share an existing blob at the given offset with the
// given layout; no host value is cached.
Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout)
        : m_layout{layout}, m_blob{blob}, m_offset{offset} {}
  141. TensorPtr Tensor::make(const HostTensorND& hv) {
  142. auto&& blob = MultiCNConstTensorCache::inst().lookup(hv);
  143. if (blob) {
  144. return make(std::forward<decltype(blob)>(blob), hv.layout(), hv);
  145. }
  146. return std::make_shared<Tensor>(hv);
  147. }
  148. DeviceTensorND Tensor::dev_tensor() {
  149. mgb_assert(m_blob, "uninitialized tensor.");
  150. DeviceTensorStorage storage;
  151. storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage());
  152. storage = storage.sub(m_offset);
  153. DeviceTensorND ret;
  154. ret.reset(storage, m_layout);
  155. return ret;
  156. }
// Start (but do not wait for) a device-to-host readback into m_value.
// No-op if a host value is already cached.
void Tensor::fetch_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (m_value.empty()) {
        // copy_from may be asynchronous; record an event so that readers
        // (get_value / try_get_value) can tell when the copy has finished.
        m_value.copy_from(dev_tensor());
        m_value_ready.reset(EventPool::without_timer().alloc(comp_node()));
        m_value_ready->record();
    }
}
  165. bool Tensor::value_fetched() {
  166. MGB_LOCK_GUARD(m_mtx);
  167. return m_value.layout().ndim != 0;
  168. }
  169. const HostTensorND& Tensor::get_value() {
  170. fetch_value();
  171. if (m_value_ready) {
  172. m_value_ready->host_wait();
  173. }
  174. return m_value;
  175. }
  176. const HostTensorND* Tensor::try_get_value() {
  177. MGB_LOCK_GUARD(m_mtx);
  178. if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) {
  179. return &m_value;
  180. }
  181. return nullptr;
  182. }
  183. TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) {
  184. HostTensorND hv{cn, value.dtype()};
  185. hv.resize({1});
  186. memcpy(hv.raw_ptr(), value.storage(), value.dtype().size(1));
  187. return make(hv);
  188. }
  189. TensorPtr Tensor::sub(size_t offset, TensorShape shape) {
  190. TensorLayout layout(shape, m_layout.dtype);
  191. return Tensor::make(m_blob, offset + m_offset, layout);
  192. }
// Hand the blob to the async releaser keyed on `cn`. NOTE(review):
// presumably this defers the blob's release until `cn` has drained its
// pending work — confirm against async_releaser.h.
void Tensor::add_release_callback(CompNode cn) {
    AsyncReleaser::inst()->add(m_blob, cn);
}
  196. CompNode::Event* Tensor::get_or_create_event() {
  197. auto e = CompNodeSyncManager::inst().get_or_create_event(m_blob.get());
  198. e->record();
  199. return e;
  200. }
// Eagerly construct the global singletons used by tensor machinery.
// NOTE(review): invoking them here fixes their construction (and hence any
// destruction) order relative to each other — keep this call order intact.
void Tensor::static_initialize() {
    EventPool::with_timer();
    EventPool::without_timer();
    AsyncReleaser::inst();
    CompNodeSyncManager::inst();
    MultiCNConstTensorCache::inst();
}
  208. } // namespace imperative
  209. } // namespace mgb
  210. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台