
physical_tensor.cpp

/**
 * \file imperative/src/impl/physical_tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "megbrain/imperative.h"
#include "megbrain/imperative/blob_manager.h"
#include "megbrain/imperative/profiler.h"
#include "megbrain/imperative/resource_manager.h"

#include "./async_releaser.h"
#include "./event_pool.h"
#include "./profiler/events.h"

#include <mutex>

namespace mgb {
namespace imperative {

namespace {
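// Tracks one CompNode::Event per Blob so that consumers can synchronize with
// the last asynchronous operation touching that blob. Entries are removed in
// ~Blob(); the whole map is dropped when the comp node is finalized.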
class CompNodeSyncManager : public CompNodeDepedentObject {
    ThinHashMap<Blob*, std::unique_ptr<CompNode::Event>> m_blob2event;
    std::mutex m_mtx;

public:
    std::shared_ptr<void> on_comp_node_finalize() override {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.clear();
        return {};
    }

    static CompNodeSyncManager& inst() {
        static auto* sl_inst = ResourceManager::create_global<CompNodeSyncManager>();
        return *sl_inst;
    }

    CompNode::Event* get_or_create_event(Blob* blob) {
        mgb_assert(!is_finalized());
        MGB_LOCK_GUARD(m_mtx);
        auto&& e = m_blob2event[blob];
        if (!e) {
            e = blob->comp_node().create_event();
        }
        return e.get();
    }

    void remove(Blob* blob) {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.erase(blob);
    }
};
}  // namespace

void EventDeleter::operator()(CompNode::Event* event) {
    EventPool::without_timer().free(event);
}

namespace {
std::atomic_uint64_t next_blob_id = 0;
}
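// Each Blob registers itself with the BlobManager and receives a unique,
// monotonically increasing id. The (CompNode, size) constructor leaves
// m_storage empty so the device memory can be allocated lazily in storage().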
Blob::Blob(const DeviceTensorStorage& s)
        : m_comp_node{s.comp_node()},
          m_storage{s.raw_storage()},
          m_size{s.size() + s.offset()} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

Blob::Blob(CompNode cn, size_t sz) : m_comp_node{cn}, m_storage{}, m_size{sz} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

Blob::~Blob() {
    BlobManager::inst()->unregister_blob(this);
    CompNodeSyncManager::inst().remove(this);
}

const Blob::RawStorage& Blob::storage() {
    if (!m_storage && m_size) {
        BlobManager::inst()->alloc_with_defrag(this, m_size);
    }
    return m_storage;
}
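// A Tensor is a view over a Blob: a layout plus a byte offset into the blob's
// storage, with an optional cached host value (m_value) that backs
// get_value()/try_get_value().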
Tensor::Tensor(
        BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv)
        : m_cn(blob->comp_node()),
          m_shape(layout),
          m_dtype(layout.dtype),
          m_layout(layout),
          m_blob(std::move(blob)),
          m_offset(offset),
          m_value(hv) {}
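// Host-to-device construction: tensors with at most TensorShape::MAX_NDIM
// elements also keep the host value cached. The device copy is asynchronous,
// so hv is handed to AsyncReleaser to keep its storage alive until the copy
// has completed.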
Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) {
    constexpr int size_threshold = TensorShape::MAX_NDIM;
    size_t nr_elems = hv.layout().total_nr_elems();
    if (nr_elems <= size_threshold) {
        m_value = hv;
    }
    if (nr_elems) {
        MGB_RECORD_EVENT(
                profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(),
                dev_tensor().raw_ptr());
        dev_tensor(false).copy_from_fixlayout(hv);
        // even though hv is saved in m_value, Tensor itself could be
        // released before copy completes
        MGB_RECORD_EVENT(
                profiler::HostToDeviceFinishEvent, hv.layout(), hv.comp_node(),
                hv.raw_ptr(), dev_tensor().raw_ptr());
        AsyncReleaser::inst()->add(hv);
    }
}
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv)
        : m_offset(dv.storage().offset()),
          m_cn(dv.comp_node()),
          m_shape(dv.layout()),
          m_dtype(dv.layout().dtype),
          m_blob(Blob::make(dv.storage())),
          m_layout(dv.layout()) {
    if (!hv.empty()) {
        mgb_assert(dv.comp_node() == hv.comp_node());
        mgb_assert(dv.dtype() == hv.dtype());
        mgb_assert(dv.shape().eq_shape(hv.shape()));
        m_value = hv;
    }
}

Tensor::Tensor(const TensorLayout& layout, const CompNode& cn)
        : m_layout{layout},
          m_blob{Blob::make(cn, layout.span().dist_byte())},
          m_offset{0},
          m_cn(cn),
          m_shape(layout),
          m_dtype(layout.dtype) {}

Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout)
        : m_layout{layout},
          m_blob{blob},
          m_offset{offset},
          m_cn(blob->comp_node()),
          m_shape(layout),
          m_dtype(layout.dtype) {}
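// Tensor::make first consults the multi-comp-node const tensor cache; on a
// hit the cached blob is reused, otherwise the value is uploaded through the
// host-to-device constructor above.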
TensorPtr Tensor::make(const HostTensorND& hv) {
    auto&& blob = MultiCNConstTensorCache::inst().lookup(hv);
    if (blob) {
        return make(std::forward<decltype(blob)>(blob), hv.layout(), hv);
    }
    return std::make_shared<Tensor>(hv);
}
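// If the current layout fails the given constraint, the data is copied into a
// freshly allocated contiguous blob and m_blob/m_layout/m_offset are updated
// under m_blob_mtx.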
void Tensor::to_contiguous_inplace(VarNode::LayoutConstraintCallback& layout_checker) {
    MGB_LOCK_GUARD(m_blob_mtx);
    if (!m_layout.is_empty() && !layout_checker(m_layout)) {
        DeviceTensorStorage storage;
        storage.reset(m_cn, m_blob->size(), m_blob->storage());
        storage = storage.sub(m_offset);
        DeviceTensorND dv;
        dv.reset(storage, m_layout);
        DeviceTensorND dv_contig;
        dv_contig.copy_from(dv);
        m_layout = dv_contig.layout();
        std::atomic_store(&m_blob, Blob::make(dv_contig.storage()));
        mgb_assert(m_layout.is_contiguous());
        m_offset = 0;
    }
}

void Tensor::to_contiguous_inplace() {
    static VarNode::LayoutConstraintCallback default_cb =
            [](const TensorLayout& layout) { return layout.is_contiguous(); };
    to_contiguous_inplace(default_cb);
}
void Tensor::assign_from_dev_tensor(DeviceTensorND dv) {
    MGB_LOCK_GUARD(m_blob_mtx);
    std::atomic_store(&m_blob, Blob::make(dv.storage()));
    m_offset = dv.storage().offset();
    m_layout = dv.layout();
}
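// dev_tensor() materializes a DeviceTensorND view over the (possibly lazily
// allocated) blob storage; with contiguous=true it first forces the layout to
// be contiguous.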
DeviceTensorND Tensor::dev_tensor(bool contiguous) {
    mgb_assert(m_blob, "uninitialized tensor.");
    if (contiguous) {
        to_contiguous_inplace();
    }
    MGB_LOCK_GUARD(m_blob_mtx);
    DeviceTensorStorage storage;
    storage.reset(m_cn, m_blob->size(), m_blob->storage());
    storage = storage.sub(m_offset);
    DeviceTensorND ret;
    ret.reset(storage, m_layout);
    return ret;
}

megdnn::TensorND Tensor::dnn_tensor() {
    mgb_assert(m_blob, "uninitialized tensor.");
    return {m_layout, {m_blob->storage().get(), m_offset}};
}
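// Host value access: fetch_value() starts an asynchronous device-to-host copy
// into m_value and records an event; get_value() blocks on that event, while
// try_get_value() only returns the host copy once the event has finished.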
void Tensor::fetch_value() {
    MGB_LOCK_GUARD(m_value_mtx);
    if (m_value.empty()) {
        m_value.copy_from(dev_tensor(false));
        m_value_ready.reset(EventPool::without_timer().alloc(comp_node()));
        m_value_ready->record();
    }
}

bool Tensor::value_fetched() {
    MGB_LOCK_GUARD(m_value_mtx);
    return m_value.layout().ndim != 0;
}

const HostTensorND& Tensor::get_value() {
    fetch_value();
    if (m_value_ready) {
        m_value_ready->host_wait();
    }
    return m_value;
}

const HostTensorND* Tensor::try_get_value() {
    MGB_LOCK_GUARD(m_value_mtx);
    if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) {
        return &m_value;
    }
    return nullptr;
}

TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) {
    HostTensorND hv{cn, value.dtype()};
    hv.resize({1});
    memcpy(hv.raw_ptr(), value.storage(), value.dtype().size(1));
    return make(hv);
}
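// sub() creates a new Tensor that shares the same blob, viewed with a
// contiguous layout of the requested shape at m_offset + offset.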
TensorPtr Tensor::sub(size_t offset, TensorShape shape) {
    TensorLayout layout(shape, m_dtype);
    return Tensor::make(m_blob, offset + m_offset, layout);
}

void Tensor::add_release_callback(CompNode cn) {
    AsyncReleaser::inst()->add(m_blob, cn);
}

CompNode::Event* Tensor::get_or_create_event() {
    auto e = CompNodeSyncManager::inst().get_or_create_event(m_blob.get());
    e->record();
    return e;
}

void Tensor::static_initialize() {
    EventPool::with_timer();
    EventPool::without_timer();
    AsyncReleaser::inst();
    CompNodeSyncManager::inst();
    MultiCNConstTensorCache::inst();
}
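// Illustrative usage sketch (not part of this translation unit; assumes a
// "cpu0" comp node is available and that the caller links against the
// imperative runtime):
//
//     HostTensorND hv{CompNode::load("cpu0"), TensorShape{4, 4}, dtype::Float32()};
//     // ... fill hv.ptr<float>() ...
//     TensorPtr t = Tensor::make(hv);             // upload, possibly via const cache
//     const HostTensorND& back = t->get_value();  // synchronizes and returns host copy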
}  // namespace imperative
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}