
tensor.h

/**
 * \file src/core/include/megbrain/tensor.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#pragma once

#include "megbrain/common.h"
#include "megbrain/comp_node.h"
#include "megbrain/dtype.h"
#include "megbrain/utils/metahelper.h"

#include "megdnn/basic_types.h"

#include <limits>
#include <memory>

namespace mgb {

using ::megdnn::TensorFormat;
using ::megdnn::TensorLayout;
using ::megdnn::TensorShape;

using ::megdnn::TensorFormatArray;
using ::megdnn::TensorLayoutArray;
using ::megdnn::TensorShapeArray;
/*!
 * \brief specify how a subtensor resides in a larger one
 */
class SubTensorSpec {
    TensorLayout m_layout;

    ptrdiff_t m_offset_elem = 0;

    SubTensorSpec(const TensorLayout& l, ptrdiff_t o) : m_layout{l}, m_offset_elem{o} {}

public:
    SubTensorSpec() = default;

    //! make a SubTensorSpec from given layout and zero offset
    static SubTensorSpec make_from_layout(const TensorLayout& layout) {
        return make_from_offset_elem(layout, 0);
    }

    //! make a SubTensorSpec from given layout and offset
    MGE_WIN_DECLSPEC_FUC static SubTensorSpec make_from_offset_elem(
            const TensorLayout& layout, ptrdiff_t offset_elem);

    //! get underlying layout
    const TensorLayout& layout() const { return m_layout; }

    //! get offset in number of logical elements in the layout
    ptrdiff_t offset_elem() const { return m_offset_elem; }

    //! get offset measured in bytes
    ptrdiff_t offset_byte() const {
        //! for lowbit cases, the offset must be aligned to whole bytes
        mgb_assert(
                !m_layout.dtype.is_low_bit() ||
                !(m_offset_elem * m_layout.dtype.low_bit() % 8));
        return m_layout.dtype.size(m_offset_elem);
    }

    /*!
     * \brief merge with another SubTensorSpec: accumulate offsets, and
     *      replace layout by that of rhs
     */
    MGE_WIN_DECLSPEC_FUC void merge_with(const SubTensorSpec& rhs);
};
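
/*
 * Editor's note -- a minimal usage sketch, not part of the original header.
 * `layout` is a hypothetical contiguous float32 layout; element offsets are
 * converted to bytes by the dtype:
 *
 *     TensorLayout layout{TensorShape{4, 8}, dtype::Float32{}};
 *     auto spec = SubTensorSpec::make_from_offset_elem(layout, 8);
 *     // one full row into the tensor: 8 elements * 4 bytes each
 *     mgb_assert(spec.offset_byte() == 32);
 */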
/*!
 * \brief slice along some axis; index as in Python, with negative indices
 *      supported. Scalar index can also be represented as a Slice, where
 *      m_begin = idx, m_end = idx+1 and m_step = 1. The flag m_is_scalar_idx
 *      indicates whether the Slice comes from a scalar index.
 */
class Slice {
    Maybe<ptrdiff_t> m_begin, m_end, m_step;
    bool m_is_scalar_idx;

public:
    Slice(Maybe<ptrdiff_t> begin = None, Maybe<ptrdiff_t> end = None,
          Maybe<ptrdiff_t> step = None, bool is_scalar_idx = false)
            : m_begin{begin},
              m_end{end},
              m_step{step},
              m_is_scalar_idx{is_scalar_idx} {}

    /*!
     * \brief apply this slice on given tensor layout, and get corresponding
     *      subtensor
     * \param axis the axis to apply this slice; -1 can be used for
     *      flattened layout
     */
    MGE_WIN_DECLSPEC_FUC SubTensorSpec apply(TensorLayout layout, int axis) const;
};
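
/*
 * Editor's note -- an illustrative sketch of Python-style slicing via
 * Slice::apply, not part of the original header; `layout` is hypothetical:
 *
 *     TensorLayout layout{TensorShape{10, 4}, dtype::Float32{}};
 *     // equivalent of t[2:8:2] on axis 0; negative indices also work
 *     SubTensorSpec spec = Slice(2, 8, 2).apply(layout, 0);
 *     // spec.layout() now has shape (3, 4); spec.offset_elem() == 2 * 4
 */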
template <class Trait>
class TensorStorage;

class DeviceTensorStorageTrait;
class HostTensorStorageTrait;

using HostTensorStorage = TensorStorage<HostTensorStorageTrait>;
using DeviceTensorStorage = TensorStorage<DeviceTensorStorageTrait>;

/*!
 * \brief manager for raw tensor memory
 *
 * It contains no dtype information and all sizes are measured in bytes.
 *
 * Note that ensure_size() is lazy, and memory allocation only happens when
 * ptr() or sub() is called.
 */
template <class Trait>
class TensorStorage {
public:
    using RawStorage = std::shared_ptr<dt_byte>;

    TensorStorage() = default;

    TensorStorage(CompNode comp_node) : m_comp_node(comp_node) {}

    TensorStorage(TensorStorage&&) noexcept = default;
    TensorStorage& operator=(TensorStorage&&) noexcept = default;

    TensorStorage(const TensorStorage& rhs) { *this = rhs; }

    MGE_WIN_DECLSPEC_FUC TensorStorage& operator=(const TensorStorage& rhs);

    /*!
     * \brief whether given tensor span is valid in this storage
     */
    bool valid_span(const TensorLayout::Span& span) const {
        return m_comp_node.valid() &&
               static_cast<ptrdiff_t>(m_offset) + span.low_byte >= 0 &&
               span.high_byte <= size();
    }

    /*!
     * \brief ensure that its space could hold at least sz bytes
     *
     * Note
     * 1. This method is lazy; size would only be changed when memory
     *    must be accessed.
     * 2. This method would only grow storage, but it would not release
     *    memory.
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage& ensure_size(size_t sz);

    /*!
     * \brief return a subtensor that shares the memory; the returned
     *      subtensor is not allowed to realloc
     * \param offset offset given in bytes
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage sub(ptrdiff_t offset) const;

    //! apply lazy resize and get ptr
    dt_byte* ptr() const {
        return const_cast<TensorStorage*>(this)->apply_lazy_and_get_ptr();
    }

    /*!
     * \brief usable size in bytes until end of allocated block
     */
    size_t size() const { return m_size; }

    /*!
     * \brief offset on allocated block in bytes
     */
    size_t offset() const { return m_offset; }

    //! get underlying comp node; error would be raised if it is invalid
    CompNode comp_node() const {
        check_comp_node_valid();
        return m_comp_node;
    }

    //! get underlying comp node and allow it to be invalid
    CompNode comp_node_allow_invalid() const { return m_comp_node; }

    /*!
     * \brief whether underlying comp_node is valid
     */
    bool comp_node_valid() const { return m_comp_node.valid(); }

    /*!
     * \brief whether this tensor has no valid element (either due to
     *      reaching end of mem chunk or no mem allocated)
     */
    bool empty() const { return !m_size; }

    /*!
     * \brief chain-style computing node setter
     *
     * note that if allow_mem_node_change is true and memory node is
     * changed, the underlying data would be released and this tensor would
     * become empty
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage& comp_node(
            CompNode node, bool allow_mem_node_change = false);

    /*!
     * \brief copy from another TensorStorage, possibly of other storage
     *      type
     *
     * This storage must have been initialized
     *
     * \param size number of bytes to be copied; must not exceed size of
     *      this or src
     */
    template <class RTrait>
    MGE_WIN_DECLSPEC_FUC void copy_from(
            const TensorStorage<RTrait>& src, size_t size) const;

    /*!
     * \brief reset the tensor storage to given memory area
     */
    MGE_WIN_DECLSPEC_FUC void reset(CompNode node, size_t size, RawStorage data);

    /*!
     * \brief make a TensorStorage that shares memory with another
     *      TensorStorage of a different storage type
     *
     * This method can be used to convert between HostTensorStorage and
     * DeviceTensorStorage; \p src must be on CPU memory node.
     */
    template <
            class RTrait, typename = typename std::enable_if<
                                  !std::is_same<Trait, RTrait>::value>::type>
    MGE_WIN_DECLSPEC_FUC static TensorStorage make_proxy(
            const TensorStorage<RTrait>& src);

    /*!
     * \brief make a DeviceTensorStorage on default_cpu
     *      that shares memory with this
     *
     * this must be a HostTensorStorage. Alignment not checked.
     */
    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<Trait, HostTensorStorageTrait>::value>>
    DeviceTensorStorage proxy_to_default_cpu() const {
        ptr();
        return {true, CompNode::default_cpu(), m_size, m_capacity, m_offset, m_data};
    }

    //! shortcut for raw_storage().use_count(), but won't trigger lazy alloc
    size_t use_count() const {
        if (m_size > m_capacity) {
            return 1;
        }
        return raw_storage().use_count();
    }

    //! whether current capacity is 0 (so we are waiting for lazy init)
    bool has_no_real_storage() const { return !m_capacity; }

    //! get underlying raw reference-counted storage
    const RawStorage& raw_storage() const {
        ptr();  // apply lazy resize
        return m_data;
    }

private:
    template <class T>
    friend class TensorStorage;

    bool m_allow_realloc = true;
    CompNode m_comp_node;

    //! current logical size; may exceed m_capacity, and in such case memory
    //! would be allocated when ptr() is called
    size_t m_size = 0;

    //! usable size until end of allocated data block, excluding offset
    size_t m_capacity = 0;

    //! offset on m_data
    size_t m_offset = 0;

    RawStorage m_data;

    //! used internally for returning a predefined TensorStorage
    TensorStorage(
            bool allow_realloc, CompNode comp_node, size_t size, size_t capacity,
            size_t offset, const RawStorage& data)
            : m_allow_realloc(allow_realloc),
              m_comp_node(comp_node),
              m_size(size),
              m_capacity(capacity),
              m_offset(offset),
              m_data(data) {}

    void check_comp_node_valid() const {
        if (mgb_unlikely(!m_comp_node.valid()))
            on_invalid_comp_node();
    }

    MGE_WIN_DECLSPEC_FUC dt_byte* apply_lazy_and_get_ptr();

    [[noreturn]] MGE_WIN_DECLSPEC_FUC static void on_invalid_comp_node();
};
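
/*
 * Editor's note -- a sketch of the lazy-allocation behavior documented
 * above, not part of the original header; `cn` is a hypothetical valid
 * CompNode:
 *
 *     HostTensorStorage storage{cn};
 *     storage.ensure_size(1024);            // lazy: no allocation yet
 *     mgb_assert(storage.has_no_real_storage());
 *     dt_byte* p = storage.ptr();           // allocation happens here
 *     auto view = storage.sub(512);         // shares memory; may not realloc
 */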
template <class TensorStorage>
class TensorND;

using HostTensorND = TensorND<HostTensorStorage>;
using DeviceTensorND = TensorND<DeviceTensorStorage>;

/*!
 * \brief n-dimensional tensor
 *
 * Note that TensorND is built on TensorStorage, which has some lazy behavior.
 */
template <class TensorStorage>
class TensorND {
    TensorStorage m_storage;
    TensorLayout m_layout;

public:
    using ChainReturnType = TensorND<TensorStorage>;

    MGE_WIN_DECLSPEC_FUC TensorND();

    MGE_WIN_DECLSPEC_FUC explicit TensorND(CompNode node);

    MGE_WIN_DECLSPEC_FUC explicit TensorND(DType dtype);

    MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, DType dtype);

    //! allocate contiguous tensor
    MGE_WIN_DECLSPEC_FUC TensorND(
            CompNode node, const TensorShape& shape, DType dtype = dtype::Float32{});

    MGE_WIN_DECLSPEC_FUC TensorND(
            CompNode node, const TensorShape& shape, DType dtype, TensorFormat format);

    //! allocate contiguous tensor from given comp node and layout; layout
    //! is required to be contiguous, and its dtype and format would be used
    MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, const TensorLayout& layout);

    /* ================= shape and basic functionality ================= */

    //! get subtensor according to given slices
    MGE_WIN_DECLSPEC_FUC ChainReturnType
    operator[](std::initializer_list<Slice> slice) const;

    //! get subtensor according to spec
    MGE_WIN_DECLSPEC_FUC ChainReturnType sub(const SubTensorSpec& spec) const;

    //! whether underlying storage is empty
    bool empty() const { return m_storage.empty(); }

    //! whether tensor shape is valid (i.e. ndim != 0)
    bool shape_valid() const { return m_layout.ndim; }

    const TensorShape& shape() const { return m_layout; }

    const TensorLayout& layout() const { return m_layout; }

    //! shape at given dimension, with boundary check
    size_t shape(size_t dim) const {
        mgb_assert(dim < m_layout.ndim);
        return m_layout.shape[dim];
    }

    //! get ptr at given index
    template <typename T, typename Iter>
    T* ptr(Iter idx_begin, Iter idx_end) {
        auto ptr = this->template ptr<T>();
        size_t nidx = 0;
        while (idx_begin != idx_end) {
            mgb_assert(nidx < m_layout.ndim);
            size_t idx = *idx_begin;
            mgb_assert(idx < m_layout.shape[nidx]);
            ptr += m_layout.stride[nidx] * idx;

            ++idx_begin;
            ++nidx;
        }
        return ptr;
    }

    template <typename T>
    T* ptr(std::initializer_list<size_t> idx) {
        return ptr<T>(idx.begin(), idx.end());
    }

    template <typename T>
    const T* ptr(std::initializer_list<size_t> dim) const {
        return const_cast<TensorND&>(*this).ptr<T>(dim);
    }

    //! get ptr of buffer start; *T* must match dtype
    template <typename T>
    T* ptr() const {
        m_layout.dtype.assert_is_ctype<T>();
        return m_storage.ptr()->template as<T>();
    }

    dt_byte* raw_ptr() const { return m_storage.ptr(); }

    /*!
     * \brief change the shape without retaining old data, and initialize as
     *      contiguous stride
     *
     * dtype and format would not be changed
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& resize(const TensorShape& shape);

    /*!
     * \brief totally reset the tensor to given storage and layout
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& reset(
            TensorStorage storage, const TensorLayout& layout);

    /* ================= getter and setters ================= */

    /*!
     * \brief change comp node; see TensorStorage::comp_node()
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& comp_node(
            CompNode comp_node, bool allow_mem_node_change = false);

    CompNode comp_node() const { return m_storage.comp_node(); }

    const TensorStorage& storage() const { return m_storage; }

    /*!
     * \brief change the storage and invalidate all data, resulting in an
     *      empty tensor
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& storage(const TensorStorage& storage);

    //! get data type
    DType dtype() const { return m_layout.dtype; }

    //! get tensor format
    TensorFormat format() const { return m_layout.format; }

    /*!
     * \brief change underlying dtype
     *
     * layout would be cleared (reset to ndim=0) if dtype actually changes
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& dtype(DType dtype);

    /*!
     * \brief change underlying tensor format
     *
     * layout would be cleared (reset to ndim=0) if format actually changes
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& format(TensorFormat format);

    /*!
     * \brief copy from another tensor and initialize contiguous layout
     *
     * Note:
     * 1. If the computing node is empty, it would be copied from src
     * 2. To copy from device to host, if the two tensors reside on
     *    different computing nodes, the caller is responsible to perform
     *    sync before copying; a better way is to set an empty computing
     *    node on the host tensor.
     * 3. For cross-device copy: copy would be synced on comp node of this,
     *    and the caller is responsible to sync this comp node with src comp
     *    node.
     * 4. If dtype is valid, it would be checked to match the dtype of src.
     * 5. Format would be reset to default and layout would be initialized
     *    to be contiguous.
     */
    template <class RStorage>
    MGE_WIN_DECLSPEC_FUC ChainReturnType& copy_from(const TensorND<RStorage>& src);

    /*!
     * \brief copy from another tensor of the same shape, retaining current
     *      layout
     *
     * If storage types of src and this are different and src is not
     * contiguous, a temporary storage would be allocated to first make src
     * contiguous.
     */
    template <class RStorage>
    MGE_WIN_DECLSPEC_FUC const ChainReturnType& copy_from_fixlayout(
            const TensorND<RStorage>& src) const;

    //! non-const version of copy_from_fixlayout
    template <class RStorage>
    ChainReturnType& copy_from_fixlayout(const TensorND<RStorage>& src) {
        return const_cast<ChainReturnType&>(
                static_cast<const ChainReturnType*>(this)->copy_from_fixlayout(src));
    }

    //! convert to megdnn::TensorND
    megdnn::TensorND as_megdnn() const {
        return {const_cast<void*>(static_cast<const void*>(raw_ptr())), m_layout};
    }

    /* ================= misc ================= */

    /*!
     * \brief block host thread to synchronize with the CompNode
     */
    const ChainReturnType& sync() const {
        comp_node().sync();
        return static_cast<const ChainReturnType&>(*this);
    }

    ChainReturnType& sync() {
        return const_cast<ChainReturnType&>(
                static_cast<const ChainReturnType*>(this)->sync());
    }

    //! similar to TensorStorage<>::make_proxy
    template <
            class RStorage, typename = typename std::enable_if<!std::is_same<
                                    TensorStorage, RStorage>::value>::type>
    static ChainReturnType make_proxy(const TensorND<RStorage>& src) {
        ChainReturnType ret;
        ret.reset(TensorStorage::make_proxy(src.storage()), src.layout());
        return ret;
    }

    //! similar to HostTensorStorage::proxy_to_default_cpu
    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<TensorStorage, HostTensorStorage>::value>>
    DeviceTensorND proxy_to_default_cpu() const {
        DeviceTensorND ret;
        ret.reset(storage().proxy_to_default_cpu(), layout());
        return ret;
    }

    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<TensorStorage, HostTensorStorage>::value>>
    HostTensorND proxy_to_comp_node(CompNode cn) const {
        HostTensorStorage host_storage;
        host_storage.reset(cn, m_storage.size(), m_storage.raw_storage());
        HostTensorND ret;
        ret.reset(host_storage, m_layout);
        return ret;
    }
};
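
/*
 * Editor's note -- a host-to-device copy sketch based on the copy_from()
 * notes above, not part of the original header:
 *
 *     HostTensorND host{CompNode::default_cpu(), TensorShape{4, 4}};
 *     host.ptr<float>()[0] = 1.f;           // lazy alloc happens on ptr()
 *     DeviceTensorND dev;                   // empty comp node: taken from src
 *     dev.copy_from(host).sync();           // caller syncs cross-device copies
 */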
/*!
 * \brief call memset on the data of a device tensor
 */
MGE_WIN_DECLSPEC_FUC void dev_tensor_memset(const DeviceTensorND& tensor, int val);

/*!
 * \brief fill zeros in the content of a dev tensor
 */
static inline void fill_zero_dev_tensor(const DeviceTensorND& tensor) {
    dev_tensor_memset(tensor, 0);
}

}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine installation package bundles the CUDA environment needed to run code on GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has GPU hardware and the driver is properly installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.