/**
 * \file src/core/include/megbrain/tensor.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include "megbrain/common.h"
#include "megbrain/comp_node.h"
#include "megbrain/dtype.h"
#include "megbrain/utils/metahelper.h"

#include "megdnn/basic_types.h"

#include <limits>
#include <memory>

namespace mgb {

using ::megdnn::TensorFormat;
using ::megdnn::TensorLayout;
using ::megdnn::TensorShape;

using ::megdnn::TensorFormatArray;
using ::megdnn::TensorLayoutArray;
using ::megdnn::TensorShapeArray;

/*!
 * \brief specify how a subtensor resides in a larger one
 */
class SubTensorSpec {
    TensorLayout m_layout;

    ptrdiff_t m_offset_elem = 0;

    SubTensorSpec(const TensorLayout& l, ptrdiff_t o) : m_layout{l}, m_offset_elem{o} {}

public:
    SubTensorSpec() = default;

    //! make a SubTensorSpec from given layout and zero offset
    static SubTensorSpec make_from_layout(const TensorLayout& layout) {
        return make_from_offset_elem(layout, 0);
    }

    //! make a SubTensorSpec from given layout and offset
    MGE_WIN_DECLSPEC_FUC static SubTensorSpec make_from_offset_elem(
            const TensorLayout& layout, ptrdiff_t offset_elem);

    //! get underlying layout
    const TensorLayout& layout() const { return m_layout; }

    //! get offset in number of logical elements in the layout
    ptrdiff_t offset_elem() const { return m_offset_elem; }

    //! get offset measured in bytes
    ptrdiff_t offset_byte() const {
        //! for lowbit cases, the offset must be aligned to byte boundaries
        mgb_assert(
                !m_layout.dtype.is_low_bit() ||
                !(m_offset_elem * m_layout.dtype.low_bit() % 8));
        return m_layout.dtype.size(m_offset_elem);
    }

    /*!
     * \brief merge with another SubTensorSpec: accumulate the offsets, and
     * replace the layout with that of rhs
     */
    MGE_WIN_DECLSPEC_FUC void merge_with(const SubTensorSpec& rhs);
};
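
/*
 * Usage sketch (illustrative only, not part of the API): for a Float32
 * layout, an element offset of 8 corresponds to 8 * sizeof(float) == 32
 * bytes.
 *
 *     TensorLayout layout{TensorShape{4, 8}, dtype::Float32{}};
 *     auto spec = SubTensorSpec::make_from_offset_elem(layout, 8);
 *     mgb_assert(spec.offset_elem() == 8);
 *     mgb_assert(spec.offset_byte() == 32);
 */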

/*!
 * \brief slice along some axis; index as in Python, with negative indices
 * supported. Scalar index can also be represented as a Slice, where
 * m_begin = idx, m_end = idx+1 and m_step = 1. The flag m_is_scalar_idx
 * indicates whether the Slice comes from a scalar index.
 */
class Slice {
    Maybe<ptrdiff_t> m_begin, m_end, m_step;
    bool m_is_scalar_idx;

public:
    Slice(Maybe<ptrdiff_t> begin = None, Maybe<ptrdiff_t> end = None,
          Maybe<ptrdiff_t> step = None, bool is_scalar_idx = false)
            : m_begin{begin},
              m_end{end},
              m_step{step},
              m_is_scalar_idx{is_scalar_idx} {}

    /*!
     * \brief apply this slice on given tensor layout, and get corresponding
     * subtensor
     * \param axis the axis to apply this slice; -1 can be used for
     * flattened layout
     */
    MGE_WIN_DECLSPEC_FUC SubTensorSpec apply(TensorLayout layout, int axis) const;
};
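
/*
 * Usage sketch (illustrative only): taking elements 1..2 along axis 0,
 * i.e. the Python-style slice layout[1:3]. On a contiguous (4, 8) layout
 * the resulting offset is one full row, i.e. 8 elements.
 *
 *     TensorLayout layout{TensorShape{4, 8}, dtype::Float32{}};
 *     SubTensorSpec spec = Slice(1, 3).apply(layout, 0);
 *     mgb_assert(spec.layout().shape[0] == 2);
 *     mgb_assert(spec.offset_elem() == 8);
 */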

template <class Trait>
class TensorStorage;

class DeviceTensorStorageTrait;
class HostTensorStorageTrait;

using HostTensorStorage = TensorStorage<HostTensorStorageTrait>;
using DeviceTensorStorage = TensorStorage<DeviceTensorStorageTrait>;

/*!
 * \brief manager for raw tensor memory
 *
 * It contains no dtype information and all sizes are measured in bytes.
 *
 * Note that ensure_size() is lazy, and memory allocation only happens when
 * ptr() or sub() is called
 */
template <class Trait>
class TensorStorage {
public:
    using RawStorage = std::shared_ptr<dt_byte>;

    TensorStorage() = default;

    TensorStorage(CompNode comp_node) : m_comp_node(comp_node) {}

    TensorStorage(TensorStorage&&) noexcept = default;
    TensorStorage& operator=(TensorStorage&&) noexcept = default;

    TensorStorage(const TensorStorage& rhs) { *this = rhs; }

    MGE_WIN_DECLSPEC_FUC TensorStorage& operator=(const TensorStorage& rhs);

    /*!
     * \brief whether given tensor span is valid in this storage
     */
    bool valid_span(const TensorLayout::Span& span) const {
        return m_comp_node.valid() &&
               static_cast<ptrdiff_t>(m_offset) + span.low_byte >= 0 &&
               span.high_byte <= size();
    }

    /*!
     * \brief ensure that its space could hold at least sz bytes
     *
     * Note
     * 1. This method is lazy; size would only be changed when memory
     * must be accessed.
     * 2. This method would only grow storage, but it would not release
     * memory
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage& ensure_size(size_t sz);

    /*!
     * \brief return a subtensor that shares the memory; the returned
     * subtensor is not allowed to realloc
     * \param offset offset given in bytes
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage sub(ptrdiff_t offset) const;

    //! apply lazy resize and get ptr
    dt_byte* ptr() const {
        return const_cast<TensorStorage*>(this)->apply_lazy_and_get_ptr();
    }

    /*!
     * \brief usable size in bytes until end of allocated block
     */
    size_t size() const { return m_size; }

    /*!
     * \brief offset on allocated block in bytes
     */
    size_t offset() const { return m_offset; }

    //! get underlying comp node; error would be raised if it is invalid
    CompNode comp_node() const {
        check_comp_node_valid();
        return m_comp_node;
    }

    //! get underlying comp node and allow it to be invalid
    CompNode comp_node_allow_invalid() const { return m_comp_node; }

    /*!
     * \brief whether underlying comp_node is valid
     */
    bool comp_node_valid() const { return m_comp_node.valid(); }

    /*!
     * \brief whether this tensor has no valid element (either due to
     * reaching end of mem chunk or no mem allocated)
     */
    bool empty() const { return !m_size; }

    /*!
     * \brief chain-style computing node setter
     *
     * note that if allow_mem_node_change is true and memory node is
     * changed, the underlying data would be released and this tensor would
     * become empty
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage& comp_node(
            CompNode node, bool allow_mem_node_change = false);

    /*!
     * \brief copy from another TensorStorage, possibly of other storage
     * type
     *
     * This storage must have been initialized
     *
     * \param size number of bytes to be copied; must not exceed size of
     * this or src
     */
    template <class RTrait>
    MGE_WIN_DECLSPEC_FUC void copy_from(
            const TensorStorage<RTrait>& src, size_t size) const;

    /*!
     * \brief reset the tensor storage to given memory area
     */
    MGE_WIN_DECLSPEC_FUC void reset(CompNode node, size_t size, RawStorage data);

    /*!
     * \brief make a TensorStorage that shares memory with another
     * TensorStorage of some different storage type
     *
     * This method can be used to convert between HostTensorStorage and
     * DeviceTensorStorage; \p src must be on CPU memory node.
     */
    template <
            class RTrait, typename = typename std::enable_if<
                                  !std::is_same<Trait, RTrait>::value>::type>
    MGE_WIN_DECLSPEC_FUC static TensorStorage make_proxy(
            const TensorStorage<RTrait>& src);

    /*!
     * \brief make a DeviceTensorStorage on default_cpu
     * that shares memory with this
     *
     * this must be a HostTensorStorage. Alignment not checked.
     */
    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<Trait, HostTensorStorageTrait>::value>>
    DeviceTensorStorage proxy_to_default_cpu() const {
        ptr();
        return {true, CompNode::default_cpu(), m_size, m_capacity, m_offset, m_data};
    }

    //! shortcut for raw_storage().use_count(), but won't trigger lazy alloc
    size_t use_count() const {
        if (m_size > m_capacity) {
            return 1;
        }
        return raw_storage().use_count();
    }

    //! whether current capacity is 0 (so we are waiting for lazy init)
    bool has_no_real_storage() const { return !m_capacity; }

    //! get underlying raw reference-counted storage
    const RawStorage& raw_storage() const {
        ptr();  // apply lazy resize
        return m_data;
    }

private:
    template <class T>
    friend class TensorStorage;

    bool m_allow_realloc = true;
    CompNode m_comp_node;

    //! current logical size; may exceed m_capacity, in which case memory
    //! would be allocated when ptr() is called
    size_t m_size = 0;

    //! usable size until end of allocated data block, excluding offset
    size_t m_capacity = 0;

    //! offset on m_data
    size_t m_offset = 0;

    RawStorage m_data;

    //! used internally for returning a predefined TensorStorage
    TensorStorage(
            bool allow_realloc, CompNode comp_node, size_t size, size_t capacity,
            size_t offset, const RawStorage& data)
            : m_allow_realloc(allow_realloc),
              m_comp_node(comp_node),
              m_size(size),
              m_capacity(capacity),
              m_offset(offset),
              m_data(data) {}

    void check_comp_node_valid() const {
        if (mgb_unlikely(!m_comp_node.valid()))
            on_invalid_comp_node();
    }

    MGE_WIN_DECLSPEC_FUC dt_byte* apply_lazy_and_get_ptr();

    [[noreturn]] MGE_WIN_DECLSPEC_FUC static void on_invalid_comp_node();
};
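
/*
 * Sketch of the lazy behavior described above (illustrative only; assumes
 * a "cpu0" comp node is available): ensure_size() merely records the
 * requested size, and the buffer is actually allocated on the first call
 * to ptr() or sub().
 *
 *     HostTensorStorage storage{CompNode::load("cpu0")};
 *     storage.ensure_size(1024);         // no allocation happens here
 *     mgb_assert(storage.has_no_real_storage());
 *     dt_byte* p = storage.ptr();        // allocation happens here
 *     mgb_assert(p && !storage.has_no_real_storage());
 */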

template <class TensorStorage>
class TensorND;

using HostTensorND = TensorND<HostTensorStorage>;
using DeviceTensorND = TensorND<DeviceTensorStorage>;

/*!
 * \brief n-dimensional tensor
 *
 * Note that TensorND is built on TensorStorage, which has some lazy behavior.
 */
template <class TensorStorage>
class TensorND {
    TensorStorage m_storage;
    TensorLayout m_layout;

public:
    using ChainReturnType = TensorND<TensorStorage>;

    MGE_WIN_DECLSPEC_FUC TensorND();

    MGE_WIN_DECLSPEC_FUC explicit TensorND(CompNode node);

    MGE_WIN_DECLSPEC_FUC explicit TensorND(DType dtype);

    MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, DType dtype);

    //! allocate contiguous tensor
    MGE_WIN_DECLSPEC_FUC TensorND(
            CompNode node, const TensorShape& shape, DType dtype = dtype::Float32{});

    MGE_WIN_DECLSPEC_FUC TensorND(
            CompNode node, const TensorShape& shape, DType dtype, TensorFormat format);

    //! allocate contiguous tensor from given comp node and layout; layout
    //! is required to be contiguous, and its dtype and format would be used
    MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, const TensorLayout& layout);

    /* ================= shape and basic functionality ================= */

    //! get subtensor according to given slices
    MGE_WIN_DECLSPEC_FUC ChainReturnType
    operator[](std::initializer_list<Slice> slice) const;

    //! get subtensor according to spec
    MGE_WIN_DECLSPEC_FUC ChainReturnType sub(const SubTensorSpec& spec) const;
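
    /*
     * Usage sketch for the two accessors above (illustrative only): the
     * Python-style subtensor t[1:3] of a (4, 8) tensor shares storage with
     * t, and no data is copied.
     *
     *     HostTensorND t{CompNode::load("cpu0"), TensorShape{4, 8}};
     *     HostTensorND s = t[{{1, 3}}];
     *     // s.shape() is (2, 8); s aliases the storage of t
     */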

    //! whether underlying storage is empty
    bool empty() const { return m_storage.empty(); }

    //! whether tensor shape is valid (i.e. ndim != 0)
    bool shape_valid() const { return m_layout.ndim; }

    const TensorShape& shape() const { return m_layout; }

    const TensorLayout& layout() const { return m_layout; }

    //! shape at given dimension, with boundary check
    size_t shape(size_t dim) const {
        mgb_assert(dim < m_layout.ndim);
        return m_layout.shape[dim];
    }

    //! get ptr at given index
    template <typename T, typename Iter>
    T* ptr(Iter idx_begin, Iter idx_end) {
        auto ptr = this->template ptr<T>();
        size_t nidx = 0;
        while (idx_begin != idx_end) {
            mgb_assert(nidx < m_layout.ndim);
            size_t idx = *idx_begin;
            mgb_assert(idx < m_layout.shape[nidx]);
            ptr += m_layout.stride[nidx] * idx;

            ++idx_begin;
            ++nidx;
        }
        return ptr;
    }

    template <typename T>
    T* ptr(std::initializer_list<size_t> idx) {
        return ptr<T>(idx.begin(), idx.end());
    }

    template <typename T>
    const T* ptr(std::initializer_list<size_t> dim) const {
        return const_cast<TensorND&>(*this).ptr<T>(dim);
    }

    //! get ptr of buffer start; \p T must match dtype
    template <typename T>
    T* ptr() const {
        m_layout.dtype.assert_is_ctype<T>();
        return m_storage.ptr()->template as<T>();
    }

    dt_byte* raw_ptr() const { return m_storage.ptr(); }
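
    /*
     * Usage sketch for the typed accessors above (illustrative only):
     * T must match the tensor dtype, and each index is checked against
     * the corresponding shape dimension.
     *
     *     HostTensorND t{CompNode::load("cpu0"), TensorShape{2, 3}};
     *     float* row1 = t.ptr<float>({1});   // start of the second row
     *     row1[2] = 1.f;                     // element (1, 2)
     *     mgb_assert(*t.ptr<float>({1, 2}) == 1.f);
     */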

    /*!
     * \brief change the shape without retaining old data, and initialize as
     * contiguous stride
     *
     * dtype and format would not be changed
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& resize(const TensorShape& shape);

    /*!
     * \brief totally reset the tensor to given storage and layout
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& reset(
            TensorStorage storage, const TensorLayout& layout);

    /* ================= getter and setters ================= */

    /*!
     * \brief change comp node; see TensorStorage::comp_node()
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& comp_node(
            CompNode comp_node, bool allow_mem_node_change = false);

    CompNode comp_node() const { return m_storage.comp_node(); }

    const TensorStorage& storage() const { return m_storage; }

    /*!
     * \brief change the storage and invalidate all data, resulting in an
     * empty tensor
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& storage(const TensorStorage& storage);

    //! get data type
    DType dtype() const { return m_layout.dtype; }

    //! get tensor format
    TensorFormat format() const { return m_layout.format; }

    /*!
     * \brief change underlying dtype
     *
     * layout would be cleared (reset to ndim=0) if dtype actually changes
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& dtype(DType dtype);

    /*!
     * \brief change underlying tensor format
     *
     * layout would be cleared (reset to ndim=0) if format actually changes
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& format(TensorFormat format);

    /*!
     * \brief copy from another tensor and initialize contiguous layout
     *
     * Note:
     * 1. If the computing node of this tensor is not set, it would be
     *    copied from src.
     * 2. To copy from device to host, if the two tensors reside on
     *    different computing nodes, the caller is responsible for
     *    performing a sync before copying; a simpler alternative is to
     *    leave the computing node of the host tensor unset.
     * 3. For cross-device copy: the copy would be synced on the comp node
     *    of this tensor, and the caller is responsible for syncing this
     *    comp node with the comp node of src.
     * 4. If dtype is valid, it would be checked to match the dtype of src.
     * 5. Format would be reset to default and layout would be initialized
     *    to be contiguous.
     */
    template <class RStorage>
    MGE_WIN_DECLSPEC_FUC ChainReturnType& copy_from(const TensorND<RStorage>& src);

    /*!
     * \brief copy from another tensor of the same shape, retaining current
     * layout
     *
     * If storage type of src and this are different and src is not
     * contiguous, a temporary storage would be allocated to first make src
     * contiguous.
     */
    template <class RStorage>
    MGE_WIN_DECLSPEC_FUC const ChainReturnType& copy_from_fixlayout(
            const TensorND<RStorage>& src) const;

    //! non-const version of copy_from_fixlayout
    template <class RStorage>
    ChainReturnType& copy_from_fixlayout(const TensorND<RStorage>& src) {
        return const_cast<ChainReturnType&>(
                static_cast<const ChainReturnType*>(this)->copy_from_fixlayout(src));
    }
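
    /*
     * Sketch contrasting the two copy flavors above (illustrative only):
     * copy_from() re-initializes the destination with a contiguous layout
     * (taking the comp node from src if unset), while copy_from_fixlayout()
     * keeps the destination layout and only transfers the data.
     *
     *     HostTensorND host{CompNode::load("cpu0"), TensorShape{16}};
     *     DeviceTensorND dev;
     *     dev.copy_from(host).sync();            // dev becomes (16), contiguous
     *     host.copy_from_fixlayout(dev).sync();  // shapes already match
     */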

    //! convert to megdnn::TensorND
    megdnn::TensorND as_megdnn() const {
        return {const_cast<void*>(static_cast<const void*>(raw_ptr())), m_layout};
    }

    /* ================= misc ================= */

    /*!
     * \brief block host thread to synchronize with the CompNode
     */
    const ChainReturnType& sync() const {
        comp_node().sync();
        return static_cast<const ChainReturnType&>(*this);
    }

    ChainReturnType& sync() {
        return const_cast<ChainReturnType&>(
                static_cast<const ChainReturnType*>(this)->sync());
    }

    //! similar to TensorStorage<>::make_proxy
    template <
            class RStorage, typename = typename std::enable_if<!std::is_same<
                                    TensorStorage, RStorage>::value>::type>
    static ChainReturnType make_proxy(const TensorND<RStorage>& src) {
        ChainReturnType ret;
        ret.reset(TensorStorage::make_proxy(src.storage()), src.layout());
        return ret;
    }

    //! similar to HostTensorStorage::proxy_to_default_cpu
    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<TensorStorage, HostTensorStorage>::value>>
    DeviceTensorND proxy_to_default_cpu() const {
        DeviceTensorND ret;
        ret.reset(storage().proxy_to_default_cpu(), layout());
        return ret;
    }

    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<TensorStorage, HostTensorStorage>::value>>
    HostTensorND proxy_to_comp_node(CompNode cn) const {
        HostTensorStorage host_storage;
        host_storage.reset(cn, m_storage.size(), m_storage.raw_storage());
        HostTensorND ret;
        ret.reset(host_storage, m_layout);
        return ret;
    }
};

/*!
 * \brief call memset on the data of a device tensor
 */
MGE_WIN_DECLSPEC_FUC void dev_tensor_memset(const DeviceTensorND& tensor, int val);

/*!
 * \brief fill the content of a device tensor with zeros
 */
static inline void fill_zero_dev_tensor(const DeviceTensorND& tensor) {
    dev_tensor_memset(tensor, 0);
}
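
/*
 * Usage sketch (illustrative only; assumes an "xpu0" comp node is
 * available): zero-initializing a freshly allocated device tensor.
 *
 *     DeviceTensorND t{CompNode::load("xpu0"), TensorShape{32}};
 *     fill_zero_dev_tensor(t);
 *     t.sync();  // block until the memset has finished
 */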

}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}