
tensor_format.cpp 15 kB

/**
 * \file dnn/src/common/tensor_format.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "megdnn/tensor_format.h"
#include "megdnn/basic_types.h"
#include "src/common/utils.h"

#include <unordered_map>

using namespace megdnn;
using namespace megdnn::detail;

namespace {
DefaultTensorFormat* default_tensor_format_obj;
}

/* ===================== TensorFormat ===================== */

TensorFormat TensorFormat::deserialize(const std::string& bin,
                                       const Handle* handle) {
    using Type = TensorFormat::Type;
    auto type = reinterpret_cast<const Type*>(bin.data());
    switch (*type) {
        case Type::DEFAULT:
            return DefaultTensorFormat::deserialize(handle, type + 1,
                                                    bin.size() - sizeof(Type));
        case Type::IMAGE2D_PACK4:
            return Image2DPack4TensorFormat::deserialize(
                    handle, type + 1, bin.size() - sizeof(Type));
        default:
            megdnn_throw("invalid tensor format type in deserialize");
    }
}

TensorFormat::Format() : m_impl{DefaultTensorFormat::make().m_impl} {}

std::string TensorFormat::to_string() const {
    return m_impl->to_string();
}

std::string TensorFormat::serialize() const {
    std::string ret;
    ret.reserve(32);
    ret.assign(sizeof(Type), '\0');
    *reinterpret_cast<Type*>(&ret[0]) = type();
    m_impl->serialize_append(ret);
    return ret;
}

void TensorFormat::on_bad_cvt(Type dst_type) const {
    MEGDNN_MARK_USED_VAR(dst_type);
    megdnn_throw(ssprintf("can not convert tensor format %s to %d",
                          impl()->to_string().c_str(),
                          static_cast<int>(dst_type)));
}

bool TensorFormat::is_default() const {
    return m_impl == default_tensor_format_obj;
}

/* ===================== DefaultFormat ===================== */

size_t DefaultTensorFormat::init_contiguous_stride(TensorLayout& layout) const {
    if (!layout.ndim)
        return 0;
    megdnn_assert(layout.ndim <= TensorLayout::MAX_NDIM);
    size_t accum = 1;
    SafeMultiplies<size_t> mul;
    for (size_t i = layout.ndim; i; --i) {
        layout.stride[i - 1] = accum;
        accum = mul(accum, layout.shape[i - 1]);
    }
    return accum;
}

bool DefaultTensorFormat::is_contiguous_spec(const TensorLayout& layout) const {
    return layout.is_physical_contiguous();
}

TensorLayout DefaultTensorFormat::collapse_contiguous_spec(
        const TensorLayout& layout) const {
    megdnn_assert(layout.ndim);
    TensorLayout res{layout};

    // remove all dims with shape 1
    for (int i = static_cast<int>(res.ndim) - 1; i >= 0 && res.ndim >= 2; --i) {
        if (!res.shape[i]) {
            // empty tensor
            res.ndim = 1;
            res.shape[0] = 0;
            res.stride[0] = 1;
            return res;
        }
        if (res.shape[i] == 1)
            res.remove_axis_inplace(i);
    }

    if (res.ndim == 1) {
        if (res.shape[0] <= 1) {
            // make it the "most canonical" contiguous layout for scalars or
            // empty tensors
            res.stride[0] = 1;
        }
        return res;
    }

    megdnn_assert(res.ndim && res.shape[res.ndim - 1]);
    for (int i = static_cast<int>(res.ndim) - 2; i >= 0; --i) {
        megdnn_assert(res.shape[i]);
        if (res.stride[i] ==
            res.stride[i + 1] * static_cast<ptrdiff_t>(res.shape[i + 1])) {
            res.shape[i] *= res.shape[i + 1];
            res.stride[i] = res.stride[i + 1];
            res.remove_axis_inplace(i + 1);
        }
    }
    return res;
}

TensorLayout::Span DefaultTensorFormat::span_spec(
        const TensorLayout& layout) const {
    if (layout.ndim == 0)
        return {0, 0, 0, 0};

    ptrdiff_t low_elem = 0;
    size_t high_elem = 0;
    for (size_t i = 0; i < layout.ndim; ++i) {
        auto shape_val = layout.shape[i];
        if (!shape_val) {
            return {0, 0, 0, 0};
        }
        auto stride_val = layout.stride[i];
        if (stride_val > 0) {
            high_elem += (shape_val - 1) * stride_val;
        } else {
            low_elem += (shape_val - 1) * stride_val;
        }
    }
    ++high_elem;
    ptrdiff_t low_byte;
    if (low_elem < 0) {
        megdnn_assert(!layout.dtype.is_low_bit(),
                      "tensors with low-bit dtypes shouldn't have negative "
                      "strides");
        low_byte = low_elem * layout.dtype.size();
    } else {
        low_byte = 0;
    }
    size_t high_byte = layout.dtype.size(high_elem);
    return TensorLayout::Span(low_elem, low_byte, high_elem, high_byte);
}

std::string DefaultTensorFormat::to_string() const {
    return "default{}";
}

void DefaultTensorFormat::serialize_append(std::string&) const {}

TensorFormat DefaultTensorFormat::deserialize(const Handle* handle,
                                              const void* buf, size_t size) {
    MEGDNN_MARK_USED_VAR(handle);
    MEGDNN_MARK_USED_VAR(buf);
    megdnn_assert(!size);
    return make();
}

TensorFormat DefaultTensorFormat::make() {
    // use static storage so the object is accessible in global destructing
    // phase
    static std::aligned_storage_t<sizeof(DefaultTensorFormat),
                                  alignof(DefaultTensorFormat)>
            storage;
    static DefaultTensorFormat* obj = default_tensor_format_obj =
            new (&storage) DefaultTensorFormat{};
    return impl_to_tensor_format(obj);
}

/* ===================== Image2DTensorFormatBase ===================== */

Image2DTensorFormatBase::Image2DTensorFormatBase(Type type, size_t align_axis,
                                                 size_t align_size_in_byte)
        : ImplBase(type) {
    megdnn_assert(align_size_in_byte && align_axis);
    m_align_axis = align_axis;
    m_align_size_in_byte_log2 = __builtin_ctz(align_size_in_byte);
    megdnn_assert((1u << m_align_size_in_byte_log2) == align_size_in_byte,
                  "align size not power of 2: %zu", align_size_in_byte);
}

size_t Image2DTensorFormatBase::init_contiguous_stride(
        TensorLayout& layout) const {
    if (!layout.ndim)
        return 0;
    megdnn_assert(layout.dtype.valid() && layout.ndim > m_align_axis,
                  "dtype=%s ndim=%zu align=%zu", layout.dtype.name(),
                  layout.ndim, m_align_axis);
    size_t align_size = align_size_in_byte(layout.dtype.size_log());
    size_t accum = 1;
    SafeMultiplies<size_t> mul;
    for (size_t i = layout.ndim; i; --i) {
        if (i == m_align_axis) {
            accum = get_aligned_power2<size_t>(accum, align_size);
        }
        layout.stride[i - 1] = accum;
        accum = mul(accum, layout.shape[i - 1]);
    }
    assert_valid(layout);
    return accum;
}

bool Image2DTensorFormatBase::is_contiguous_spec(
        const TensorLayout& layout) const {
    megdnn_assert(layout.dtype.valid());
    size_t align_size = align_size_in_byte(layout.dtype.size_log());
    ptrdiff_t expected = 1;
    int height_axis = static_cast<int>(m_align_axis - 1);
    for (int i = layout.ndim - 1; i >= 0; --i) {
        if (i == height_axis) {
            expected = megdnn::get_aligned_power2<size_t>(expected, align_size);
        }
        if (layout.shape[i] != 1 && layout.stride[i] != expected) {
            if (i == height_axis) {
                // allow row pitch to be larger than minimal required
                auto s = layout.stride[i];
                if (!s) {
                    // broadcast is not contiguous
                    return false;
                }
                size_t mask = align_size_in_byte(layout.dtype.size_log()) - 1;
                megdnn_assert(s > expected && !(s & mask),
                              "invalid row pitch: %d; layout: %s",
                              static_cast<int>(s), layout.to_string().c_str());
                expected = s;
            } else {
                return false;
            }
        }
        expected *= layout.shape[i];
    }
    // empty tensors are not contiguous
    return expected != 0;
}

TensorLayout Image2DTensorFormatBase::collapse_contiguous_spec(
        const TensorLayout& layout) const {
    assert_valid(layout);
    TensorLayout res{layout};
    int new_axis = m_align_axis;

    // remove all dims with shape 1
    for (int i = static_cast<int>(res.ndim) - 1; i >= 0 && res.ndim >= 3; --i) {
        if (i == new_axis && static_cast<int>(res.ndim) == new_axis + 1) {
            // i is the only width dim
            continue;
        }
        if (i == new_axis - 1 && !i) {
            // new_axis == 1 && i == 0, i is the only height dim
            continue;
        }
        if (res.shape[i] == 1) {
            res.remove_axis_inplace(i);
            if (i < new_axis)
                new_axis -= 1;
        }
    }

    megdnn_assert(res.ndim >= 2);
    auto contig_with_next = [&](size_t i) {
        return res.stride[i] ==
               res.stride[i + 1] * static_cast<ptrdiff_t>(res.shape[i + 1]);
    };

    for (int i = static_cast<int>(res.ndim) - 2; i >= new_axis; --i) {
        megdnn_assert(res.shape[i]);
        if (contig_with_next(i)) {
            // remove next axis
            res.shape[i] *= res.shape[i + 1];
            res.stride[i] = res.stride[i + 1];
            res.remove_axis_inplace(i + 1);
        }
    }

    for (int i = new_axis - 2; i >= 0; --i) {
        megdnn_assert(res.shape[i]);
        if (contig_with_next(i)) {
            res.shape[i] *= res.shape[i + 1];
            res.stride[i] = res.stride[i + 1];
            res.remove_axis_inplace(i + 1);
            if (i <= new_axis - 2)
                new_axis -= 1;
        }
    }
    res.format = change_axis(new_axis);
    return res;
}

TensorLayout::Span Image2DTensorFormatBase::span_spec(
        const TensorLayout& layout) const {
    assert_valid(layout);
    size_t size = image_height(layout) * image_row_pitch(layout);
    auto mask = (1 << layout.dtype.size_log()) - 1;
    megdnn_assert(!(size & mask), "unaligned size: %zu", size);
    return {0, 0, size >> layout.dtype.size_log(), size};
}

void Image2DTensorFormatBase::serialize_append(std::string& result) const {
    SerializePack pack;
    pack.align_axis = m_align_axis;
    megdnn_assert(pack.align_axis == m_align_axis);  // detect overflow
    result.append(reinterpret_cast<char*>(&pack), sizeof(pack));
}

size_t Image2DTensorFormatBase::image_height(const TensorLayout& layout) const {
    size_t accum = 1;
    for (int i = m_align_axis - 1; i >= 0; --i) {
        if (layout.stride[i] == 0) {
            // this dimension is broadcasted
        } else {
            accum *= layout.shape[i];
        }
    }
    return accum;
}

size_t Image2DTensorFormatBase::image_row_pitch(
        const TensorLayout& layout) const {
    for (int i = m_align_axis - 1; i >= 0; --i) {
        // find a non-broadcast axis
        if (auto s = layout.stride[i]) {
            return layout.dtype.size(s);
        }
    }
    // use width for all broadcasted case
    return get_aligned_power2<size_t>(
            layout.dtype.size(image_width_elems(layout)),
            1 << m_align_size_in_byte_log2);
}

void Image2DTensorFormatBase::assert_valid(const TensorLayout& layout) const {
    megdnn_assert(layout.dtype.valid() && layout.ndim > m_align_axis);
    ptrdiff_t first_non_zero_stride = 0;
    for (int i = layout.ndim - 1; i >= 0; --i) {
        megdnn_assert(layout.shape[i] && layout.stride[i] >= 0);
        if (i < static_cast<int>(m_align_axis) && !first_non_zero_stride) {
            first_non_zero_stride = layout.stride[i];
        }
    }
    size_t mask = align_size_in_byte(layout.dtype.size_log()) - 1;
    megdnn_assert(!(first_non_zero_stride & mask),
                  "first stride is %d, but alignment is %zu",
                  static_cast<int>(first_non_zero_stride), mask + 1);
}

size_t Image2DTensorFormatBase::image_width_elems(
        const TensorLayout& layout) const {
    size_t high_elem = 0;
    for (size_t i = m_align_axis; i < layout.ndim; ++i) {
        high_elem += (layout.shape[i] - 1) * layout.stride[i];
    }
    return high_elem + 1;
}

std::string Image2DTensorFormatBase::to_string() const {
    return ssprintf("I2D{%zu,%d}", m_align_axis,
                    1 << m_align_size_in_byte_log2);
}

/* ===================== Image2DPackedTensorFormatBase ===================== */

template <size_t PIXEL_SIZE>
size_t Image2DPackedTensorFormatBase<PIXEL_SIZE>::image_width(
        const TensorLayout& layout) const {
    auto ret = image_width_elems(layout);
    megdnn_assert(ret % PIXEL_SIZE == 0);
    return ret / PIXEL_SIZE;
}

template <size_t PIXEL_SIZE>
void Image2DPackedTensorFormatBase<PIXEL_SIZE>::assert_valid(
        const TensorLayout& layout) const {
    Image2DTensorFormatBase::assert_valid(layout);
    megdnn_assert(!(layout.shape[layout.ndim - 1] % PIXEL_SIZE),
                  "bad shape: %zu", layout.shape[layout.ndim - 1]);
}

namespace megdnn {
namespace detail {
template class Image2DPackedTensorFormatBase<4>;
}  // namespace detail
}  // namespace megdnn

/* ===================== Image2DPack4TensorFormat ===================== */

TensorFormat Image2DPack4TensorFormat::make_raw(size_t align_axis,
                                                size_t align_size_in_byte) {
    static std::mutex mtx;
    static std::unordered_map<uint64_t,
                              std::unique_ptr<Image2DPack4TensorFormat>>
            cache;
    megdnn_assert(std::max(align_axis, align_size_in_byte) <=
                  std::numeric_limits<uint32_t>::max());
    MEGDNN_LOCK_GUARD(mtx);
    auto&& ptr = cache[(static_cast<uint64_t>(align_axis) << 32) |
                       align_size_in_byte];
    if (!ptr) {
        ptr.reset(new Image2DPack4TensorFormat{align_axis, align_size_in_byte});
    }
    return impl_to_tensor_format(ptr.get());
}

TensorFormat Image2DPack4TensorFormat::make(size_t align_axis,
                                            const Handle* handle) {
    return make_raw(align_axis, handle->image2d_pitch_alignment());
}

TensorFormat Image2DPack4TensorFormat::deserialize(const Handle* handle,
                                                   const void* buf,
                                                   size_t size) {
    megdnn_assert(size == sizeof(SerializePack));
    auto pack = *static_cast<const SerializePack*>(buf);
    return make(pack.align_axis, handle);
}

TensorFormat Image2DPack4TensorFormat::change_axis(size_t axis) const {
    return make_raw(axis, align_size_in_byte());
}

// vim: syntax=cpp.doxygen
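
The snippet below is a minimal usage sketch, not part of tensor_format.cpp: it only exercises the serialize/deserialize pair defined above. It assumes a valid megdnn Handle pointer is available from elsewhere, and the function name roundtrip_default_format is hypothetical.

// Illustrative sketch: round-trip a TensorFormat through its binary form.
// `handle` is assumed to be a valid megdnn Handle created elsewhere.
#include "megdnn/tensor_format.h"

#include <string>

using namespace megdnn;
using namespace megdnn::detail;

TensorFormat roundtrip_default_format(const Handle* handle) {
    // the plain contiguous row-major format
    TensorFormat fmt = DefaultTensorFormat::make();
    // serialize() writes the Type tag followed by the impl-specific payload
    std::string bin = fmt.serialize();
    // deserialize() dispatches on the leading Type tag (DEFAULT here)
    return TensorFormat::deserialize(bin, handle);
}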

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has GPU hardware and that the driver is installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.