
relayout.cpp

/**
 * \file dnn/test/fallback/relayout.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied.
 */
#include "test/fallback/fixture.h"

#include "megdnn/basic_types.h"
#include "test/common/checker.h"
#include "test/common/relayout.h"
#include "test/common/task_record_check.h"
#include "test/common/tensor.h"

#include <cstring>  // for memset
#include <ctime>

using namespace megdnn;
using namespace test;

namespace {
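// Run the shared relayout test suite (test/common/relayout.h) for every
// case in relayout::test_types on the fallback handle.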
template <typename tag>
class FALLBACK_RELAYOUT : public FALLBACK {};
TYPED_TEST_CASE(FALLBACK_RELAYOUT, relayout::test_types);
TYPED_TEST(FALLBACK_RELAYOUT, run) {
    relayout::run_test<TypeParam>(this->handle());
}
}  // namespace
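
// Correctness of a plain contiguous int32 copy.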
TEST_F(FALLBACK, RELAYOUT_CONTINUE) {
    Checker<Relayout> checker(handle());
    checker.set_dtype(0, dtype::Int32());
    checker.set_dtype(1, dtype::Int32());
    checker.exec({{2, 2, 2}, {2, 2, 2}});
}
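
// The same case driven through TaskRecordChecker to check the task-record
// execution path.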
TEST_F(FALLBACK, RELAYOUT_RECORD) {
    TaskRecordChecker<Relayout> checker(1);
    checker.set_dtype(0, dtype::Int32());
    checker.set_dtype(1, dtype::Int32());
    checker.exec({{2, 2, 2}, {2, 2, 2}});
}

#if MEGDNN_WITH_BENCHMARK
TEST_F(FALLBACK, BENCHMARK_RELAYOUT_CV) {
    relayout::run_cv_benchmark(handle());
}
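
// Benchmark the fallback Relayout against the naive implementation (created
// on a separate plain CPU handle) over assorted non-contiguous layouts; the
// outputs of the two implementations must agree bit-for-bit.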
TEST_F(FALLBACK, BENCHMARK_RELAYOUT) {
    auto naive_handle = create_cpu_handle(2);
    bool verbose = false;
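    // `run` copies the same data with both implementations; `out_cont`
    // selects the direction (non-contiguous -> contiguous when true). It
    // asserts the two results match and optionally prints throughput.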
    auto run = [&](bool out_cont, const TensorLayout& cont_layout,
                   const TensorLayout& noncont_layout) {
        megdnn_assert(
                cont_layout.dtype == dtype::Int32() &&
                noncont_layout.dtype == dtype::Int32() &&
                noncont_layout.span().low_byte == 0);
        auto noncont_storage_size = noncont_layout.span().high_elem;
        Tensor<dt_int32> noncont_storage0(
                handle(), {{noncont_storage_size}, dtype::Int32()}),
                noncont_storage1(handle(), {{noncont_storage_size}, dtype::Int32()}),
                cont_storage0(handle(), cont_layout),
                cont_storage1(handle(), cont_layout);
        auto noncont0 = noncont_storage0.tensornd(),
             noncont1 = noncont_storage1.tensornd();
        noncont0.layout = noncont_layout;
        noncont1.layout = noncont_layout;
        TensorND src, dst0, dst1;
        if (out_cont) {
            src = noncont0;
            dst0 = cont_storage0.tensornd();
            dst1 = cont_storage1.tensornd();
            auto ptr = src.ptr<int>();
            for (size_t i = 0; i < noncont_storage_size; ++i) {
                ptr[i] = i;
            }
        } else {
            memset(noncont_storage0.ptr(), -1,
                   noncont_storage0.layout().span().dist_byte());
            memset(noncont_storage1.ptr(), -1,
                   noncont_storage1.layout().span().dist_byte());
            src = cont_storage0.tensornd();
            dst0 = noncont0;
            dst1 = noncont1;
            auto ptr = src.ptr<int>();
            for (size_t i = 0, it = src.layout.total_nr_elems(); i < it; ++i) {
                ptr[i] = i;
            }
        }
        auto opr_cur = handle()->create_operator<Relayout>();
        auto opr_naive = naive_handle->create_operator<Relayout>();
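        // Time a single exec() in milliseconds, after one warm-up run.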
        auto timeit = [&src](Relayout* opr, TensorND out) {
            opr->exec(src, out);
            auto start = clock();
            opr->exec(src, out);
            auto stop = clock();
            return (stop - start) * 1e3 / CLOCKS_PER_SEC;
        };
        auto t1 = timeit(opr_naive.get(), dst1), t0 = timeit(opr_cur.get(), dst0);
        double tot_size_gb_ms = cont_layout.total_nr_elems() * sizeof(int) /
                                1024.0 / 1024.0 / 1024.0 * 1e3;
        if (verbose) {
            printf("noncont-%zu dir=%d: fallback=%7.3fms,%5.2fGiB/s "
                   "naive=%7.3fms,%5.2fGiB/s\n",
                   noncont_layout.collapse_contiguous().ndim, out_cont, t0,
                   tot_size_gb_ms / t0, t1, tot_size_gb_ms / t1);
        }
        ASSERT_EQ(
                0, memcmp(dst0.ptr<int>(), dst1.ptr<int>(),
                          dst0.layout.span().dist_byte()));
    };
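    // `run_preset` derives a non-contiguous layout from `noncont_shp`:
    // `swap` transposes dims (swap-1, swap); `sub` embeds the tensor as a
    // subtensor of storage whose last dim is one element larger.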
    auto run_preset = [&](const TensorShape& noncont_shp, int swap, bool sub,
                          bool out_cont) {
        TensorLayout noncont_layout(noncont_shp, dtype::Int32());
        if (swap) {
            auto a = swap - 1, b = swap;
            std::swap(noncont_layout.shape[a], noncont_layout.shape[b]);
            std::swap(noncont_layout.stride[a], noncont_layout.stride[b]);
        }
        TensorLayout cont_layout = noncont_layout;
        cont_layout.init_contiguous_stride();
        TensorShape noncont_storage_shp(cont_layout);
        if (sub) {
            ++noncont_storage_shp[noncont_layout.ndim - 1];
            noncont_layout.init_contiguous_stride(noncont_storage_shp);
            --noncont_layout.shape[noncont_layout.ndim - 1];
        }
        run(out_cont, cont_layout, noncont_layout);
    };
    for (bool out_cont : {false, true}) {
        // small cases: checked, but no timing output
        verbose = false;
        run_preset({2, 3}, 1, false, out_cont);
        run_preset({2, 2, 2}, 0, true, out_cont);
        {
            // padding-like
            TensorLayout cont{{2, 3, 3}, dtype::Int32()}, noncont = cont;
            noncont.stride[1] = 5;
            noncont.stride[0] = 25;
            run(out_cont, cont, noncont);
        }
        // larger cases: print timing as well
        verbose = true;
        run_preset({1234, 5678}, 0, false, out_cont);
        run_preset({256, 256, 256}, 0, true, out_cont);
        run_preset({2, 3, 1024, 1024}, 1, false, out_cont);
        run_preset({1025, 2049}, 1, false, out_cont);
        run_preset({2049, 1025}, 1, false, out_cont);
        run_preset({10, 1024, 1024}, 2, false, out_cont);
        {
            // padding-like
            TensorLayout cont{{60, 60, 60}, dtype::Int32()}, noncont = cont;
            noncont.stride[1] = 63;
            noncont.stride[0] = 63 * 63;
            run(out_cont, cont, noncont);
        }
    }
}
#endif

// vim: syntax=cpp.doxygen