You cannot select more than 25 topics. A topic must start with a Chinese character, a letter or a number; it can include dashes ('-') and can be up to 35 characters long.

relayout.cpp 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. /**
  2. * \file dnn/test/common/relayout.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megdnn/oprs/general.h"
  12. #include "src/common/relayout_helper.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/relayout.h"
  15. #include "test/common/checker.h"
  16. using namespace megdnn;
  17. using namespace test;
  18. using namespace megdnn::relayout;
  19. using namespace test::relayout;
  20. namespace {
  21. TestArg generate_transpose_args(size_t batch, size_t m, size_t n,
  22. size_t c, DType dtype) {
  23. TestArg arg;
  24. arg.src = TensorLayout(
  25. TensorShape{batch, n, m, c},
  26. {static_cast<std::ptrdiff_t>(n * m * c), static_cast<std::ptrdiff_t>(c),
  27. static_cast<std::ptrdiff_t>(n * c), 1},
  28. dtype);
  29. arg.dst = TensorLayout(TensorShape{batch, n, m, c}, dtype);
  30. return arg;
  31. }
  32. } // anonymous namespace
  33. namespace megdnn {
  34. namespace test {
  35. namespace relayout {
  36. void run_test_cv(Handle* handle, size_t CH) {
  37. std::vector<TestArg> args;
  38. for (size_t M = 124; M <= 130; ++M) {
  39. for (size_t N = 124; N <= 130; ++N) {
  40. args.push_back(
  41. generate_transpose_args(1, M, N, CH, dtype::Uint8()));
  42. args.push_back(
  43. generate_transpose_args(1, M, N, CH, dtype::Int32()));
  44. args.push_back(
  45. generate_transpose_args(1, M, N, CH, dtype::Float32()));
  46. args.push_back(
  47. generate_transpose_args(3, M, N, CH, dtype::Float32()));
  48. }
  49. }
  50. Checker<Relayout> checker(handle);
  51. for (auto&& arg : args) {
  52. checker.execl({arg.src, arg.dst});
  53. }
  54. }
  55. #define DEF_TEST(name) \
  56. template<> \
  57. void run_test<name>(Handle *handle)
  58. DEF_TEST(cv) {
  59. run_test_cv(handle, 1);
  60. }
  61. DEF_TEST(cv_ch3) {
  62. run_test_cv(handle, 3);
  63. }
  64. DEF_TEST(cv_ch5) {
  65. run_test_cv(handle, 5);
  66. }
  67. DEF_TEST(broadcast) {
  68. std::vector<TestArg> args;
  69. TensorLayout src{{2, 3, 4}, dtype::Float32()},
  70. dst{{2, 3, 4}, dtype::Float32()};
  71. src.stride[0] = 4;
  72. src.stride[1] = 0;
  73. args.emplace_back(src, dst);
  74. // last stride contiguous
  75. args.emplace_back(TensorLayout({3, 100, 2}, {2, 0, 1}, dtype::Float16()),
  76. TensorLayout({3, 100, 2}, {200, 2, 1}, dtype::Float16()));
  77. Checker<Relayout> checker(handle);
  78. for (auto &&arg : args) {
  79. checker.execl({arg.src, arg.dst});
  80. }
  81. }
  82. DEF_TEST(negative) {
  83. TensorLayout src{{7, 8, 10}, dtype::Float32()},
  84. dst{{7, 8, 10}, dtype::Float32()};
  85. src.stride[0] *= -1;
  86. Checker<Relayout> checker(handle);
  87. checker.execl({src, dst});
  88. }
  89. DEF_TEST(transpose) {
  90. Checker<Relayout> checker(handle);
  91. {
  92. TensorLayout sl({8, 10}, dtype::Int32()), dl({10, 8}, dtype::Int32());
  93. sl = sl.dimshuffle({1, 0});
  94. checker.execl({sl, dl});
  95. checker.execl({dl, sl});
  96. }
  97. {
  98. TensorLayout sl({8, 10, 2}, dtype::Int32()),
  99. dl({2, 8, 10}, dtype::Int32());
  100. sl = sl.dimshuffle({2, 0, 1});
  101. checker.execl({sl, dl});
  102. checker.execl({dl, sl});
  103. }
  104. }
  105. #undef DEF_TEST
  106. } // namespace relayout
  107. } // namespace test
  108. } // namespace megdnn
  109. void test::relayout::run_cv_benchmark(Handle* handle) {
  110. auto handle_naive = create_cpu_handle(2);
  111. std::vector<TestArg> args;
  112. args.push_back(generate_transpose_args(1, 255, 256, 1, dtype::Int32()));
  113. args.push_back(generate_transpose_args(1, 513, 1025, 3, dtype::Int32()));
  114. args.push_back(generate_transpose_args(1, 255, 256, 1, dtype::Uint8()));
  115. args.push_back(generate_transpose_args(1, 513, 1025, 3, dtype::Uint8()));
  116. args.push_back(generate_transpose_args(1, 255, 256, 3, dtype::Float32()));
  117. args.push_back(generate_transpose_args(1, 513, 1025, 1, dtype::Float32()));
  118. args.push_back(generate_transpose_args(2, 987, 573, 6, dtype::Float32()));
  119. Benchmarker<Relayout> benchmarker(handle);
  120. Benchmarker<Relayout> benchmarker_naive(handle_naive.get());
  121. Checker<Relayout> checker(handle);
  122. benchmarker_naive.set_times(1).set_display(false);
  123. benchmarker.set_times(1).set_display(false);
  124. for (auto&& arg : args) {
  125. checker.execl({arg.src, arg.dst});
  126. auto t0 = benchmarker.execl({arg.src, arg.dst});
  127. auto t1 = benchmarker_naive.execl({arg.src, arg.dst});
  128. double k = arg.dst.span().dist_byte() * 1e3 / (1024 * 1024 * 1024);
  129. printf("cur=%7.3fms,%5.2fGiB/s naive=%7.3fms,%5.2fGiB/s %s %s\n", t0,
  130. k / t0, t1, k / t1, arg.dst.TensorShape::to_string().c_str(),
  131. arg.dst.dtype.name());
  132. }
  133. }
  134. TEST(RELAYOUT, TRANSPOSE_DET) {
  135. auto run = [](const TensorShape& shape,
  136. const std::vector<size_t>& dimshuffle,
  137. bool expect_is_transpose, const TransposeParam& p = {}) {
  138. TensorLayout src{shape, dtype::Float32{}};
  139. src = src.dimshuffle(dimshuffle).collapse_contiguous();
  140. TensorLayout dst{TensorShape{src.total_nr_elems()}, src.dtype};
  141. TransposeParam p_get;
  142. bool succ = is_transpose(src, dst, p_get);
  143. ASSERT_EQ(expect_is_transpose, succ);
  144. if (succ) {
  145. ASSERT_EQ(p_get.batch, p.batch);
  146. ASSERT_EQ(p_get.m, p.m);
  147. ASSERT_EQ(p_get.n, p.n);
  148. ASSERT_EQ(p_get.c, p.c);
  149. }
  150. // swap m, n
  151. succ = is_transpose(dst, src, p_get);
  152. ASSERT_EQ(expect_is_transpose, succ);
  153. if (succ) {
  154. ASSERT_EQ(p_get.batch, p.batch);
  155. ASSERT_EQ(p_get.m, p.n);
  156. ASSERT_EQ(p_get.n, p.m);
  157. ASSERT_EQ(p_get.c, p.c);
  158. }
  159. };
  160. run({2, 3}, {1, 0}, true, {1, 2, 3, 1});
  161. run({2, 3, 5}, {1, 0, 2}, true, {1, 2, 3, 5});
  162. run({2, 3, 5}, {0, 2, 1}, true, {2, 3, 5, 1});
  163. run({3, 2, 3, 5}, {0, 2, 1, 3}, true, {3, 2, 3, 5});
  164. run({3, 2, 3, 5}, {0, 1, 3, 2}, true, {6, 3, 5, 1});
  165. run({2, 3, 5}, {2, 1, 0}, false);
  166. run({3, 2, 3, 5}, {3, 2, 1, 0}, false);
  167. }
  168. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台