You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

relayout.cpp 6.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. /**
  2. * \file dnn/test/common/relayout.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megdnn/oprs/general.h"
  12. #include "src/common/relayout_helper.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/checker.h"
  15. #include "test/common/relayout.h"
  16. using namespace megdnn;
  17. using namespace test;
  18. using namespace megdnn::relayout;
  19. using namespace test::relayout;
  20. namespace {
  21. TestArg generate_transpose_args(
  22. size_t batch, size_t m, size_t n, size_t c, DType dtype) {
  23. TestArg arg;
  24. arg.src = TensorLayout(
  25. TensorShape{batch, n, m, c},
  26. {static_cast<std::ptrdiff_t>(n * m * c), static_cast<std::ptrdiff_t>(c),
  27. static_cast<std::ptrdiff_t>(n * c), 1},
  28. dtype);
  29. arg.dst = TensorLayout(TensorShape{batch, n, m, c}, dtype);
  30. return arg;
  31. }
  32. } // anonymous namespace
  33. namespace megdnn {
  34. namespace test {
  35. namespace relayout {
  36. void run_test_cv(Handle* handle, size_t CH) {
  37. std::vector<TestArg> args;
  38. for (size_t M = 124; M <= 130; ++M) {
  39. for (size_t N = 124; N <= 130; ++N) {
  40. args.push_back(generate_transpose_args(1, M, N, CH, dtype::Uint8()));
  41. args.push_back(generate_transpose_args(1, M, N, CH, dtype::Int32()));
  42. args.push_back(generate_transpose_args(1, M, N, CH, dtype::Float32()));
  43. args.push_back(generate_transpose_args(3, M, N, CH, dtype::Float32()));
  44. }
  45. }
  46. Checker<Relayout> checker(handle);
  47. for (auto&& arg : args) {
  48. checker.execl({arg.src, arg.dst});
  49. }
  50. }
  51. #define DEF_TEST(name) \
  52. template <> \
  53. void run_test<name>(Handle * handle)
  54. DEF_TEST(cv) {
  55. run_test_cv(handle, 1);
  56. }
  57. DEF_TEST(cv_ch3) {
  58. run_test_cv(handle, 3);
  59. }
  60. DEF_TEST(cv_ch5) {
  61. run_test_cv(handle, 5);
  62. }
  63. DEF_TEST(broadcast) {
  64. std::vector<TestArg> args;
  65. TensorLayout src{{2, 3, 4}, dtype::Float32()}, dst{{2, 3, 4}, dtype::Float32()};
  66. src.stride[0] = 4;
  67. src.stride[1] = 0;
  68. args.emplace_back(src, dst);
  69. // last stride contiguous
  70. args.emplace_back(
  71. TensorLayout({3, 100, 2}, {2, 0, 1}, dtype::Float16()),
  72. TensorLayout({3, 100, 2}, {200, 2, 1}, dtype::Float16()));
  73. Checker<Relayout> checker(handle);
  74. for (auto&& arg : args) {
  75. checker.execl({arg.src, arg.dst});
  76. }
  77. }
  78. DEF_TEST(negative) {
  79. TensorLayout src{{7, 8, 10}, dtype::Float32()}, dst{{7, 8, 10}, dtype::Float32()};
  80. src.stride[0] *= -1;
  81. Checker<Relayout> checker(handle);
  82. checker.execl({src, dst});
  83. }
  84. DEF_TEST(transpose) {
  85. Checker<Relayout> checker(handle);
  86. {
  87. TensorLayout sl({8, 10}, dtype::Int32()), dl({10, 8}, dtype::Int32());
  88. sl = sl.dimshuffle({1, 0});
  89. checker.execl({sl, dl});
  90. checker.execl({dl, sl});
  91. }
  92. {
  93. TensorLayout sl({8, 10, 2}, dtype::Int32()), dl({2, 8, 10}, dtype::Int32());
  94. sl = sl.dimshuffle({2, 0, 1});
  95. checker.execl({sl, dl});
  96. checker.execl({dl, sl});
  97. }
  98. }
  99. #undef DEF_TEST
  100. } // namespace relayout
  101. } // namespace test
  102. } // namespace megdnn
  103. void test::relayout::run_cv_benchmark(Handle* handle) {
  104. auto handle_naive = create_cpu_handle(2);
  105. std::vector<TestArg> args;
  106. args.push_back(generate_transpose_args(1, 255, 256, 1, dtype::Int32()));
  107. args.push_back(generate_transpose_args(1, 513, 1025, 3, dtype::Int32()));
  108. args.push_back(generate_transpose_args(1, 255, 256, 1, dtype::Uint8()));
  109. args.push_back(generate_transpose_args(1, 513, 1025, 3, dtype::Uint8()));
  110. args.push_back(generate_transpose_args(1, 255, 256, 3, dtype::Float32()));
  111. args.push_back(generate_transpose_args(1, 513, 1025, 1, dtype::Float32()));
  112. args.push_back(generate_transpose_args(2, 987, 573, 6, dtype::Float32()));
  113. Benchmarker<Relayout> benchmarker(handle);
  114. Benchmarker<Relayout> benchmarker_naive(handle_naive.get());
  115. Checker<Relayout> checker(handle);
  116. benchmarker_naive.set_times(1).set_display(false);
  117. benchmarker.set_times(1).set_display(false);
  118. for (auto&& arg : args) {
  119. checker.execl({arg.src, arg.dst});
  120. auto t0 = benchmarker.execl({arg.src, arg.dst});
  121. auto t1 = benchmarker_naive.execl({arg.src, arg.dst});
  122. double k = arg.dst.span().dist_byte() * 1e3 / (1024 * 1024 * 1024);
  123. printf("cur=%7.3fms,%5.2fGiB/s naive=%7.3fms,%5.2fGiB/s %s %s\n", t0, k / t0,
  124. t1, k / t1, arg.dst.TensorShape::to_string().c_str(),
  125. arg.dst.dtype.name());
  126. }
  127. }
  128. TEST(RELAYOUT, TRANSPOSE_DET) {
  129. auto run = [](const TensorShape& shape, const std::vector<size_t>& dimshuffle,
  130. bool expect_is_transpose, const TransposeParam& p = {}) {
  131. TensorLayout src{shape, dtype::Float32{}};
  132. src = src.dimshuffle(dimshuffle).collapse_contiguous();
  133. TensorLayout dst{TensorShape{src.total_nr_elems()}, src.dtype};
  134. TransposeParam p_get;
  135. bool succ = is_transpose(src, dst, p_get);
  136. ASSERT_EQ(expect_is_transpose, succ);
  137. if (succ) {
  138. ASSERT_EQ(p_get.batch, p.batch);
  139. ASSERT_EQ(p_get.m, p.m);
  140. ASSERT_EQ(p_get.n, p.n);
  141. ASSERT_EQ(p_get.c, p.c);
  142. }
  143. // swap m, n
  144. succ = is_transpose(dst, src, p_get);
  145. ASSERT_EQ(expect_is_transpose, succ);
  146. if (succ) {
  147. ASSERT_EQ(p_get.batch, p.batch);
  148. ASSERT_EQ(p_get.m, p.n);
  149. ASSERT_EQ(p_get.n, p.m);
  150. ASSERT_EQ(p_get.c, p.c);
  151. }
  152. };
  153. run({2, 3}, {1, 0}, true, {1, 2, 3, 1, 0});
  154. run({2, 3, 5}, {1, 0, 2}, true, {1, 2, 3, 5, 0});
  155. run({2, 3, 5}, {0, 2, 1}, true, {2, 3, 5, 1, 0});
  156. run({3, 2, 3, 5}, {0, 2, 1, 3}, true, {3, 2, 3, 5, 0});
  157. run({3, 2, 3, 5}, {0, 1, 3, 2}, true, {6, 3, 5, 1, 0});
  158. run({2, 3, 5}, {2, 1, 0}, false);
  159. run({3, 2, 3, 5}, {3, 2, 1, 0}, false);
  160. }
  161. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台