
relayout.cpp 6.2 kB

/**
 * \file dnn/test/common/relayout.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "megdnn/oprs/general.h"
#include "src/common/relayout_helper.h"
#include "test/common/benchmarker.h"
#include "test/common/relayout.h"
#include "test/common/checker.h"

using namespace megdnn;
using namespace test;
using namespace megdnn::relayout;
using namespace test::relayout;

namespace {
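// Build a transpose test case: src keeps the shape {batch, n, m, c} but its
// strides describe a contiguous row-major {batch, m, n, c} buffer, so copying
// src into the contiguous dst performs an m/n transpose.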
TestArg generate_transpose_args(size_t batch, size_t m, size_t n, size_t c,
                                DType dtype) {
    TestArg arg;
    arg.src = TensorLayout(
            TensorShape{batch, n, m, c},
            {static_cast<std::ptrdiff_t>(n * m * c),
             static_cast<std::ptrdiff_t>(c),
             static_cast<std::ptrdiff_t>(n * c), 1},
            dtype);
    arg.dst = TensorLayout(TensorShape{batch, n, m, c}, dtype);
    return arg;
}
}  // anonymous namespace
namespace megdnn {
namespace test {
namespace relayout {

#define DEF_TEST(name) \
    template <>        \
    void run_test<name>(Handle* handle)
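// Image-style transposes: M and N straddle 128 so that both nicely aligned
// and unaligned extents are covered, and CH in {1, 3, 5} exercises common
// and odd channel counts.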
DEF_TEST(cv) {
    std::vector<TestArg> args;
    for (size_t M = 124; M <= 130; ++M) {
        for (size_t N = 124; N <= 130; ++N) {
            for (size_t CH : {1, 3, 5}) {
                args.push_back(
                        generate_transpose_args(1, M, N, CH, dtype::Uint8()));
                args.push_back(
                        generate_transpose_args(1, M, N, CH, dtype::Int32()));
                args.push_back(
                        generate_transpose_args(1, M, N, CH, dtype::Float32()));
                args.push_back(
                        generate_transpose_args(3, M, N, CH, dtype::Float32()));
            }
        }
    }
    Checker<Relayout> checker(handle);
    for (auto&& arg : args) {
        checker.execl({arg.src, arg.dst});
    }
}
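
// Broadcasting relayouts: a zero stride repeats the same source elements
// along that axis, so the contiguous dst is filled with copies of the
// broadcast data.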
DEF_TEST(broadcast) {
    std::vector<TestArg> args;
    TensorLayout src{{2, 3, 4}, dtype::Float32()},
            dst{{2, 3, 4}, dtype::Float32()};
    src.stride[0] = 4;
    src.stride[1] = 0;
    args.emplace_back(src, dst);
    // last stride contiguous
    args.emplace_back(TensorLayout({3, 100, 2}, {2, 0, 1}, dtype::Float16()),
                      TensorLayout({3, 100, 2}, {200, 2, 1}, dtype::Float16()));
    Checker<Relayout> checker(handle);
    for (auto&& arg : args) {
        checker.execl({arg.src, arg.dst});
    }
}
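
// Negating the outermost source stride makes dim 0 step backwards in memory;
// relayout must still produce a correctly ordered dst.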
DEF_TEST(negative) {
    TensorLayout src{{7, 8, 10}, dtype::Float32()},
            dst{{7, 8, 10}, dtype::Float32()};
    src.stride[0] *= -1;
    Checker<Relayout> checker(handle);
    checker.execl({src, dst});
}
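
// dimshuffle produces non-contiguous transposed views; each pair is checked
// in both directions (transposed view -> contiguous and back).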
DEF_TEST(transpose) {
    Checker<Relayout> checker(handle);
    {
        TensorLayout sl({8, 10}, dtype::Int32()), dl({10, 8}, dtype::Int32());
        sl = sl.dimshuffle({1, 0});
        checker.execl({sl, dl});
        checker.execl({dl, sl});
    }
    {
        TensorLayout sl({8, 10, 2}, dtype::Int32()),
                dl({2, 8, 10}, dtype::Int32());
        sl = sl.dimshuffle({2, 0, 1});
        checker.execl({sl, dl});
        checker.execl({dl, sl});
    }
}

#undef DEF_TEST

}  // namespace relayout
}  // namespace test
}  // namespace megdnn
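
// Benchmark the current handle against a naive reference CPU handle on
// image-transpose shapes, verifying each case for correctness before timing.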
void test::relayout::run_cv_benchmark(Handle* handle) {
    auto handle_naive = create_cpu_handle(2);
    std::vector<TestArg> args;
    args.push_back(generate_transpose_args(1, 255, 256, 1, dtype::Int32()));
    args.push_back(generate_transpose_args(1, 513, 1025, 3, dtype::Int32()));
    args.push_back(generate_transpose_args(1, 255, 256, 1, dtype::Uint8()));
    args.push_back(generate_transpose_args(1, 513, 1025, 3, dtype::Uint8()));
    args.push_back(generate_transpose_args(1, 255, 256, 3, dtype::Float32()));
    args.push_back(generate_transpose_args(1, 513, 1025, 1, dtype::Float32()));
    args.push_back(generate_transpose_args(2, 987, 573, 6, dtype::Float32()));
    Benchmarker<Relayout> benchmarker(handle);
    Benchmarker<Relayout> benchmarker_naive(handle_naive.get());
    Checker<Relayout> checker(handle);
    benchmarker_naive.set_times(1).set_display(false);
    benchmarker.set_times(1).set_display(false);
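    // t0/t1 are per-run times in milliseconds; k scales the dst span so that
    // k / t yields throughput in GiB/s.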
    for (auto&& arg : args) {
        checker.execl({arg.src, arg.dst});
        auto t0 = benchmarker.execl({arg.src, arg.dst});
        auto t1 = benchmarker_naive.execl({arg.src, arg.dst});
        double k = arg.dst.span().dist_byte() * 1e3 / (1024 * 1024 * 1024);
        printf("cur=%7.3fms,%5.2fGiB/s naive=%7.3fms,%5.2fGiB/s %s %s\n", t0,
               k / t0, t1, k / t1, arg.dst.TensorShape::to_string().c_str(),
               arg.dst.dtype.name());
    }
}
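
// Verify that is_transpose() recognizes layouts that are pure (batched)
// transposes and recovers the describing TransposeParam {batch, m, n, c};
// swapping src and dst must swap m and n in the recovered params.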
TEST(RELAYOUT, TRANSPOSE_DET) {
    auto run = [](const TensorShape& shape,
                  const std::vector<size_t>& dimshuffle,
                  bool expect_is_transpose, const TransposeParam& p = {}) {
        TensorLayout src{shape, dtype::Float32{}};
        src = src.dimshuffle(dimshuffle).collapse_contiguous();
        TensorLayout dst{TensorShape{src.total_nr_elems()}, src.dtype};
        TransposeParam p_get;
        bool succ = is_transpose(src, dst, p_get);
        ASSERT_EQ(expect_is_transpose, succ);
        if (succ) {
            ASSERT_EQ(p_get.batch, p.batch);
            ASSERT_EQ(p_get.m, p.m);
            ASSERT_EQ(p_get.n, p.n);
            ASSERT_EQ(p_get.c, p.c);
        }
        // swap m, n
        succ = is_transpose(dst, src, p_get);
        ASSERT_EQ(expect_is_transpose, succ);
        if (succ) {
            ASSERT_EQ(p_get.batch, p.batch);
            ASSERT_EQ(p_get.m, p.n);
            ASSERT_EQ(p_get.n, p.m);
            ASSERT_EQ(p_get.c, p.c);
        }
    };
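    // Expected params are {batch, m, n, c}; e.g. {2, 3} shuffled by {1, 0} is
    // a single 2x3 transpose (batch = 1, c = 1), while {3, 2, 3, 5} shuffled
    // by {0, 2, 1, 3} is a batch of 3 transposes with c = 5.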
    run({2, 3}, {1, 0}, true, {1, 2, 3, 1});
    run({2, 3, 5}, {1, 0, 2}, true, {1, 2, 3, 5});
    run({2, 3, 5}, {0, 2, 1}, true, {2, 3, 5, 1});
    run({3, 2, 3, 5}, {0, 2, 1, 3}, true, {3, 2, 3, 5});
    run({3, 2, 3, 5}, {0, 1, 3, 2}, true, {6, 3, 5, 1});
    run({2, 3, 5}, {2, 1, 0}, false);
    run({3, 2, 3, 5}, {3, 2, 1, 0}, false);
}

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has GPU hardware installed along with working drivers. If you would like to try deep-learning development on a cloud GPU compute platform, you are welcome to visit the MegStudio platform.