You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

reduce.cpp 4.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. /**
  2. * \file dnn/test/cuda/reduce.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megdnn/oprs.h"
  12. #include "test/common/checker.h"
  13. #include "test/common/rng.h"
  14. #include "test/cuda/fixture.h"
  15. using namespace megdnn;
  16. using namespace test;
  17. TEST_F(CUDA, REDUCE) {
  18. using Mode = Reduce::Param::Mode;
  19. Checker<Reduce> checker(handle_cuda());
  20. UniformFloatRNG rng(-1.0f, 1.0f);
  21. checker.set_epsilon(1e-2);
  22. checker.set_rng(0, &rng);
  23. checker.set_param({Mode::SUM, 1});
  24. // 1-step
  25. checker.execs({{2, 64, 32}, {}});
  26. // 2-step
  27. checker.execs({{2, 192, 32}, {}});
  28. // 3-step
  29. checker.execs({{2, 4333, 32}, {}});
  30. // single reduce
  31. checker.execs({{2, 1, 1}, {}});
  32. checker.execs({{2, 1 + 1, 1}, {}});
  33. checker.execs({{2, 2048 + 1, 1}, {}});
  34. checker.execs({{2, 2048 * 2048 + 1, 1}, {}});
  35. checker.execs({{2, 1 + 1, 31}, {}});
  36. checker.execs({{2, 16 + 1, 31}, {}});
  37. checker.execs({{2, 16 * 16 + 1, 31}, {}});
  38. checker.execs({{2, 16 * 16 * 16 + 1, 31}, {}});
  39. checker.execs({{2, 16 * 16 * 16 * 16 + 1, 31}, {}});
  40. #if MEGDNN_TEGRA_X1
  41. checker.execs({{2, 8 * 16 * 16 * 16 * 16 + 1, 31}, {}});
  42. #else
  43. checker.execs({{2, 16 * 16 * 16 * 16 * 16 + 1, 31}, {}});
  44. #endif
  45. checker.execs({{3, 256 * 256 + 1, 2}, {}});
  46. checker.execs({{3, 128 * 128 + 1, 3}, {}});
  47. checker.execs({{3, 64 * 64 + 1, 7}, {}});
  48. checker.execs({{3, 32 * 32 + 1, 15}, {}});
  49. checker.execs({{3, 512, 500}, {}});
  50. // very large reduce
  51. checker.execs({{1, 4194304, 1}, {}});
  52. auto check = [&](Reduce::Mode mode, DType src_dtype, DType dst_dtype,
  53. Reduce::DataType data_type) {
  54. for (int32_t axis : {0, 1, 2, 3}) {
  55. if (data_type == Reduce::DataType::DEFAULT &&
  56. src_dtype == dtype::Float16()) {
  57. checker.set_epsilon(1e-2);
  58. } else {
  59. checker.set_epsilon(1e-3);
  60. }
  61. Reduce::Param param{mode, axis, data_type};
  62. auto dst_shape = TensorShape{2, 3, 100, 5};
  63. dst_shape[axis] = 1;
  64. checker.set_dtype(0, src_dtype)
  65. .set_dtype(1, dst_dtype)
  66. .set_param(param)
  67. .execs({{2, 3, 100, 5}, dst_shape});
  68. }
  69. };
  70. for (auto mode : {Mode::SUM, Mode::MEAN, Mode::SUM_SQR, Mode::PRODUCT,
  71. Mode::MIN, Mode::MAX}) {
  72. for (auto dtype : std::vector<DType>{dtype::Float16(), dtype::Float32(),
  73. dtype::Int32()}) {
  74. check(mode, dtype, dtype, Reduce::DataType::DEFAULT);
  75. }
  76. check(mode, dtype::Float16(), dtype::Float32(),
  77. Reduce::DataType::FLOAT_O32xC32);
  78. check(mode, dtype::Int32(), dtype::Float32(),
  79. Reduce::DataType::FLOAT_O32xC32);
  80. check(mode, dtype::Float16(), dtype::Float16(),
  81. Reduce::DataType::FLOAT_O16xC32);
  82. check(mode, dtype::Float32(), dtype::Float16(),
  83. Reduce::DataType::FLOAT_O16xC32);
  84. ASSERT_THROW(check(mode, dtype::Int32(), dtype::Float16(),
  85. Reduce::DataType::FLOAT_O16xC32),
  86. MegDNNError);
  87. ASSERT_THROW(check(mode, dtype::Float16(), dtype::Float16(),
  88. Reduce::DataType::FLOAT_IO16xC32),
  89. MegDNNError);
  90. }
  91. {
  92. // very large reduce for I16CO32
  93. Reduce::Param param{Mode::SUM_SQR, 1,
  94. Reduce::Param::DataType::FLOAT_O32xC32};
  95. checker.set_dtype(0, dtype::Float16())
  96. .set_dtype(1, dtype::Float32())
  97. .set_param(param)
  98. .execs({{1, 4194304, 1}, {1, 1, 1}});
  99. }
  100. }
  101. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台