You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

correlation.cpp 6.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. /**
  2. * \file dnn/test/cuda/correlation.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "test/cuda/fixture.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/correlation.h"
  15. namespace megdnn {
  16. namespace test {
  17. TEST_F(CUDA, CORRELATION_FORWARD) {
  18. using namespace correlation;
  19. std::vector<TestArg> args = get_args();
  20. Checker<Correlation> checker(handle_cuda());
  21. for (auto&& arg : args) {
  22. checker.set_param(arg.param)
  23. .set_dtype(0, dtype::Float32())
  24. .set_dtype(1, dtype::Float32())
  25. .execs({arg.data1, arg.data2, {}});
  26. }
  27. }
  28. TEST_F(CUDA, CORRELATION_BACKWARDDATA1) {
  29. ConstValue const_0{0};
  30. using Param = CorrelationBackwardData1::Param;
  31. Param param;
  32. param.is_multiply = true;
  33. param.format = Param::Format::NCHW;
  34. param.stride1 = 2;
  35. param.stride2 = 2;
  36. param.kernel_size = 3;
  37. param.pad_size = 4;
  38. Checker<CorrelationBackwardData1> checker(handle_cuda());
  39. checker.set_epsilon(1e-2);
  40. uint32_t pad_size = param.pad_size;
  41. uint32_t kernel_size = param.kernel_size;
  42. uint32_t stride1 = param.stride1;
  43. uint32_t stride2 = param.stride2;
  44. uint32_t max_displacement = param.max_displacement;
  45. auto run = [&](DType dtype) {
  46. for (size_t N : {1, 3})
  47. for (size_t C : {1, 3})
  48. for (size_t OH : {10, 100})
  49. for (size_t OW : {10, 100}) {
  50. int paddedbottomheight = OH + 2 * pad_size;
  51. int paddedbottomwidth = OW + 2 * pad_size;
  52. uint32_t kernel_radius = (kernel_size - 1) / 2;
  53. uint32_t border_size = max_displacement + kernel_radius;
  54. uint32_t top_width =
  55. ceil(static_cast<float>(paddedbottomwidth -
  56. border_size * 2) /
  57. static_cast<float>(stride1));
  58. uint32_t top_height =
  59. ceil(static_cast<float>(paddedbottomheight -
  60. border_size * 2) /
  61. static_cast<float>(stride1));
  62. uint32_t neighborhood_grid_radius =
  63. max_displacement / stride2;
  64. uint32_t neighborhood_grid_width =
  65. neighborhood_grid_radius * 2 + 1;
  66. uint32_t top_channels = neighborhood_grid_width *
  67. neighborhood_grid_width;
  68. checker.set_param(param)
  69. .set_dtype(0, dtype)
  70. .set_dtype(1, dtype)
  71. .set_dtype(2, dtype)
  72. .set_dtype(3, dtype)
  73. .execs({{N, top_channels, top_height,
  74. top_width},
  75. {N, C, OH, OW},
  76. {N, C, OH, OW},
  77. {N, C, OH, OW}});
  78. }
  79. };
  80. run(dtype::Float32());
  81. run(dtype::Float16());
  82. checker.set_epsilon(5e-2);
  83. run(dtype::BFloat16());
  84. }
  85. TEST_F(CUDA, CORRELATION_BACKWARDDATA2) {
  86. ConstValue const_0{0};
  87. using Param = CorrelationBackwardData2::Param;
  88. Param param;
  89. param.is_multiply = true;
  90. param.format = Param::Format::NCHW;
  91. param.stride1 = 2;
  92. param.stride2 = 2;
  93. param.kernel_size = 3;
  94. param.pad_size = 4;
  95. Checker<CorrelationBackwardData2> checker(handle_cuda());
  96. checker.set_epsilon(1e-2);
  97. uint32_t pad_size = param.pad_size;
  98. uint32_t kernel_size = param.kernel_size;
  99. uint32_t stride1 = param.stride1;
  100. uint32_t stride2 = param.stride2;
  101. uint32_t max_displacement = param.max_displacement;
  102. auto run = [&](DType dtype) {
  103. for (size_t N : {1, 3})
  104. for (size_t C : {1, 3})
  105. for (size_t OH : {10, 100})
  106. for (size_t OW : {10, 100}) {
  107. int paddedbottomheight = OH + 2 * pad_size;
  108. int paddedbottomwidth = OW + 2 * pad_size;
  109. uint32_t kernel_radius = (kernel_size - 1) / 2;
  110. uint32_t border_size = max_displacement + kernel_radius;
  111. uint32_t top_width =
  112. ceil(static_cast<float>(paddedbottomwidth -
  113. border_size * 2) /
  114. static_cast<float>(stride1));
  115. uint32_t top_height =
  116. ceil(static_cast<float>(paddedbottomheight -
  117. border_size * 2) /
  118. static_cast<float>(stride1));
  119. uint32_t neighborhood_grid_radius =
  120. max_displacement / stride2;
  121. uint32_t neighborhood_grid_width =
  122. neighborhood_grid_radius * 2 + 1;
  123. uint32_t top_channels = neighborhood_grid_width *
  124. neighborhood_grid_width;
  125. checker.set_param(param)
  126. .set_dtype(0, dtype)
  127. .set_dtype(1, dtype)
  128. .set_dtype(2, dtype)
  129. .set_dtype(3, dtype)
  130. .execs({{N, top_channels, top_height,
  131. top_width},
  132. {N, C, OH, OW},
  133. {N, C, OH, OW},
  134. {N, C, OH, OW}});
  135. }
  136. };
  137. run(dtype::Float32());
  138. run(dtype::Float16());
  139. checker.set_epsilon(5e-2);
  140. run(dtype::BFloat16());
  141. }
  142. } // namespace test
  143. } // namespace megdnn
  144. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台。