You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dct_ref.cpp 9.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. /**
  2. * \file
  3. * dnn/test/common/dct_ref.cpp
  4. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  5. *
  6. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  7. *
  8. * Unless required by applicable law or agreed to in writing,
  9. * software distributed under the License is distributed on an
  10. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  11. * implied.
  12. */
  13. #include "test/common/dct_ref.h"
  14. namespace megdnn {
  15. namespace test {
//! A precomputed channel-mask case: per-input-channel offsets into the
//! value table, plus the mask values themselves (DCT coefficient indices).
struct FixCase {
    std::vector<int> mask_offset;
    std::vector<int> mask_val;
};
  20. using Param = DctChannelSelectForward::Param;
  21. static inline FixCase get_fix_mask(Param::FastImpl impl) {
  22. std::vector<int> fix_32_mask_offset{0, 16, 24, 32};
  23. std::vector<int> fix_32_mask_val{0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32,
  24. 25, 18, 11, 4, 5, 0, 1, 8, 16, 9, 2,
  25. 3, 10, 0, 1, 8, 16, 9, 2, 3, 10};
  26. megdnn_assert(impl == Param::FastImpl::FIX_32_MASK,
  27. "only support gen FIX_32_MASK");
  28. return {fix_32_mask_offset, fix_32_mask_val};
  29. }
  30. CheckerHelper::TensorsConstriant gen_dct_constriant(
  31. const size_t /* n */, const size_t ic, const size_t ih, const size_t iw,
  32. const size_t oc, Param param) {
  33. auto constraint = [=](CheckerHelper::TensorValueArray& tensors_orig) {
  34. const size_t block = param.dct_block_size;
  35. const int block_c = param.format == Param::Format::NCHW4 ? 4 : 1;
  36. megdnn_assert(oc % block_c == 0, "oc mod block_c must == 0");
  37. std::shared_ptr<DctTestcase> test_case_ptr = DctTestcase::make();
  38. DctTestcase& test_case = *test_case_ptr.get();
  39. UniformIntRNG rng(0, 255);
  40. UniformIntRNG mask_rng(0, 64 / block_c - 1);
  41. const size_t no_mask_oc = ic * block * block;
  42. megdnn_assert(ih % block == 0, "%zu mod %zu == 0", ih, block);
  43. megdnn_assert(iw % block == 0, "%zu mod %zu == 0", iw, block);
  44. TensorND mask_offset;
  45. TensorND mask_val;
  46. std::vector<int>& mask_offset_vec = test_case.mask_offset_vec;
  47. std::vector<int>& mask_val_vec = test_case.mask_val_vec;
  48. UniformIntRNG rng_oc(0, oc);
  49. if (param.fastImpl == Param::FastImpl::FIX_32_MASK) {
  50. auto fix_32_mask = get_fix_mask(Param::FastImpl::FIX_32_MASK);
  51. mask_offset_vec = fix_32_mask.mask_offset;
  52. mask_val_vec = fix_32_mask.mask_val;
  53. megdnn_assert(oc == 32, "oc must eq 32");
  54. } else if (no_mask_oc > oc) {
  55. size_t remain_oc = oc;
  56. mask_offset_vec.resize(ic + 1);
  57. mask_val_vec.resize(oc);
  58. mask_offset_vec[0] = 0;
  59. for (size_t ic_idx = 0; ic_idx < ic; ++ic_idx) {
  60. size_t random_len = (int)rng_oc.gen_single_val() * block_c;
  61. size_t mask_len = (ic_idx == ic - 1) || (remain_oc == 0)
  62. ? remain_oc
  63. : random_len % remain_oc;
  64. megdnn_assert(mask_len % block_c == 0,
  65. "mask_len mod block_c == 0, but %zu mod %d ",
  66. mask_len, block_c);
  67. const size_t oc_idx = mask_offset_vec[ic_idx];
  68. remain_oc -= mask_len;
  69. mask_offset_vec[ic_idx + 1] = oc_idx + mask_len;
  70. for (size_t mask_idx = 0; mask_idx < mask_len; ++mask_idx) {
  71. mask_val_vec[oc_idx + mask_idx] =
  72. (int)mask_rng.gen_single_val();
  73. }
  74. }
  75. }
  76. mask_offset = TensorND(mask_offset_vec.data(),
  77. {{mask_offset_vec.size()}, dtype::Int32()});
  78. mask_val = TensorND(mask_val_vec.data(),
  79. {{mask_val_vec.size()}, dtype::Int32()});
  80. if (tensors_orig.size() > 1) {
  81. megdnn_assert(tensors_orig.size() == 4, "tensors_orig.size() == 4");
  82. megdnn_assert(mask_offset_vec.size() >= 2,
  83. "mask_offset_vec.size() >= 2");
  84. megdnn_assert(tensors_orig[1].layout == mask_offset.layout,
  85. "tensors_orig[1].layout == mask_offset.layout");
  86. megdnn_assert(tensors_orig[2].layout == mask_val.layout,
  87. "tensors_orig[2].layout == mask_val.layout");
  88. auto naive_handle = create_cpu_handle(2, false);
  89. megdnn_memcpy_D2D(naive_handle.get(), tensors_orig[1].raw_ptr,
  90. mask_offset.raw_ptr,
  91. mask_offset.layout.span().dist_byte());
  92. megdnn_memcpy_D2D(naive_handle.get(), tensors_orig[2].raw_ptr,
  93. mask_val.raw_ptr,
  94. mask_val.layout.span().dist_byte());
  95. }
  96. };
  97. return constraint;
  98. }
//! Generate a full DCT-channel-select test case: a random uint8 input
//! tensor, a (random or fixed) channel mask, and -- when correct_result is
//! true -- the reference output computed by the naive CPU implementation.
//! ih and iw must be multiples of param.dct_block_size; oc must be a
//! multiple of the format's channel packing (4 for NCHW4).
std::shared_ptr<DctTestcase> gen_dct_case(const size_t n, const size_t ic,
                                          const size_t ih, const size_t iw,
                                          const size_t oc, Param param,
                                          DType dst_dtype,
                                          bool correct_result) {
    const size_t block = param.dct_block_size;
    // NCHW4 packs 4 channels per element; other formats are unpacked.
    const int block_c = param.format == Param::Format::NCHW4 ? 4 : 1;
    megdnn_assert(oc % block_c == 0, "oc mod block_c must == 0");
    std::shared_ptr<DctTestcase> test_case_ptr = DctTestcase::make();
    DctTestcase& test_case = *test_case_ptr.get();
    UniformIntRNG rng(0, 255);
    UniformIntRNG mask_rng(0, 64 / block_c - 1);
    const size_t input_elements = n * ic * ih * iw;
    // Channel count the op would emit with no mask at all.
    const size_t no_mask_oc = ic * block * block;
    megdnn_assert(ih % block == 0, "%zu mod %zu == 0", ih, block);
    megdnn_assert(iw % block == 0, "%zu mod %zu == 0", iw, block);
    // Fill the source tensor with random uint8 values; the vector is owned
    // by the test case so the TensorND below stays valid.
    std::vector<uint8_t>& inp_vec = test_case.inp_vec;
    inp_vec.resize(input_elements);
    TensorShape input_shape{n, ic, ih, iw};
    for (auto& elm : inp_vec) {
        elm = (uint8_t)rng.gen_single_val();
    }
    auto src = TensorND(inp_vec.data(), {input_shape, dtype::Uint8()});
    TensorND mask_offset;
    TensorND mask_val;
    std::vector<int>& mask_offset_vec = test_case.mask_offset_vec;
    std::vector<int>& mask_val_vec = test_case.mask_val_vec;
    UniformIntRNG rng_oc(0, oc);
    if (param.fastImpl == Param::FastImpl::FIX_32_MASK) {
        // Fixed 32-channel mask preset; requires oc == 32.
        auto fix_32_mask = get_fix_mask(Param::FastImpl::FIX_32_MASK);
        mask_offset_vec = fix_32_mask.mask_offset;
        mask_val_vec = fix_32_mask.mask_val;
        megdnn_assert(oc == 32, "oc must eq 32");
    } else if (no_mask_oc > oc) {
        // Random mask: distribute the oc output channels across the ic
        // input channels in multiples of block_c; the last channel (or an
        // exhausted budget) takes whatever remains.
        size_t remain_oc = oc;
        mask_offset_vec.resize(ic + 1);
        mask_val_vec.resize(oc);
        mask_offset_vec[0] = 0;
        for (size_t ic_idx = 0; ic_idx < ic; ++ic_idx) {
            size_t random_len = (int)rng_oc.gen_single_val() * block_c;
            size_t mask_len = (ic_idx == ic - 1) || (remain_oc == 0)
                                      ? remain_oc
                                      : random_len % remain_oc;
            megdnn_assert(mask_len % block_c == 0,
                          "mask_len mod block_c == 0, but %zu mod %d ",
                          mask_len, block_c);
            const size_t oc_idx = mask_offset_vec[ic_idx];
            remain_oc -= mask_len;
            mask_offset_vec[ic_idx + 1] = oc_idx + mask_len;
            for (size_t mask_idx = 0; mask_idx < mask_len; ++mask_idx) {
                mask_val_vec[oc_idx + mask_idx] =
                        (int)mask_rng.gen_single_val();
            }
        }
    }
    mask_offset = TensorND(mask_offset_vec.data(),
                           {{mask_offset_vec.size()}, dtype::Int32()});
    mask_val = TensorND(mask_val_vec.data(),
                        {{mask_val_vec.size()}, dtype::Int32()});
    if (mask_offset_vec.size() >= 2) {
        // A mask was generated: pass offset/value tensors as inputs 1 and 2.
        test_case.testcase_in = {
                src, mask_offset, mask_val, {nullptr, {{}, dst_dtype}}};
    } else {
        // No mask: inputs 1 and 2 are empty placeholders.
        test_case.testcase_in = {src, {}, {}, {nullptr, {{}, dst_dtype}}};
    }
    // Use a naive CPU operator to deduce the output layout, and (optionally)
    // to compute the reference result.
    auto naive_handle = create_cpu_handle(2, false);
    auto opr_naive = naive_handle->create_operator<DctChannelSelectForward>();
    opr_naive->param() = param;
    using Proxy = OprProxy<DctChannelSelectForward>;
    Proxy naive_proxy;
    TensorLayout temp_dst_layout;
    temp_dst_layout.dtype = dst_dtype;
    TensorLayoutArray layouts{src.layout, mask_offset.layout, mask_val.layout,
                              temp_dst_layout};
    naive_proxy.deduce_layout(opr_naive.get(), layouts);
    const size_t output_elements = layouts[3].total_nr_elems();
    std::vector<float>& output_vec = test_case.output_vec;
    output_vec.resize(output_elements);
    auto dst = TensorND(output_vec.data(), layouts[3]);
    DctTestcase::TensorValueArray testcase_naive;
    testcase_naive.emplace_back(test_case.testcase_in[0]);
    testcase_naive.emplace_back(test_case.testcase_in[1]);
    testcase_naive.emplace_back(test_case.testcase_in[2]);
    testcase_naive.emplace_back(dst);
    if (correct_result) {
        // Run the naive implementation to fill dst with the reference
        // output; otherwise dst keeps the zero-filled resized buffer.
        naive_proxy.exec(opr_naive.get(), testcase_naive);
    }
    test_case.testcase_out = {{}, {}, {}, dst};
    return test_case_ptr;
}
  189. } // namespace test
  190. } // namespace megdnn
  191. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台