
codegen.cpp 10 kB

/**
 * \file src/jit/test/codegen.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include <memory>
#include "./helper.h"
#include "megbrain/jit/executor_opr.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/test/helper.h"
#include "megdnn/dtype.h"

#if MGB_JIT

using namespace mgb;
using namespace jit;

#define FOREACH_CASE(cb) cb(simple) cb(grad)

namespace {
#define def_tag(x) \
    struct x {};
FOREACH_CASE(def_tag)
#undef def_tag

#define t(n) n,
using test_types = ::testing::Types<FOREACH_CASE(t) void>;
#undef t

template <typename tag>
void run(Backend backend, CompNode cn);
template <>
void run<simple>(Backend backend, CompNode cn) {
    set_backend(backend);
    auto graph = ComputingGraph::make();
    HostTensorGenerator<> gen;
    auto host_x0 = gen({23, 42}, cn), host_x1 = gen({23, 1}, cn),
         host_x2 = gen({1, 42}, cn);
    auto a = opr::Host2DeviceCopy::make(*graph, host_x0),
         b = opr::Host2DeviceCopy::make(*graph, host_x1),
         c = opr::Host2DeviceCopy::make(*graph, host_x2);

    // build a small elemwise expression with dtype conversions
    a = opr::TypeCvt::make(a, dtype::Float16{});
    auto y = a + b * c;
    y = opr::TypeCvt::make(y, dtype::Float16{});
    y = opr::TypeCvt::make((y + y.make_scalar_dt(1.f)), dtype::Float32{});

    VarNodeArray inputs{a.node(), b.node(), c.node()}, outputs{y.node()};
    auto ig_gen =
            std::make_unique<InternalGraphGenerator>(y.node()->owner_opr());

    // collect every opr except the Host2DeviceCopy inputs into the
    // JIT internal graph
    for (auto i : get_rev_topo_order(y)) {
        if (!i->same_type<opr::Host2DeviceCopy>()) {
            ig_gen->add_opr(i);
        }
    }

    auto igraph = ig_gen->generate();
    auto y_jit = JITExecutor::make(igraph, ig_gen->orig_inps());

    // run both the original and the JIT-compiled expression and compare
    HostTensorND host_y, host_y_jit;
    auto func = graph->compile({make_callback_copy(y, host_y),
                                make_callback_copy(y_jit, host_y_jit)});
    func->execute();

    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_jit, 5e-3);
};
template <>
void run<grad>(Backend backend, CompNode cn) {
    set_backend(backend);
    auto graph = ComputingGraph::make();
    HostTensorGenerator<> gen;
    auto host_x0 = gen({23, 42}, cn), host_x1 = gen({23, 1}, cn),
         host_x2 = gen({1, 42}, cn);
    auto a = opr::Host2DeviceCopy::make(*graph, host_x0),
         b = opr::Host2DeviceCopy::make(*graph, host_x1),
         c = opr::Host2DeviceCopy::make(*graph, host_x2);

    a = opr::TypeCvt::make(a, dtype::Float16{});
    auto y = opr::floor_div(a, opr::abs(b) + 0.1f) * opr::sin(c);

    VarNodeArray inputs{a.node(), b.node(), c.node()}, outputs{y.node()};
    auto ig_gen =
            std::make_unique<InternalGraphGenerator>(y.node()->owner_opr());

    for (auto i : get_rev_topo_order(y)) {
        if (!i->same_type<opr::Host2DeviceCopy>()) {
            ig_gen->add_opr(i);
        }
    }

    auto igraph = ig_gen->generate();
    auto y_jit = JITExecutor::make(igraph, ig_gen->orig_inps());

    HostTensorND host_y, host_y_jit;
    auto func = graph->compile({make_callback_copy(y, host_y),
                                make_callback_copy(y_jit, host_y_jit)});
    func->execute();

    MGB_ASSERT_TENSOR_EQ(host_y, host_y_jit);

    // floor_div has zero gradient w.r.t. both operands, so a and b get a
    // null gradient through the JIT executor; only c (through sin) does not
    auto grad = [loss = opr::reduce_sum(y_jit, y_jit.make_scalar(1))](
                        SymbolVar x) {
        return cg::grad(loss, x, false, false).node();
    };
    ASSERT_EQ(nullptr, grad(a));
    ASSERT_EQ(nullptr, grad(b));
    ASSERT_NE(nullptr, grad(c));
};

template <>
void run<void>(Backend, CompNode) {}
#if MGB_JIT_MLIR
void run_mlir(CompNode cn) {
    set_backend(Backend::MLIR);
    auto graph = ComputingGraph::make();
    HostTensorGenerator<dtype::Float32> gen;

    auto host_x0 = gen({23, 42}, cn), host_x1 = gen({23, 42}, cn),
         host_x2 = gen({23, 42}, cn), host_x3 = gen({23, 42}, cn);

    auto a = opr::Host2DeviceCopy::make(*graph, host_x0),
         b = opr::Host2DeviceCopy::make(*graph, host_x1),
         c = opr::Host2DeviceCopy::make(*graph, host_x2);

    auto y = a + b + c;

    auto ig_gen =
            std::make_unique<InternalGraphGenerator>(y.node()->owner_opr());

    for (auto i : get_rev_topo_order(y)) {
        if (!i->same_type<opr::Host2DeviceCopy>()) {
            ig_gen->add_opr(i);
        }
    }

    auto igraph = ig_gen->generate();
    auto y_jit = JITExecutor::make(igraph, ig_gen->orig_inps());

    HostTensorND host_y, host_y_jit;
    auto func = graph->compile({make_callback_copy(y, host_y),
                                make_callback_copy(y_jit, host_y_jit)});
    func->execute();

    MGB_ASSERT_TENSOR_EQ(host_y, host_y_jit);
}
template <typename tag, int arity>
void run_mlir_mode(CompNode cn) {
    set_backend(Backend::MLIR);
    auto graph = ComputingGraph::make();
    float low = 0.f, high = 1.f;
    // LOG needs strictly positive inputs, so shift the sampling range
    if (tag::mode == opr::Elemwise::Mode::LOG) {
        low = 0.1;
        high = 4;
    }
    HostTensorGenerator<dtype::Float32, RandomDistribution::UNIFORM> gen(low,
                                                                         high);

    SmallVector<std::shared_ptr<HostTensorND>> hosts;
    VarNodeArray input_vars;
    for (int i = 0; i < arity; i++) {
        hosts.push_back(gen({23, 42}, cn));
        input_vars.push_back(
                opr::Host2DeviceCopy::make(*graph, hosts[i]).node());
    }

    auto y = opr::Elemwise::make(input_vars, tag::mode);

    auto ig_gen =
            std::make_unique<InternalGraphGenerator>(y.node()->owner_opr());

    for (auto i : get_rev_topo_order(y)) {
        if (!i->template same_type<opr::Host2DeviceCopy>()) {
            ig_gen->add_opr(i);
        }
    }

    auto igraph = ig_gen->generate();
    auto y_jit = JITExecutor::make(igraph, ig_gen->orig_inps());

    HostTensorND host_y, host_y_jit;
    auto func = graph->compile({make_callback_copy(y, host_y),
                                make_callback_copy(y_jit, host_y_jit)});
    func->execute();

    MGB_ASSERT_TENSOR_EQ(host_y, host_y_jit);
}
#endif
}  // anonymous namespace

#if MGB_JIT_HALIDE
template <typename tag>
class TestJITHalideCodeGenCuda : public ::testing::Test {};
TYPED_TEST_CASE(TestJITHalideCodeGenCuda, test_types);
TYPED_TEST(TestJITHalideCodeGenCuda, run) {
    REQUIRE_GPU(1);
    run<TypeParam>(Backend::HALIDE, CompNode::load("gpu0"));
}
#endif

template <typename tag>
class TestJITNvrtcCodeGen : public ::testing::Test {};
TYPED_TEST_CASE(TestJITNvrtcCodeGen, test_types);
TYPED_TEST(TestJITNvrtcCodeGen, run) {
    REQUIRE_GPU(1);
    run<TypeParam>(Backend::NVRTC, CompNode::load("gpu0"));
}

#if MGB_JIT_MLIR
TEST(TestJITMlirCodeGen, Basic) {
    auto cn = CompNode::load("cpu0");
    run_mlir(cn);
}

TEST(TestJITMlirCodeGen, BasicGPU) {
    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpu0");
    run_mlir(cn);
}
///////////////////////// unary ///////////////////////////////
// clang-format off
#define FOREACH_UNARY_MODE(cb) \
    cb(RELU) \
    cb(ABS) \
    cb(NEGATE) \
    cb(CEIL) \
    cb(EXP) \
    cb(FLOOR) \
    cb(LOG) \
    cb(LOG1P) \
    cb(SIN) \
    cb(TANH) \
    cb(FAST_TANH) \
    cb(H_SWISH) \
    cb(SIGMOID) \
    cb(EXPM1) \
    cb(ROUND)
// clang-format on

template <typename tag>
class TestJITMlirUnaryElemwise : public ::testing::Test {};

#define def_tag(x)                                                          \
    struct x {                                                              \
        static constexpr opr::Elemwise::Mode mode = opr::Elemwise::Mode::x; \
    };
FOREACH_UNARY_MODE(def_tag)
#undef def_tag

#define t(n) n,
using mlir_elemwise_unary_types =
        ::testing::Types<FOREACH_UNARY_MODE(t) ABS>;
#undef t
TYPED_TEST_CASE(TestJITMlirUnaryElemwise, mlir_elemwise_unary_types);
TYPED_TEST(TestJITMlirUnaryElemwise, run) {
    auto cn = CompNode::load("cpu0");
    run_mlir_mode<TypeParam, 1>(cn);
}
///////////////////////// binary ///////////////////////////////
// clang-format off
#define FOREACH_BINARY_MODE(cb) \
    cb(ADD) \
    cb(FLOOR_DIV) \
    cb(MUL) \
    cb(MAX) \
    cb(MIN) \
    cb(MOD) \
    cb(SUB) \
    cb(TRUE_DIV) \
    cb(ABS_GRAD) \
    cb(SIGMOID_GRAD) \
    cb(SWITCH_GT0) \
    cb(TANH_GRAD) \
    cb(LT) \
    cb(LEQ) \
    cb(EQ) \
    cb(FUSE_ADD_RELU) \
    cb(LOG_SUM_EXP) \
    cb(FUSE_ADD_TANH) \
    cb(FAST_TANH_GRAD) \
    cb(FUSE_ADD_SIGMOID) \
    cb(H_SWISH_GRAD) \
    cb(FUSE_ADD_H_SWISH)
// clang-format on

template <typename tag>
class TestJITMlirBinaryElemwise : public ::testing::Test {};

#define def_tag(x)                                                          \
    struct x {                                                              \
        static constexpr opr::Elemwise::Mode mode = opr::Elemwise::Mode::x; \
    };
FOREACH_BINARY_MODE(def_tag)
#undef def_tag

#define t(n) n,
using mlir_elemwise_binary_types =
        ::testing::Types<FOREACH_BINARY_MODE(t) ADD>;
#undef t
TYPED_TEST_CASE(TestJITMlirBinaryElemwise, mlir_elemwise_binary_types);
TYPED_TEST(TestJITMlirBinaryElemwise, run) {
    auto cn = CompNode::load("cpu0");
    run_mlir_mode<TypeParam, 2>(cn);
}
///////////////////////// ternary ///////////////////////////////
// clang-format off
#define FOREACH_TERNARY_MODE(cb) \
    cb(COND_LEQ_MOV) \
    cb(FUSE_MUL_ADD3) \
// clang-format on

template <typename tag>
class TestJITMlirTernaryElemwise : public ::testing::Test {};

#define def_tag(x)                                                          \
    struct x {                                                              \
        static constexpr opr::Elemwise::Mode mode = opr::Elemwise::Mode::x; \
    };
FOREACH_TERNARY_MODE(def_tag)
#undef def_tag

#define t(n) n,
using mlir_elemwise_ternary_types =
        ::testing::Types<FOREACH_TERNARY_MODE(t) COND_LEQ_MOV>;
#undef t
TYPED_TEST_CASE(TestJITMlirTernaryElemwise, mlir_elemwise_ternary_types);
TYPED_TEST(TestJITMlirTernaryElemwise, run) {
    auto cn = CompNode::load("cpu0");
    run_mlir_mode<TypeParam, 3>(cn);
}
#endif

#endif  // MGB_JIT

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has a GPU installed along with a working driver. If you would like to try deep-learning development on cloud GPU compute, you are welcome to visit the MegStudio platform.
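As a minimal sketch (not part of the original page), the following Python snippet shows how one might confirm that the bundled CUDA runtime can actually see a GPU before running GPU code; it assumes the megengine package exposes is_cuda_available() and set_default_device(), and that devices are named "gpu0"/"cpu0".

# Hedged sketch: pick a device based on whether CUDA is usable.
# Assumes megengine.is_cuda_available() and megengine.set_default_device()
# are available in the installed MegEngine release.
import megengine as mge

if mge.is_cuda_available():
    mge.set_default_device("gpu0")   # run subsequent ops on the first GPU
else:
    mge.set_default_device("cpu0")   # fall back to CPU
print("CUDA available:", mge.is_cuda_available())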