
layout_transform_pass.cpp

/**
 * \file src/gopt/test/layout_transform_pass.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
  12. #include "./helper.h"
  13. #include "megbrain/gopt/global_layout_transform.h"
  14. #include "megbrain/gopt/inference.h"
  15. #include "megbrain/opr/dnn/pooling.h"
  16. #include "megbrain/opr/imgproc.h"
  17. #include "megbrain/opr/nn_int.h"
  18. #include "megbrain/plugin/profiler.h"
  19. #include "megbrain/serialization/serializer.h"
  20. using namespace mgb;
  21. using namespace gopt;
  22. using namespace serialization;
  23. #if MGB_CUDA
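// Load a quantized feature model, restrict the layout search space to the
// channel-packed NCHW4/NCHW32/CHWN4 formats, run the layout-transform
// pipeline, and dump the optimized graph back to disk.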
TEST(TestLayoutTransform, Feature) {
    auto inp_file = InputFile::make_fs("./feat.mdl");
    auto format = GraphLoader::identify_graph_dump_format(*inp_file);
    ASSERT_TRUE(format.valid());
    auto loader = GraphLoader::make(std::move(inp_file), format.val());
    GraphLoader::LoadConfig load_config;
    load_config.comp_graph = ComputingGraph::make();
    auto&& graph_opt = load_config.comp_graph->options();
    graph_opt.graph_opt.enable_fuse_conv_bias_nonlinearity();
    graph_opt.graph_opt.enable_fuse_conv_bias_with_z();
    auto ret = loader->load(load_config, false);

    using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
    S strategy = S::PROFILE;
    gopt::modify_opr_algo_strategy_inplace({ret.output_var_list}, strategy);
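    // Build the layout-transform context: the operator whitelist, the tensor
    // formats the solver may choose among, and the base formats of the input
    // graph.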
    using OprFormat = LayoutTransformContext::OprFormat;
    using OprList = LayoutTransformContext::OprList;
    using ReformatAttribute = LayoutTransformContext::ReformatAttribute;
    using Attribute = LayoutTransformContext::Attribute;
    OprList opr_list = {
            opr::ConvBiasForward::typeinfo(),
            opr::ElemwiseMultiType::typeinfo(),
            opr::Elemwise::typeinfo(),
            opr::TypeCvt::typeinfo(),
            opr::PoolingForward::typeinfo(),
            opr::WarpPerspectiveForward::typeinfo(),
    };
    SmallVector<TensorFormats> available_tensor_formats = {
            TensorFormats::NCHWc4, TensorFormats::NCHWc32, TensorFormats::CHWNc4};
    Attribute attribute = {OprFormat::NCHW4, TensorFormats::NCHWc4,
                           ReformatAttribute::DEFAULT};
    auto ctx = std::make_unique<LayoutTransformContext>(
            std::move(opr_list), std::move(available_tensor_formats), attribute);
    ctx->add_opr_config(
               opr::ConvBiasForward::typeinfo(),
               {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::CHWN4})
            .add_opr_config(
                    opr::PoolingForward::typeinfo(),
                    {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::CHWN4})
            .add_opr_config(opr::WarpPerspectiveForward::typeinfo(),
                            OprFormat::NCHW4);
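    // Profile candidate layouts, but only solve graph partitions that already
    // contain an NCHW4 convolution; other partitions are left untouched.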
    auto profiler = ProfilerBase::make_profiler();
    auto filter = [](const GraphPartition& partition) {
        auto has_nchw4_conv = false;
        for (auto&& opr : partition.all_oprs()) {
            if (opr->dyn_typeinfo() == opr::ConvBiasForward::typeinfo()) {
                auto& conv = opr->cast_final_safe<opr::ConvBiasForward>();
                if (conv.param().format ==
                    LayoutTransformContext::OprFormat::NCHW4) {
                    has_nchw4_conv = true;
                    break;
                }
            }
        }
        return has_nchw4_conv;
    };
    std::unique_ptr<SolverBase> solver{
            new DynamicProgrammingSolver(std::move(profiler), std::move(filter))};
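    // Fuse conv+bias patterns first so the layout pass sees ConvBias oprs,
    // then let the cleanup passes fold away the reformat/dimshuffle oprs the
    // layout transform inserts.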
    auto new_out_vars = gopt::GraphOptimizer{}
                                .add_pass<FuseConvBiasNonlinPass>()
                                .add_pass<FuseConvBiasZPass>()
                                .add_pass<LayoutTransformPass>(
                                        std::move(ctx), std::move(solver))
                                .add_pass<ShuffleShuffleRemovePass>()
                                .add_pass(FuseNCHW4Int8Preprocess::make())
                                .add_pass<FoldingConvBiasDimshufflePass>()
                                .add_pass<ParamFusePass>()
                                .add_pass<ParamMergePass>()
                                .apply(ret.output_var_list)
                                .endpoint_vars();
    auto dumper = GraphDumper::make(OutputFile::make_fs("model_opt.mgb"));
    dumper->dump({new_out_vars});
}
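// Same pipeline on a detection model. The search space here also includes the
// plain NCHW/NHWC formats and the 64-channel-packed NCHW64 format, and the
// optimized graph is executed and profiled instead of being dumped.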
TEST(TestLayoutTransform, Detection) {
    auto inp_file = InputFile::make_fs("./det.mdl");
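    // The dump is prefixed with extra header bytes; skip them before letting
    // the loader detect the serialization format.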
    static const char* magic = "mgbteset0";
    size_t skip_size = sizeof(magic) + sizeof(uint32_t);
    char skip[skip_size];
    inp_file->read(skip, skip_size);
    auto format = GraphLoader::identify_graph_dump_format(*inp_file);
    ASSERT_TRUE(format.valid());
    auto loader = GraphLoader::make(std::move(inp_file), format.val());
    GraphLoader::LoadConfig load_config;
    load_config.comp_graph = ComputingGraph::make();
    auto&& graph_opt = load_config.comp_graph->options();
    graph_opt.graph_opt.enable_fuse_conv_bias_nonlinearity();
    graph_opt.graph_opt.enable_fuse_conv_bias_with_z();
    auto ret = loader->load(load_config, false);

    using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
    S strategy = S::PROFILE;
    gopt::modify_opr_algo_strategy_inplace({ret.output_var_list}, strategy);
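    // Context for the detection model: a wider operator whitelist (including
    // plain convolutions and deconvolutions) and a larger set of candidate
    // tensor formats.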
    using OprFormat = LayoutTransformContext::OprFormat;
    using OprList = LayoutTransformContext::OprList;
    using ReformatAttribute = LayoutTransformContext::ReformatAttribute;
    using Attribute = LayoutTransformContext::Attribute;
    OprList opr_list = {
            opr::ConvBiasForward::typeinfo(),
            opr::ConvolutionForward::typeinfo(),
            opr::ConvolutionBackwardData::typeinfo(),
            opr::ElemwiseMultiType::typeinfo(),
            opr::Elemwise::typeinfo(),
            opr::TypeCvt::typeinfo(),
            opr::PoolingForward::typeinfo(),
            opr::WarpPerspectiveForward::typeinfo(),
    };
    SmallVector<TensorFormats> available_tensor_formats = {
            TensorFormats::NCHW,    TensorFormats::NHWC,
            TensorFormats::NCHWc4,  TensorFormats::NCHWc32,
            TensorFormats::NCHWc64, TensorFormats::CHWNc4};
    Attribute attribute = {OprFormat::NCHW, TensorFormats::NCHW,
                           ReformatAttribute::DEFAULT};
    auto ctx = std::make_unique<LayoutTransformContext>(
            std::move(opr_list), std::move(available_tensor_formats), attribute);
    ctx->add_opr_config(
               opr::ConvBiasForward::typeinfo(),
               {OprFormat::NCHW, OprFormat::NHWC, OprFormat::NCHW4,
                OprFormat::NCHW32, OprFormat::NCHW64, OprFormat::CHWN4})
            .add_opr_config(opr::ConvolutionForward::typeinfo(),
                            {OprFormat::NCHW, OprFormat::NCHW4})
            .add_opr_config(opr::ConvolutionBackwardData::typeinfo(),
                            {OprFormat::NCHW, OprFormat::NCHW4})
            .add_opr_config(
                    opr::PoolingForward::typeinfo(),
                    {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::NHWC,
                     OprFormat::NCHW64, OprFormat::CHWN4})
            .add_opr_config(
                    opr::WarpPerspectiveForward::typeinfo(),
                    {OprFormat::NHWC, OprFormat::NCHW4, OprFormat::NCHW64});
    auto profiler = ProfilerBase::make_profiler();
    std::unique_ptr<SolverBase> solver{
            new DynamicProgrammingSolver(std::move(profiler))};
    auto new_out_vars = gopt::GraphOptimizer{}
                                .add_pass<LayoutTransformPass>(
                                        std::move(ctx), std::move(solver))
                                .add_pass<ShuffleShuffleRemovePass>()
                                .add_pass(FuseNCHW4Int8Preprocess::make())
                                .add_pass<FoldingConvBiasDimshufflePass>()
                                .add_pass<ParamFusePass>()
                                .add_pass<ParamMergePass>()
                                .apply(ret.output_var_list)
                                .endpoint_vars();
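    // Compile the optimized endpoints with empty output callbacks, run ten
    // iterations under GraphProfiler, and write the profiling trace as JSON.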
    using OutputSpecItem = cg::ComputingGraph::OutputSpecItem;
    std::vector<OutputSpecItem> outs(new_out_vars.size());
    for (size_t i = 0; i < new_out_vars.size(); ++i) {
        auto cb = [](DeviceTensorND& /* d */) {};
        outs[i] = std::make_pair(new_out_vars[i], cb);
    }
    GraphProfiler gprof{load_config.comp_graph.get()};
    auto func = load_config.comp_graph->compile(outs);
    for (size_t i = 0; i < 10; ++i)
        func->execute();
    func->wait();
    gprof.to_json_full(func.get())->writeto_fpath(output_file("det.json"));
}
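// Build a small two-convolution quantized detection head from scratch
// (Uint8 input -> QuantizedS8 conv -> Quantized4Asymm conv) and run the same
// layout search; requires a GPU with compute capability 7.5.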
TEST(TestLayoutTransform, DetectionHead) {
    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpu0");
    cn.activate();
    REQUIRE_CUDA_COMPUTE_CAPABILITY_EQ(7, 5);

    constexpr size_t N = 16, C = 3, H = 768, W = 1280;
    HostTensorGenerator<dtype::Uint8> gen;
    auto graph = ComputingGraph::make();
    auto h2d = opr::Host2DeviceCopy::make(*graph, gen({N, C, H, W}, cn));
    auto data = opr::TypeCvt::make(h2d, dtype::Float32());
    auto sub_128 = data + (-128);
    auto x = opr::TypeCvt::make(sub_128, dtype::QuantizedS8(1.f));
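    // Helper: wrap a randomly generated shared device tensor as a named
    // parameter and cast it to the requested (quantized) dtype.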
    auto mkcvar = [&](const char* name, const TensorShape& shp,
                      const DType& dtype) {
        return opr::TypeCvt::make(
                opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name),
                dtype);
    };
    auto w = mkcvar("w", {16, 3, 3, 3}, dtype::QuantizedS8(1.f));
    auto b = mkcvar("b", {1, 16, 1, 1}, dtype::QuantizedS32(1.f));
    opr::ConvBias::Param param;
    param.format = opr::ConvBias::Param::Format::NCHW;
    param.nonlineMode = opr::ConvBias::Param::NonlineMode::RELU;
    param.stride_h = param.stride_w = 2;
    param.pad_h = param.pad_w = 1;
    auto conv_1 = opr::ConvBias::make(
            x, w, b, param, {}, OperatorNodeConfig(dtype::QuantizedS8(1.f)));
    conv_1 = opr::TypeCvt::make(
            conv_1, dtype::Quantized4Asymm(1.f, static_cast<uint8_t>(8)));
    auto w1 = mkcvar("w1", {16, 16, 3, 3}, dtype::QuantizedS4(1.f));
    auto b1 = mkcvar("b1", {1, 16, 1, 1}, dtype::QuantizedS32(1.f));
    auto y = opr::ConvBias::make(
            conv_1, w1, b1, param, {},
            OperatorNodeConfig(
                    dtype::Quantized4Asymm(1.f, static_cast<uint8_t>(8))));

    using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
    S strategy = S::PROFILE;
    gopt::modify_opr_algo_strategy_inplace({y}, strategy);
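    // The layout-transform configuration below is the same as in the
    // Detection test above.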
    using OprFormat = LayoutTransformContext::OprFormat;
    using OprList = LayoutTransformContext::OprList;
    using ReformatAttribute = LayoutTransformContext::ReformatAttribute;
    using Attribute = LayoutTransformContext::Attribute;
    OprList opr_list = {
            opr::ConvBiasForward::typeinfo(),
            opr::ConvolutionForward::typeinfo(),
            opr::ConvolutionBackwardData::typeinfo(),
            opr::ElemwiseMultiType::typeinfo(),
            opr::Elemwise::typeinfo(),
            opr::TypeCvt::typeinfo(),
            opr::PoolingForward::typeinfo(),
            opr::WarpPerspectiveForward::typeinfo(),
    };
    SmallVector<TensorFormats> available_tensor_formats = {
            TensorFormats::NCHW,    TensorFormats::NHWC,
            TensorFormats::NCHWc4,  TensorFormats::NCHWc32,
            TensorFormats::NCHWc64, TensorFormats::CHWNc4};
    Attribute attribute = {OprFormat::NCHW, TensorFormats::NCHW,
                           ReformatAttribute::DEFAULT};
    auto ctx = std::make_unique<LayoutTransformContext>(
            std::move(opr_list), std::move(available_tensor_formats), attribute);
    ctx->add_opr_config(
               opr::ConvBiasForward::typeinfo(),
               {OprFormat::NCHW, OprFormat::NHWC, OprFormat::NCHW4,
                OprFormat::NCHW32, OprFormat::NCHW64, OprFormat::CHWN4})
            .add_opr_config(opr::ConvolutionForward::typeinfo(),
                            {OprFormat::NCHW, OprFormat::NCHW4})
            .add_opr_config(opr::ConvolutionBackwardData::typeinfo(),
                            {OprFormat::NCHW, OprFormat::NCHW4})
            .add_opr_config(
                    opr::PoolingForward::typeinfo(),
                    {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::NHWC,
                     OprFormat::NCHW64, OprFormat::CHWN4})
            .add_opr_config(
                    opr::WarpPerspectiveForward::typeinfo(),
                    {OprFormat::NHWC, OprFormat::NCHW4, OprFormat::NCHW64});
    auto profiler = ProfilerBase::make_profiler();
    std::unique_ptr<SolverBase> solver{
            new DynamicProgrammingSolver(std::move(profiler))};
    auto new_out_vars = gopt::GraphOptimizer{}
                                .add_pass<LayoutTransformPass>(
                                        std::move(ctx), std::move(solver))
                                .add_pass<ShuffleShuffleRemovePass>()
                                .add_pass(FuseNCHW4Int8Preprocess::make())
                                .add_pass<FoldingConvBiasDimshufflePass>()
                                .add_pass<ParamFusePass>()
                                .add_pass<ParamMergePass>()
                                .apply(SymbolVarArray{y})
                                .endpoint_vars();
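    // Compile and profile as in the Detection test, writing the trace to
    // det_head.json.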
    using OutputSpecItem = cg::ComputingGraph::OutputSpecItem;
    std::vector<OutputSpecItem> outs(new_out_vars.size());
    for (size_t i = 0; i < new_out_vars.size(); ++i) {
        auto cb = [](DeviceTensorND& /* d */) {};
        outs[i] = std::make_pair(new_out_vars[i], cb);
    }
    GraphProfiler gprof{graph.get()};
    auto func = graph->compile(outs);
    for (size_t i = 0; i < 10; ++i)
        func->execute();
    func->wait();
    gprof.to_json_full(func.get())->writeto_fpath(output_file("det_head.json"));
}
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
