
compiler.cpp 6.4 kB

/**
 * \file src/jit/impl/mlir/compiler.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "megbrain_build_config.h"
#if MGB_JIT && MGB_JIT_MLIR

#include "./compiler.h"
#include "./executable_cpu.h"
#include "./executable_cuda.h"
#include "./mlir_gen.h"

#include "megbrain/common.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/jit/mlir/ir/dialect.h"
#include "megbrain/jit/mlir/ir/passes.h"

#include <mlir/Conversion/GPUCommon/GPUCommonPass.h>
#include <mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h>
#include <mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h>
#include <mlir/Dialect/GPU/Passes.h>
#include <mlir/IR/Dialect.h>
#include <mlir/IR/MLIRContext.h>
#include <mlir/IR/Module.h>
#include <mlir/InitAllDialects.h>
#include <mlir/Pass/PassManager.h>
#include <mlir/Support/LogicalResult.h>
#include <mlir/Target/NVVMIR.h>
#include <mlir/Transforms/Passes.h>

#include <llvm/Support/TargetSelect.h>
using namespace mgb;
using namespace jit;

namespace {

//! one-time initialization of the host LLVM target for the CPU JIT path
struct LLVMInitializer {
    LLVMInitializer() {
        llvm::InitializeNativeTarget();
        llvm::InitializeNativeTargetAsmPrinter();
    }
};
static LLVMInitializer initializer;
#if MGB_CUDA
mlir::OwnedBlob compile_ptx_to_cubin(const std::string ptx, mlir::Location,
                                     llvm::StringRef) {
    mlir::OwnedBlob result = std::make_unique<std::vector<char>>(
            ptx.data(), ptx.data() + ptx.size());
    return result;
}
#endif
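
// Note: despite its name, compile_ptx_to_cubin does not invoke ptxas; it
// returns the PTX text verbatim as the "cubin" blob. That is workable because
// the CUDA driver API can JIT-compile PTX at module-load time (e.g. via
// cuModuleLoadData), which is presumably how MLIRCUDAExecutable consumes the
// blob it reads back from sm_blob_annotation.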
void add_cpu_lowering_pass(mlir::PassManager& manager) {
    {
        mlir::OpPassManager& opt_pm = manager.nest<mlir::FuncOp>();
        opt_pm.addPass(create_shape_inference_pass());
        opt_pm.addPass(mlir::createCanonicalizerPass());
        opt_pm.addPass(mlir::createCSEPass());
    }
    manager.addPass(create_lower_to_affine_pass());
    {
        mlir::OpPassManager& opt_pm = manager.nest<mlir::FuncOp>();
        opt_pm.addPass(mlir::createCanonicalizerPass());
        opt_pm.addPass(mlir::createCSEPass());
        opt_pm.addPass(mlir::createLoopFusionPass());
        opt_pm.addPass(mlir::createMemRefDataFlowOptPass());
    }
    manager.addPass(create_lower_to_llvm_pass());
}
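
// The CPU pipeline above thus runs in three stages: per-function cleanups
// (shape inference, canonicalization, CSE) on the MGB dialect, lowering to
// the affine dialect followed by loop fusion and memref dataflow
// optimization, and a final lowering to the LLVM dialect, which is the form
// MLIRCPUExecutable receives for host execution.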
#if MGB_CUDA
void add_cuda_lowering_pass(mlir::PassManager& manager, CompNode cn) {
    {
        mlir::OpPassManager& opt_pm = manager.nest<mlir::FuncOp>();
        opt_pm.addPass(create_shape_inference_pass());
        opt_pm.addPass(mlir::createCanonicalizerPass());
        opt_pm.addPass(mlir::createCSEPass());
    }
    manager.addPass(create_lower_to_gpu_pass());
    {
        mlir::OpPassManager& opt_pm = manager.nest<mlir::FuncOp>();
        opt_pm.addPass(mlir::createCanonicalizerPass());
        opt_pm.addPass(mlir::createCSEPass());
        opt_pm.addPass(mlir::createLoopFusionPass());
        opt_pm.addPass(mlir::createMemRefDataFlowOptPass());
    }
    manager.addPass(mlir::createGpuKernelOutliningPass());
    {
        auto& kernel_pm = manager.nest<mlir::gpu::GPUModuleOp>();
        kernel_pm.addPass(mlir::createLowerGpuOpsToNVVMOpsPass());
        auto&& prop = CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
        kernel_pm.addPass(mlir::createConvertGPUKernelToBlobPass(
                mlir::translateModuleToNVVMIR, compile_ptx_to_cubin,
                "nvptx64-nvidia-cuda",
                ssprintf("sm_%d%d", prop.major, prop.minor), "+ptx60",
                MLIRCUDAExecutable::sm_blob_annotation));
    }
}
#endif
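
// The CUDA pipeline shares the per-function cleanups with the CPU path but
// lowers to the GPU dialect instead of affine. Kernel regions are then
// outlined into separate gpu.module ops, lowered to NVVM, and serialized
// into a device blob attached under MLIRCUDAExecutable::sm_blob_annotation.
// The target string sm_<major><minor> is derived from the compute capability
// of the compilation CompNode's device, and "+ptx60" selects the PTX ISA
// version passed to the NVPTX backend.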
}  // namespace

/* ==================== MLIRCompiler ===================== */

thread_local mlir::MLIRContext MLIRCompiler::sm_ctx;

MLIRCompiler::MLIRCompiler(CompNode::DeviceType device_type)
        : m_device_type{device_type} {
    mlir::registerAllDialects();
    mlir::registerDialect<MgbDialect>();
#if MGB_CUDA
    if (m_device_type == CompNode::DeviceType::CUDA) {
        LLVMInitializeNVPTXTarget();
        LLVMInitializeNVPTXTargetInfo();
        LLVMInitializeNVPTXTargetMC();
        LLVMInitializeNVPTXAsmPrinter();
    }
#endif
}
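
// Dialect registration in this MLIR vintage is process-wide rather than
// per-context, hence the bare registerAllDialects()/registerDialect<>()
// calls in the constructor above. The NVPTX target components are
// initialized so that the NVVM IR produced by translateModuleToNVVMIR can
// later be compiled down to PTX when targeting CUDA.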
void MLIRCompiler::run_lowering_pass(mlir::OwningModuleRef& module,
                                     CompNode cn) {
    mgb_assert(cn.device_type() == m_device_type);
    mlir::PassManager manager(module->getContext());
    switch (m_device_type) {
        case CompNode::DeviceType::CPU:
            add_cpu_lowering_pass(manager);
            break;
#if MGB_CUDA
        case CompNode::DeviceType::CUDA:
            add_cuda_lowering_pass(manager, cn);
            break;
#endif
        default:
            mgb_throw(InternalError, "Unsupported device type: %d",
                      static_cast<int>(m_device_type));
            break;
    }
    mgb_assert(mlir::succeeded(manager.run(*module)));
}
std::unique_ptr<Executable> MLIRCompiler::do_compile(
        const InternalGraph& graph, const JITExecutor::Args& args) {
    MGB_MARK_USED_VAR(graph);
    MGB_MARK_USED_VAR(args);

    mlir::MLIRContext ctx;
    ctx.printStackTraceOnDiagnostic(true);
    ctx.printOpOnDiagnostic(true);

    auto&& res = mlir_gen(ctx, graph, args);
    mgb_assert(res.second, "failed to generate module");

    CompNode cn = args.owner->comp_node();
    run_lowering_pass(res.second, cn);
    switch (cn.device_type()) {
        case CompNode::DeviceType::CPU:
            return std::make_unique<MLIRCPUExecutable>(res.second,
                                                       res.first.str());
#if MGB_CUDA
        case CompNode::DeviceType::CUDA:
            return std::make_unique<MLIRCUDAExecutable>(res.second,
                                                        res.first.str());
#endif
        default:
            mgb_throw(InternalError, "Unsupported device type: %d",
                      static_cast<int>(cn.device_type()));
            return nullptr;
    }
}
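
// End to end, do_compile (1) builds an MLIR module from the fused internal
// graph via mlir_gen, (2) lowers it with the device-specific pipeline above,
// and (3) wraps the result in an executable that owns the final artifact:
// LLVM-dialect IR for the CPU path, a PTX blob for CUDA.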
size_t MLIRCompiler::get_nr_workspace_outputs(JITExecutor* opr) const {
    MGB_MARK_USED_VAR(opr);
    return 0;
}

void MLIRCompiler::init_workspace_size_infer(JITExecutor* opr) {
    MGB_MARK_USED_VAR(opr);
}

#endif  // MGB_JIT && MGB_JIT_MLIR

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
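
Both lowering helpers follow the standard MLIR idiom of nesting function-level cleanup passes inside a module-level pass manager. As a point of reference, here is a minimal self-contained sketch of that idiom against the upstream MLIR API of the same vintage; the helper name run_function_cleanups is illustrative and not part of MegEngine:

#include <mlir/IR/Function.h>
#include <mlir/IR/Module.h>
#include <mlir/Pass/PassManager.h>
#include <mlir/Support/LogicalResult.h>
#include <mlir/Transforms/Passes.h>

// Run canonicalization and CSE on every function in `module`, mirroring the
// cleanup blocks used by add_cpu_lowering_pass/add_cuda_lowering_pass.
mlir::LogicalResult run_function_cleanups(mlir::ModuleOp module) {
    mlir::PassManager manager(module.getContext());
    mlir::OpPassManager& fn_pm = manager.nest<mlir::FuncOp>();
    fn_pm.addPass(mlir::createCanonicalizerPass());
    fn_pm.addPass(mlir::createCSEPass());
    return manager.run(module);
}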

The MegEngine installation packages bundle the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU and a properly installed driver. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.