You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

graph_rt.cpp 14 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. /**
  2. * \file imperative/python/src/graph_rt.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "./graph_rt.h"
  12. #include "megbrain/graph/cg.h"
  13. #include "megbrain/serialization/serializer.h"
  14. #include "megbrain/imperative/opr_utility.h"
  15. #include "megbrain/opr/io.h"
  16. #include "megbrain/opr/basic_arith.h"
  17. #include "megbrain/imperative.h"
  18. #include "./helper.h"
  19. #include "megbrain/plugin/profiler.h"
  20. #include "./common.h"
  21. namespace py = pybind11;
  22. using namespace mgb;
  23. using namespace imperative;
  24. namespace {
  25. class _CompGraphProfilerImpl {
  26. std::shared_ptr<ComputingGraph> m_comp_graph;
  27. GraphProfiler m_profiler;
  28. public:
  29. _CompGraphProfilerImpl(std::shared_ptr<ComputingGraph> cg):
  30. m_comp_graph{cg},
  31. m_profiler{m_comp_graph.get()}
  32. {
  33. }
  34. std::string _get_result() {
  35. auto json = m_profiler.to_json_full(
  36. m_comp_graph->current_comp_seq());
  37. return json->to_string();
  38. }
  39. };
  40. }
  41. #define DEF_READWRITE(name) .def_readwrite(#name, &CURRENT_CLASS::name)
  42. template<typename T>
  43. auto def_rendezvous(py::object m, const char* name) {
  44. return py::class_<Rendezvous<T>, std::shared_ptr<Rendezvous<T>>>(m, name)
  45. .def(py::init([](){return std::make_shared<Rendezvous<T>>();}))
  46. .def("set", [](Rendezvous<T>& r, T v) {r.set(std::move(v));})
  47. .def("get", [](Rendezvous<T>& r) {return r.get();}, py::call_guard<py::gil_scoped_release>())
  48. .def("drop", &Rendezvous<T>::drop)
  49. .def("reset", &Rendezvous<T>::reset);
  50. }
  51. using TensorAttr = LogicalTensorDesc;
  52. using HostNDWithEvent = std::pair<HostTensorND, std::shared_ptr<CompNode::Event>>;
// Register all graph-runtime bindings (VarNode, OperatorNode, ComputingGraph,
// profiler, graph dumping, graph options, and the input/output callback
// operators) into python module `m`.
void init_graph_rt(py::module m) {
    // Rendezvous channels used to hand tensors / tensor metadata between
    // python and callbacks executed inside a compiled graph.
    def_rendezvous<DeviceTensorND>(m, "DeviceTensorNDRendezvous");
    def_rendezvous<HostNDWithEvent>(m, "HostTensorNDRendezvous");
    def_rendezvous<TensorAttr>(m, "TensorAttrRendezvous");

    // ---- VarNode: a variable (edge) in the computing graph -----------------
    py::class_<cg::VarNode, GraphNodePtr<cg::VarNode>>(m, "VarNode")
        .def_property_readonly("owner", [](cg::VarNode* v) {return v->owner_opr();})
        .def_property_readonly("graph", [](cg::VarNode* v) {return v->owner_graph();})
        .def_property("name", py::overload_cast<>(&VarNode::name, py::const_),
                      py::overload_cast<std::string>(&VarNode::name))
        .def_property_readonly("dtype", [](cg::VarNode* v) {return v->dtype();})
        .def_property_readonly("comp_node", [](cg::VarNode* v) {return v->comp_node();})
        // Statically inferred shape, or None when the shape is only known at
        // runtime (only CONST / RT_STATIC infer types are accepted).
        .def_property_readonly("shape", [](cg::VarNode* v) -> const TensorShape* {
            auto&& mgr = v->owner_graph()->static_infer_manager();
            auto&& type = mgr.get_infer_type(v);
            using InferType = cg::static_infer::InferType;
            if (!(type.shape & (InferType::CONST | InferType::RT_STATIC))) {
                return nullptr;  // nullptr is converted to python None
            }
            return mgr.infer_shape_fallible(v);
        })
        // Statically inferred value converted to a numpy array, or None when
        // the value cannot be inferred ahead of execution.
        .def_property_readonly("value", [](cg::VarNode* v) -> py::object {
            auto&& mgr = v->owner_graph()->static_infer_manager();
            auto&& type = mgr.get_infer_type(v);
            using InferType = cg::static_infer::InferType;
            if (!(type.value & (InferType::CONST | InferType::RT_STATIC))) {
                return py::none();
            }
            auto* val = mgr.infer_value_fallible(v);
            if (!val) {
                return py::none();  // inference declared static but failed
            }
            // Cast the tensor into its python wrapper, then take .numpy().
            return py::cast(*val).attr("numpy")();
        });

    // ---- OperatorNode: an operator (node) in the computing graph -----------
    py::class_<cg::OperatorNodeBase, GraphNodePtr<cg::OperatorNodeBase>>(m, "OperatorNode")
        .def_property_readonly("graph", [](cg::OperatorNodeBase* opr) {return opr->owner_graph();})
        .def_property("name", py::overload_cast<>(&cg::OperatorNodeBase::name, py::const_),
                      py::overload_cast<std::string>(&cg::OperatorNodeBase::name))
        .def_property_readonly("inputs", [](cg::OperatorNodeBase* opr) {
            return to_tuple(opr->input());
        })
        .def_property_readonly("outputs", [](cg::OperatorNodeBase* opr) {
            // usable_output() excludes internal-only outputs.
            return to_tuple(opr->usable_output());
        });

    // ---- AsyncExecutable: a compiled function ------------------------------
    // execute/wait may block, so release the GIL around them.
    py::class_<cg::AsyncExecutable>(m, "AsyncExecutable")
        .def("execute", &cg::AsyncExecutable::execute, py::call_guard<py::gil_scoped_release>())
        .def("wait", &cg::AsyncExecutable::wait, py::call_guard<py::gil_scoped_release>());

    // ---- ComputingGraph ----------------------------------------------------
    auto PyComputingGraph = py::class_<cg::ComputingGraph, std::shared_ptr<cg::ComputingGraph>>(m, "ComputingGraph")
        .def(py::init(py::overload_cast<>(&cg::ComputingGraph::make)))
        // Compile the graph so that all `dest_vars` are computed; no value
        // callbacks are installed here (spec callbacks are nullptr).
        .def("compile", [](cg::ComputingGraph& graph, const std::vector<cg::VarNode*>& dest_vars) {
            mgb_assert(!dest_vars.empty());
            cg::ComputingGraph::OutputSpec spec;
            for (auto v : dest_vars) {
                spec.emplace_back(v, nullptr);
            }
            return graph.compile(spec);
        })
        .def_property_readonly("options", py::overload_cast<>(&cg::ComputingGraph::options));

    // ---- GraphProfiler: wraps _CompGraphProfilerImpl -----------------------
    py::class_<_CompGraphProfilerImpl, std::shared_ptr<_CompGraphProfilerImpl>>(m, "GraphProfiler")
        .def(py::init([](std::shared_ptr<ComputingGraph> graph) {
            return std::make_shared<_CompGraphProfilerImpl>(graph);
        }))
        .def("get", [](_CompGraphProfilerImpl& profiler) { return profiler._get_result(); });

    // Serialize the graph reachable from `dest_vars` into an in-memory buffer
    // and return it as python bytes.
    m.def("dump_graph", [](const std::vector<VarNode*>& dest_vars) {
        using namespace mgb::serialization;
        std::vector<uint8_t> buf;
        auto dumper = GraphDumper::make(OutputFile::make_vector_proxy(&buf));
        SymbolVarArray symvars(dest_vars.begin(), dest_vars.end());
        dumper->dump(symvars);
        // NOTE(review): &buf[0] assumes dump() always produced at least one
        // byte (presumably a header is always written) -- confirm.
        return py::bytes(reinterpret_cast<const char*>(&buf[0]), buf.size());
    });

    // ---- ComputingGraph.Options and nested option structs ------------------
    // CURRENT_CLASS + DEF_READWRITE generate one read/write property per field.
#define CURRENT_CLASS cg::ComputingGraph::Options
    auto PyComputingGraphOptions = py::class_<cg::ComputingGraph::Options>(PyComputingGraph, "Options")
        // DEF_READWRITE(opr_attribute)
        DEF_READWRITE(seq_opt)
        DEF_READWRITE(graph_opt)
        DEF_READWRITE(graph_opt_level)
        DEF_READWRITE(log_level)
        DEF_READWRITE(async_exec_level)
        DEF_READWRITE(force_dynamic_alloc)
        DEF_READWRITE(var_sanity_check_first_run)
        DEF_READWRITE(allocate_static_mem_after_graph_compile)
        DEF_READWRITE(fake_next_exec)
        DEF_READWRITE(enable_sublinear_memory_opt)
        DEF_READWRITE(no_profiling_on_shape_change)
        DEF_READWRITE(enable_var_mem_defragment)
        DEF_READWRITE(enable_grad_var_static_reshape)
        DEF_READWRITE(enable_memory_swap)
        DEF_READWRITE(comp_node_seq_record_level)
        DEF_READWRITE(no_force_inplace)
        DEF_READWRITE(sublinear_mem_config)
        // DEF_READWRITE(eager_evaluation)
        // DEF_READWRITE(imperative_proxy_graph)
        // DEF_READWRITE(extra_vardeps)
        // DEF_READWRITE(user_data)
        ;
#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::SeqOpt
    py::class_<cg::ComputingGraph::Options::SeqOpt>(PyComputingGraphOptions, "SeqOpt")
        DEF_READWRITE(enable_mem_plan_opt)
        DEF_READWRITE(enable_mem_reuse_alloc)
        DEF_READWRITE(enable_seq_comp_node_opt);
#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::GraphOpt
    py::class_<cg::ComputingGraph::Options::GraphOpt>(PyComputingGraphOptions, "GraphOpt")
        DEF_READWRITE(jit)
        DEF_READWRITE(tensorrt);
#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::SublinearMemConfig
    py::class_<cg::ComputingGraph::Options::SublinearMemConfig>(PyComputingGraphOptions, "SublinearMemConfig")
        DEF_READWRITE(thresh_nr_try)
        DEF_READWRITE(genetic_nr_iter)
        DEF_READWRITE(genetic_pool_size)
        DEF_READWRITE(lb_memory)
        DEF_READWRITE(num_worker);
#undef CURRENT_CLASS

    // ---- invoke_op: apply an OpDef on graph variables ----------------------
    auto common = rel_import("common", m, 1);
    common.def("invoke_op", [](const OpDef& def, const std::vector<cg::VarNode*> inputs, cg::ComputingGraph* graph) {
        // NOTE(review): the `graph` argument is accepted but never used here;
        // apply_on_var_node presumably derives the graph from `vinputs` --
        // confirm whether the parameter is kept only for API compatibility.
        cg::VarNodeArray vinputs(inputs.begin(), inputs.end());
        auto opr = OpDef::apply_on_var_node(def, vinputs);
        auto outputs = opr->usable_output();
        return to_tuple(outputs);
    },
    py::arg(), py::arg(), py::arg("graph") = py::none());

    // Shared builder for the two "input_callback" overloads below: wraps
    // `callback` in an opr::InputCallback operator producing value vars.
    // If `graph` is not given, it is taken from the first input var.
    auto input_callback = [](auto callback,
                             const CompNode& comp_node,
                             const DType& dtype,
                             const TensorShape& shape,
                             const std::vector<cg::VarNode*>& inputs,
                             cg::ComputingGraph* graph) {
        if (!graph) {
            graph = inputs[0]->owner_graph();
        }
        SymbolVarArray sinputs;
        for (auto i : inputs) {
            sinputs.emplace_back(i);
        }
        // `callback` must be an owned value so it can be moved into the opr.
        static_assert(!std::is_reference<decltype(callback)>::value);
        auto soutputs = opr::InputCallback::make(*graph, std::move(callback), comp_node, dtype, shape, sinputs);
        std::vector<VarNode*> outputs;
        outputs.reserve(soutputs.size());
        for (auto i : soutputs) {
            outputs.push_back(i.node());
        }
        return outputs;
    };

    // Wrap an existing device tensor as a shared graph input.
    m.def("make_shared", [](cg::ComputingGraph* graph, const DeviceTensorND& data) {
        return opr::SharedDeviceTensor::make(*graph, std::make_shared<DeviceTensorND>(data)).node();
    });

    // Create an immutable (constant) tensor from a numpy array; falls back to
    // the default device when `cn` is invalid.
    m.def("make_const", [](cg::ComputingGraph* graph, py::array data, CompNode cn, DType dtype) {
        if (!cn.valid()) {
            cn = CompNode::load(get_default_device());
        }
        auto hv = npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype);
        return opr::ImmutableTensor::make(*graph, hv, OperatorNodeConfig(cn)).node();
    });

    // Create a host-to-device copy input placeholder with given device/dtype
    // and an optional operator name.
    m.def("make_h2d", [](cg::ComputingGraph& graph, CompNode cn, DType dtype, std::optional<std::string> name) {
        if (!cn.valid()) {
            throw py::type_error("device must be valid");
        }
        if (!dtype.valid()) {
            throw py::type_error("dtype must be valid");
        }
        OperatorNodeConfig config;
        if (name) {
            config.name(*name);
        }
        return opr::Host2DeviceCopy::make(graph, std::make_shared<HostTensorND>(cn, dtype), config).node();
    }, py::arg(), py::arg(), py::arg(), py::arg() = py::none());

    // input_callback overload 1: a python callable producing a DeviceTensorND.
    // The GIL is re-acquired inside the graph-side callback before calling
    // back into python.
    m.def("input_callback", [input_callback](std::function<DeviceTensorND(void)> callback,
                                             const CompNode& comp_node,
                                             const DType& dtype,
                                             const TensorShape& shape,
                                             const std::vector<cg::VarNode*>& inputs,
                                             cg::ComputingGraph* graph) {
        return input_callback([f=std::move(callback)](){py::gil_scoped_acquire _; return f();}, comp_node, dtype, shape, inputs, graph);
    },
    py::arg(), py::arg(), py::arg(), py::arg() = py::none(), py::arg() = py::tuple(), py::arg("graph") = py::none());

    // input_callback overload 2: pull the tensor from a Rendezvous channel
    // (blocking get, no GIL needed).
    m.def("input_callback", [input_callback](std::shared_ptr<Rendezvous<DeviceTensorND>> p,
                                             const CompNode& comp_node,
                                             const DType& dtype,
                                             const TensorShape& shape,
                                             const std::vector<cg::VarNode*>& inputs,
                                             cg::ComputingGraph* graph) {
        auto f = [p]() -> DeviceTensorND {
            return p->get();
        };
        return input_callback(std::move(f), comp_node, dtype, shape, inputs, graph);
    },
    py::arg(), py::arg(), py::arg(), py::arg() = py::none(), py::arg() = py::tuple(), py::arg("graph") = py::none());

    // Shared builder for the "output_callback" bindings below: wraps
    // `callback` in an opr::OutputCallback operator over `inputs`. `borrow`
    // is forwarded to OutputCallback::Param (presumably whether the callback
    // may borrow the tensor storage instead of owning a copy -- confirm).
    auto output_callback = [](auto callback, const std::vector<cg::VarNode*>& inputs, bool borrow = false) {
        SymbolVarArray sinputs;
        for (auto i : inputs) {
            sinputs.emplace_back(i);
        }
        static_assert(!std::is_reference<decltype(callback)>::value);
        opr::OutputCallback::Param param{std::move(callback), borrow};
        auto output = opr::OutputCallback::make(std::move(param), sinputs);
        return output.node();
    };

    // output_callback overload 1: forward the output tensor to a python
    // callable, dispatched through the python task queue so the graph thread
    // never takes the GIL directly.
    m.def("output_callback", [output_callback](std::function<void(DeviceTensorND)> callback, std::vector<cg::VarNode*> inputs) {
        auto f = [f=std::move(callback)](DeviceTensorND dv) {
            auto task = [f=std::move(f), dv=std::move(dv)]() {
                f(dv);
            };
            py_task_q.add_task(std::move(task));
        };
        return output_callback(std::move(f), std::move(inputs));
    });

    // output_callback overload 2: push the output tensor into a Rendezvous.
    m.def("output_callback", [output_callback](std::shared_ptr<Rendezvous<DeviceTensorND>> p, std::vector<cg::VarNode*> inputs) {
        auto f = [p](DeviceTensorND dv) {
            p->set(std::move(dv));
        };
        return output_callback(std::move(f), std::move(inputs));
    });

    // Copy the output to host memory and pair it with a recorded comp-node
    // event so the consumer can wait for the copy to finish.
    m.def("value_output_callback", [output_callback](std::shared_ptr<Rendezvous<HostNDWithEvent>> p, std::vector<cg::VarNode*> inputs) {
        auto f = [p](DeviceTensorND dv) {
            HostNDWithEvent hv_with_event;
            hv_with_event.first.copy_from(dv);
            hv_with_event.second = dv.comp_node().create_event();
            hv_with_event.second->record();
            p->set(std::move(hv_with_event));
        };
        return output_callback(std::move(f), std::move(inputs), true);
    });

    // Publish only the metadata (layout + comp node) of the output tensor.
    m.def("attr_output_callback", [output_callback](std::shared_ptr<Rendezvous<TensorAttr>> p, std::vector<cg::VarNode*> inputs) {
        auto f = [p](DeviceTensorND dv) {
            p->set(TensorAttr{TensorLayout{dv.shape(), dv.dtype()}, dv.comp_node()});
        };
        return output_callback(std::move(f), std::move(inputs), true);
    });
}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台