You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

graph_rt.cpp 32 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726
  1. /**
  2. * \file imperative/python/src/graph_rt.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "./graph_rt.h"
  12. #include "./common.h"
  13. #include "./helper.h"
  14. #include "./ops.h"
  15. #include "megbrain/gopt/inference.h"
  16. #include "megbrain/graph/cg.h"
  17. #include "megbrain/imperative.h"
  18. #include "megbrain/imperative/opr_utility.h"
  19. #include "megbrain/imperative/profiler_plugin.h"
  20. #include "megbrain/opr/basic_arith.h"
  21. #include "megbrain/opr/io.h"
  22. #include "megbrain/opr/utility.h"
  23. #include "megbrain/plugin/profiler.h"
  24. #include "megbrain/serialization/serializer.h"
  25. namespace py = pybind11;
  26. using namespace mgb;
  27. using namespace imperative;
  28. namespace ser = mgb::serialization;
  29. using _OptimizeForInferenceOptions = mgb::gopt::OptimizeForInferenceOptions;
  30. using _LayoutTransform = _OptimizeForInferenceOptions::LayoutTransform;
  31. using _AlgoStrategy = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
  32. using _SerializationMetadata = mgb::serialization::Metadata;
  33. using _SerializationFormat = mgb::serialization::GraphDumpFormat;
  34. namespace {
  35. class _CompGraphProfilerImpl {
  36. std::shared_ptr<ComputingGraph> m_comp_graph;
  37. GraphProfiler m_profiler;
  38. public:
  39. _CompGraphProfilerImpl(std::shared_ptr<ComputingGraph> cg)
  40. : m_comp_graph{cg}, m_profiler{m_comp_graph.get()} {}
  41. std::string _get_result() {
  42. auto json = m_profiler.to_json_full(m_comp_graph->current_comp_seq());
  43. return json->to_string();
  44. }
  45. };
  46. struct WeakRendezvousArray : public std::vector<std::weak_ptr<RendezvousBase>>,
  47. public UserDataContainer::UserData {
  48. MGB_TYPEINFO_OBJ_DECL;
  49. };
  50. MGB_TYPEINFO_OBJ_IMPL(WeakRendezvousArray);
  51. } // namespace
  52. #define DEF_READWRITE(name) .def_readwrite(#name, &CURRENT_CLASS::name)
  53. template <typename T>
  54. auto def_rendezvous(py::object m, const char* name) {
  55. return py::class_<Rendezvous<T>, std::shared_ptr<Rendezvous<T>>>(m, name)
  56. .def(py::init([]() { return Rendezvous<T>::make(); }))
  57. .def("set", [](Rendezvous<T>& r, T v) { r.set(std::move(v)); })
  58. .def(
  59. "get", [](Rendezvous<T>& r) { return r.get(); },
  60. py::call_guard<py::gil_scoped_release>())
  61. .def("drop", &Rendezvous<T>::drop)
  62. .def("reset", &Rendezvous<T>::reset)
  63. .def("set_exception", [](Rendezvous<T>& r, std::string&& message) {
  64. r.set_exception(std::make_exception_ptr(
  65. std::runtime_error(std::move(message))));
  66. });
  67. }
  68. using TensorAttr = LogicalTensorDesc;
  69. using HostNDWithEvent = std::pair<HostTensorND, std::shared_ptr<CompNode::Event>>;
  70. std::vector<mgb::cg::VarNode*> _replace_vars(
  71. const std::vector<mgb::cg::VarNode*>& repl_src,
  72. const std::vector<mgb::cg::VarNode*>& repl_dst,
  73. const std::vector<mgb::cg::VarNode*>& vars) {
  74. mgb::ThinHashMap<SymbolVar, SymbolVar> varmap;
  75. for (size_t i = 0; i < repl_src.size(); ++i) {
  76. varmap[SymbolVar(repl_src[i])] = SymbolVar(repl_dst[i]);
  77. }
  78. SymbolVarArray symvars(vars.begin(), vars.end());
  79. auto sym_result = mgb::cg::replace_vars(symvars, varmap);
  80. std::vector<mgb::cg::VarNode*> result;
  81. for (auto symvar : sym_result) {
  82. result.push_back(symvar.node());
  83. }
  84. return result;
  85. }
  86. typedef std::vector<mgb::cg::OperatorNodeBase*> OperatorArray;
  87. std::vector<mgb::cg::VarNode*> _replace_oprs(
  88. const OperatorArray& repl_src, const OperatorArray& repl_dst,
  89. const std::vector<mgb::cg::VarNode*>& vars) {
  90. mgb::ThinHashMap<mgb::cg::OperatorNodeBase*, mgb::cg::OperatorNodeBase*> oprmap;
  91. for (size_t i = 0; i < repl_src.size(); ++i) {
  92. oprmap[repl_src[i]] = repl_dst[i];
  93. }
  94. const SymbolVarArray symvars(vars.begin(), vars.end());
  95. auto sym_result = mgb::cg::replace_oprs(symvars, oprmap);
  96. std::vector<mgb::cg::VarNode*> result;
  97. for (auto symvar : sym_result) {
  98. result.push_back(symvar.node());
  99. }
  100. return result;
  101. }
  102. void _set_priority_to_id(const std::vector<mgb::cg::VarNode*>& dest_vars) {
  103. auto on_opr = [](mgb::cg::OperatorNodeBase* opr) {
  104. if (opr->node_prop().attribute().priority == 0) {
  105. opr->node_prop().attribute().priority = opr->id();
  106. }
  107. };
  108. mgb::cg::DepOprIter dep_iter{on_opr};
  109. for (const auto& var : dest_vars) {
  110. dep_iter.add(SymbolVar(var));
  111. }
  112. }
  113. void init_graph_rt(py::module m) {
  114. static const std::unique_ptr<mgb::OprFootprint> _imperative_sm_opr_footprint_ptr{
  115. std::make_unique<mgb::OprFootprint>()};
  116. def_rendezvous<DeviceTensorND>(m, "DeviceTensorNDRendezvous");
  117. def_rendezvous<HostNDWithEvent>(m, "HostTensorNDRendezvous");
  118. def_rendezvous<TensorAttr>(m, "TensorAttrRendezvous");
  119. py::class_<cg::VarNode, GraphNodePtr<cg::VarNode>>(m, "VarNode")
  120. .def_property_readonly(
  121. "owner", [](cg::VarNode* v) { return v->owner_opr(); })
  122. .def_property_readonly(
  123. "graph", [](cg::VarNode* v) { return v->owner_graph(); })
  124. .def_property(
  125. "name", py::overload_cast<>(&VarNode::name, py::const_),
  126. py::overload_cast<std::string>(&VarNode::name))
  127. .def_property_readonly("dtype", [](cg::VarNode* v) { return v->dtype(); })
  128. .def_property_readonly(
  129. "comp_node", [](cg::VarNode* v) { return v->comp_node(); })
  130. .def_property_readonly(
  131. "shape",
  132. [](cg::VarNode* v) -> const TensorShape* {
  133. auto&& mgr = v->owner_graph()->static_infer_manager();
  134. return mgr.infer_shape_fallible(v);
  135. })
  136. .def_property_readonly(
  137. "value",
  138. [](cg::VarNode* v) -> py::object {
  139. auto&& mgr = v->owner_graph()->static_infer_manager();
  140. auto&& type = mgr.get_infer_type(v);
  141. using InferType = cg::static_infer::InferType;
  142. if (!(type.value & (InferType::CONST | InferType::RT_STATIC))) {
  143. return py::none();
  144. }
  145. auto* val = mgr.infer_value_fallible(v);
  146. if (!val) {
  147. return py::none();
  148. }
  149. return py::cast(*val).attr("numpy")();
  150. })
  151. .def_property_readonly("id", [](cg::VarNode* v) { return (v->id()); })
  152. .def("__repr__", [](cg::VarNode* v) { return "Var:" + v->name(); });
  153. py::class_<cg::OperatorNodeBase, GraphNodePtr<cg::OperatorNodeBase>>(
  154. m, "OperatorNode")
  155. .def_property_readonly(
  156. "graph",
  157. [](cg::OperatorNodeBase* opr) { return opr->owner_graph(); })
  158. .def_property(
  159. "name",
  160. py::overload_cast<>(&cg::OperatorNodeBase::name, py::const_),
  161. py::overload_cast<std::string>(&cg::OperatorNodeBase::name))
  162. .def_property_readonly(
  163. "inputs",
  164. [](cg::OperatorNodeBase* opr) { return to_tuple(opr->input()); })
  165. .def_property_readonly(
  166. "outputs",
  167. [](cg::OperatorNodeBase* opr) {
  168. return to_tuple(opr->usable_output());
  169. })
  170. .def_property_readonly(
  171. "id", [](cg::OperatorNodeBase* opr) { return opr->id(); })
  172. .def_property_readonly(
  173. "params",
  174. [](cg::OperatorNodeBase* opr) {
  175. return _imperative_sm_opr_footprint_ptr->calc_footprint(opr)
  176. .param->to_string();
  177. })
  178. .def_property_readonly(
  179. "type",
  180. [](cg::OperatorNodeBase* opr) { return opr->dyn_typeinfo()->name; })
  181. .def("__repr__",
  182. [](cg::OperatorNodeBase* opr) { return "Opr:" + opr->name(); })
  183. .def_property(
  184. "priority",
  185. [](cg::OperatorNodeBase* opr) {
  186. return opr->node_prop().attribute().priority;
  187. },
  188. [](cg::OperatorNodeBase* opr, int priority) {
  189. opr->node_prop().attribute().priority = priority;
  190. });
  191. py::class_<cg::AsyncExecutable>(m, "AsyncExecutable")
  192. .def("execute", &cg::AsyncExecutable::execute,
  193. py::call_guard<py::gil_scoped_release>())
  194. .def("wait", &cg::AsyncExecutable::wait,
  195. py::call_guard<py::gil_scoped_release>())
  196. .def("get_prev_exec_time", &cg::AsyncExecutable::get_prev_exec_time,
  197. py::call_guard<py::gil_scoped_release>())
  198. .def("_to_json",
  199. [](cg::AsyncExecutable* exec) {
  200. py::call_guard<py::gil_scoped_release>();
  201. // dump currently compiled computing graph for debugging
  202. return exec->to_json()->to_string();
  203. })
  204. // only used for exception handle
  205. .def_property_readonly(
  206. "_all_rendezvous",
  207. [](cg::AsyncExecutable* exec) {
  208. auto ud =
  209. exec->owner_graph()
  210. ->options()
  211. .user_data.get_user_data<WeakRendezvousArray>();
  212. std::vector<std::shared_ptr<RendezvousBase>> ret;
  213. if (ud.second) {
  214. for (auto&& r : *ud.first[0]) {
  215. if (auto p = r.lock()) {
  216. ret.emplace_back(std::move(p));
  217. }
  218. }
  219. }
  220. return ret;
  221. })
  222. .def("get_static_memory_alloc_info",
  223. &cg::AsyncExecutable::get_static_memory_alloc_info,
  224. py::call_guard<py::gil_scoped_release>());
  225. auto PyComputingGraph =
  226. py::class_<cg::ComputingGraph, std::shared_ptr<cg::ComputingGraph>>(
  227. m, "ComputingGraph")
  228. .def(py::init(py::overload_cast<>(&cg::ComputingGraph::make)))
  229. .def("compile",
  230. [](cg::ComputingGraph& graph,
  231. const std::vector<cg::VarNode*>& dest_vars) {
  232. mgb_assert(!dest_vars.empty());
  233. cg::ComputingGraph::OutputSpec spec;
  234. for (auto v : dest_vars) {
  235. spec.emplace_back(v, nullptr);
  236. }
  237. return graph.compile(spec);
  238. })
  239. .def_property_readonly(
  240. "options",
  241. py::overload_cast<>(&cg::ComputingGraph::options));
  242. py::class_<_CompGraphProfilerImpl, std::shared_ptr<_CompGraphProfilerImpl>>(
  243. m, "GraphProfiler")
  244. .def(py::init([](std::shared_ptr<ComputingGraph> graph) {
  245. return std::make_shared<_CompGraphProfilerImpl>(graph);
  246. }))
  247. .def("get", [](_CompGraphProfilerImpl& profiler) {
  248. return profiler._get_result();
  249. });
  250. using interpreter::intl::ProfilerPlugin;
  251. py::class_<ProfilerPlugin, std::shared_ptr<ProfilerPlugin>>(m, "GraphProfiler2")
  252. .def(py::init<cg::ComputingGraph*>());
  253. auto GraphOptimizeOptions =
  254. py::class_<_OptimizeForInferenceOptions>(m, "GraphOptimizeOptions")
  255. .def(py::init())
  256. .def("serialize", &_OptimizeForInferenceOptions::serialize)
  257. .def_static(
  258. "deserialize", &_OptimizeForInferenceOptions::deserialize)
  259. .def_readwrite(
  260. "f16_io_f32_comp",
  261. &_OptimizeForInferenceOptions::f16_io_f32_comp)
  262. .def_readwrite(
  263. "f16_io_comp", &_OptimizeForInferenceOptions::f16_io_comp)
  264. .def_readwrite(
  265. "fuse_conv_bias_nonlinearity",
  266. &_OptimizeForInferenceOptions::fuse_conv_bias_nonlinearity)
  267. .def_readwrite(
  268. "fuse_conv_bias_with_z",
  269. &_OptimizeForInferenceOptions::fuse_conv_bias_with_z)
  270. .def_readwrite(
  271. "fuse_preprocess",
  272. &_OptimizeForInferenceOptions::fuse_preprocess)
  273. .def_readwrite(
  274. "layout_transform",
  275. &_OptimizeForInferenceOptions::layout_transform);
  276. py::enum_<_LayoutTransform>(GraphOptimizeOptions, "LayoutTransform")
  277. .value("DEFAULT", _LayoutTransform::DEFAULT)
  278. .value("NCHW4", _LayoutTransform::NCHW4)
  279. .value("NHWCD4", _LayoutTransform::NHWCD4)
  280. .value("NCHW88", _LayoutTransform::NCHW88)
  281. .value("NCHW44", _LayoutTransform::NCHW44)
  282. .value("NCHW44_DOT", _LayoutTransform::NCHW44_DOT)
  283. .value("NCHW32", _LayoutTransform::NCHW32)
  284. .value("CHWN4", _LayoutTransform::CHWN4)
  285. .value("NCHW64", _LayoutTransform::NCHW64)
  286. .export_values();
  287. py::enum_<_SerializationFormat>(m, "SerializationFormat")
  288. .value("FBS", _SerializationFormat::FLATBUFFERS)
  289. .export_values();
  290. m.def("optimize_for_inference",
  291. [](const VarNodeArray& dest_vars, const _OptimizeForInferenceOptions& opt) {
  292. SymbolVarArray symvars(dest_vars.begin(), dest_vars.end());
  293. auto res_symvars = mgb::gopt::optimize_for_inference(symvars, opt);
  294. VarNodeArray vars;
  295. for (auto& si : res_symvars)
  296. vars.push_back(si.node());
  297. return vars;
  298. });
  299. m.def("modify_opr_algo_strategy_inplace",
  300. [](const VarNodeArray& dest_vars, const _AlgoStrategy& strategy) {
  301. mgb::gopt::modify_opr_algo_strategy_inplace(dest_vars, strategy);
  302. });
  303. m.def("get_info_for_strip", [](const std::vector<VarNode*>& dest_vars) {
  304. std::unordered_set<const char*> opr_types, dtype_names, elemwise_modes;
  305. auto on_opr = [&](cg::OperatorNodeBase* opr) {
  306. if (ser::GraphDumper::should_remove_in_dump(opr))
  307. return;
  308. opr_types.insert(opr->dyn_typeinfo()->name);
  309. for (auto i : opr->output())
  310. dtype_names.insert(i->dtype().name());
  311. if (opr->same_type<opr::Elemwise>()) {
  312. auto mode = opr->cast_final<opr::Elemwise>().param().mode;
  313. elemwise_modes.insert(
  314. megdnn::Elemwise::ModeTrait::from_mode(mode).name);
  315. }
  316. };
  317. cg::DepOprIter opr_iter{on_opr};
  318. for (auto i : dest_vars)
  319. opr_iter.add(i->owner_opr());
  320. auto to_json = [](const std::unordered_set<const char*>& v) {
  321. std::vector<std::string> vs(v.begin(), v.end());
  322. std::sort(vs.begin(), vs.end());
  323. auto ret = json::Array::make();
  324. for (auto&& i : vs)
  325. ret->add(json::String::make(i));
  326. return ret;
  327. };
  328. return json::Object::make({
  329. {"opr_types", to_json(opr_types)},
  330. {"dtypes", to_json(dtype_names)},
  331. {"elemwise_modes", to_json(elemwise_modes)},
  332. })
  333. ->to_string();
  334. });
  335. py::class_<_SerializationMetadata>(m, "SerializationMetadata")
  336. .def(py::init())
  337. .def_property(
  338. "user_info",
  339. [](const _SerializationMetadata& meta) {
  340. return py::bytes(meta.get_user_info());
  341. },
  342. &_SerializationMetadata::set_user_info)
  343. .def_readonly(
  344. "optimized_for_inference",
  345. &_SerializationMetadata::optimized_for_inference)
  346. .def_property(
  347. "optimize_options", &_SerializationMetadata::get_optimize_options,
  348. &_SerializationMetadata::set_optimize_options)
  349. .def_readwrite("graph_modified", &_SerializationMetadata::graph_modified)
  350. .def_readwrite("is_valid", &_SerializationMetadata::is_valid);
  351. m.def("dump_graph",
  352. [](const std::vector<VarNode*>& dest_vars, int keep_var_name,
  353. bool keep_opr_name, bool keep_param_name, bool keep_opr_priority,
  354. std::optional<_SerializationMetadata> metadata,
  355. std::optional<_SerializationFormat> dump_format, py::list& stat,
  356. py::list& inputs, py::list& outputs, py::list& params) {
  357. std::vector<uint8_t> buf;
  358. ser::GraphDumpFormat format;
  359. if (dump_format.has_value()) {
  360. format = dump_format.value();
  361. } else {
  362. format = {};
  363. }
  364. auto dumper = ser::GraphDumper::make(
  365. ser::OutputFile::make_vector_proxy(&buf), format);
  366. SymbolVarArray symvars(dest_vars.begin(), dest_vars.end());
  367. ser::GraphDumper::DumpConfig config{
  368. keep_var_name, keep_param_name, keep_opr_priority, keep_opr_name};
  369. ser::GraphDumper::DumpResult rst;
  370. if (metadata)
  371. rst = dumper->dump(symvars, config, *metadata);
  372. else
  373. rst = dumper->dump(symvars, config);
  374. for (auto i : rst.inputs) {
  375. inputs.append(py::cast(i));
  376. }
  377. for (auto i : rst.outputs) {
  378. outputs.append(py::cast(i));
  379. }
  380. for (auto i : rst.params) {
  381. params.append(py::cast(i));
  382. }
  383. auto rst_stat = std::vector{
  384. rst.nr_opr, rst.tot_bytes, rst.tensor_value_bytes,
  385. static_cast<size_t>(rst.content_hash)};
  386. for (auto i : rst_stat) {
  387. stat.append(py::cast(i));
  388. }
  389. return py::bytes(reinterpret_cast<const char*>(&buf[0]), buf.size());
  390. });
  391. m.def("load_graph",
  392. [](std::string& buf, py::list& output_var_map, py::list& output_var_list) {
  393. auto file = ser::InputFile::make_mem_proxy(buf.c_str(), buf.length());
  394. auto format = ser::GraphLoader::identify_graph_dump_format(*file);
  395. auto loader = ser::GraphLoader::make(std::move(file), format.val());
  396. ser::GraphLoader::LoadConfig config;
  397. auto rst = loader->load(config);
  398. for (auto i : rst.output_var_map) {
  399. output_var_map.append(py::make_tuple(i.first, i.second.node()));
  400. }
  401. for (auto i : rst.output_var_list) {
  402. output_var_list.append(i.node());
  403. }
  404. std::unordered_map<HostTensorND*, const std::string*> tensor2name;
  405. for (const auto& pair : rst.tensor_map) {
  406. tensor2name[pair.second.get()] = &pair.first;
  407. }
  408. auto cb = [&tensor2name, graph = rst.graph](cg::OperatorNodeBase* opr) {
  409. if (!opr->same_type<opr::Host2DeviceCopy>())
  410. return;
  411. auto& h2d = opr->cast_final_safe<opr::Host2DeviceCopy>();
  412. auto it = tensor2name.find(h2d.host_data().get());
  413. mgb_throw_if(
  414. it == tensor2name.end(), GraphError,
  415. "unbound Host2DeviceCopy in loaded graph");
  416. h2d.output(0)->name(*it->second);
  417. };
  418. cg::DepOprIter iter{cb};
  419. for (const auto& var : rst.output_var_list) {
  420. iter.add(var);
  421. }
  422. auto ret = py::tuple(2);
  423. ret[0] = py::cast(rst.graph);
  424. ret[1] = py::cast(rst.metadata);
  425. return ret;
  426. });
  427. #define CURRENT_CLASS cg::ComputingGraph::Options
  428. // clang-format off
  429. auto PyComputingGraphOptions =
  430. py::class_<cg::ComputingGraph::Options>(PyComputingGraph, "Options")
  431. // DEF_READWRITE(opr_attribute)
  432. DEF_READWRITE(seq_opt)
  433. DEF_READWRITE(graph_opt)
  434. DEF_READWRITE(graph_opt_level)
  435. DEF_READWRITE(log_level)
  436. DEF_READWRITE(async_exec_level)
  437. DEF_READWRITE(force_dynamic_alloc)
  438. DEF_READWRITE(var_sanity_check_first_run)
  439. DEF_READWRITE(allocate_static_mem_after_graph_compile)
  440. DEF_READWRITE(fake_next_exec)
  441. DEF_READWRITE(enable_sublinear_memory_opt)
  442. DEF_READWRITE(enable_dtr_memory_opt)
  443. DEF_READWRITE(no_profiling_on_shape_change)
  444. DEF_READWRITE(enable_var_mem_defragment)
  445. DEF_READWRITE(enable_grad_var_static_reshape)
  446. DEF_READWRITE(enable_memory_swap)
  447. DEF_READWRITE(comp_node_seq_record_level)
  448. DEF_READWRITE(no_force_inplace)
  449. DEF_READWRITE(sublinear_mem_config)
  450. DEF_READWRITE(dtr_config)
  451. // DEF_READWRITE(eager_evaluation)
  452. // DEF_READWRITE(imperative_proxy_graph)
  453. // DEF_READWRITE(extra_vardeps)
  454. // DEF_READWRITE(user_data)
  455. ;
  456. // clang-format on
  457. #undef CURRENT_CLASS
  458. #define CURRENT_CLASS cg::ComputingGraph::Options::SeqOpt
  459. py::class_<cg::ComputingGraph::Options::SeqOpt>(PyComputingGraphOptions, "SeqOpt")
  460. DEF_READWRITE(enable_mem_plan_opt) DEF_READWRITE(enable_mem_reuse_alloc)
  461. DEF_READWRITE(enable_seq_comp_node_opt);
  462. #undef CURRENT_CLASS
  463. #define CURRENT_CLASS cg::ComputingGraph::Options::GraphOpt
  464. auto PyGraphOpt = py::class_<cg::ComputingGraph::Options::GraphOpt>(
  465. PyComputingGraphOptions, "GraphOpt") DEF_READWRITE(jit)
  466. DEF_READWRITE(jit_config)
  467. DEF_READWRITE(tensorrt);
  468. #undef CURRENT_CLASS
  469. #define CURRENT_CLASS cg::ComputingGraph::Options::GraphOpt::JITConfig
  470. py::class_<cg::ComputingGraph::Options::GraphOpt::JITConfig>(
  471. PyGraphOpt, "JITConfig") DEF_READWRITE(fuse_dimshuffle)
  472. DEF_READWRITE(fuse_reduce);
  473. #undef CURRENT_CLASS
  474. #define CURRENT_CLASS cg::ComputingGraph::Options::SublinearMemConfig
  475. py::class_<cg::ComputingGraph::Options::SublinearMemConfig>(
  476. PyComputingGraphOptions, "SublinearMemConfig") DEF_READWRITE(thresh_nr_try)
  477. DEF_READWRITE(genetic_nr_iter) DEF_READWRITE(genetic_pool_size)
  478. DEF_READWRITE(lb_memory_mb) DEF_READWRITE(num_worker);
  479. #undef CURRENT_CLASS
  480. #define CURRENT_CLASS cg::ComputingGraph::Options::DTRConfig
  481. py::class_<cg::ComputingGraph::Options::DTRConfig>(
  482. PyComputingGraphOptions, "DTRConfig") DEF_READWRITE(eviction_threshold)
  483. DEF_READWRITE(evictee_minimum_size) DEF_READWRITE(recomp_memory_factor)
  484. DEF_READWRITE(recomp_time_factor);
  485. #undef CURRENT_CLASS
  486. auto common = rel_import("common", m, 1);
  487. common.def(
  488. "invoke_op",
  489. [](const OpDef& def, const std::vector<cg::VarNode*> inputs,
  490. cg::ComputingGraph* graph) {
  491. cg::VarNodeArray vinputs(inputs.begin(), inputs.end());
  492. return to_tuple(OpDef::apply_on_var_node(def, vinputs));
  493. },
  494. py::arg(), py::arg(), py::arg("graph") = py::none());
  495. auto input_callback = [](auto callback, const CompNode& comp_node,
  496. const DType& dtype, const TensorShape& shape,
  497. const std::vector<cg::VarNode*>& inputs,
  498. cg::ComputingGraph* graph, bool use_static_shape) {
  499. if (!graph) {
  500. graph = inputs[0]->owner_graph();
  501. }
  502. SymbolVarArray sinputs;
  503. for (auto i : inputs) {
  504. sinputs.emplace_back(i);
  505. }
  506. static_assert(!std::is_reference<decltype(callback)>::value);
  507. auto soutputs = opr::InputCallback::make(
  508. *graph, std::move(callback), comp_node, dtype, shape, sinputs,
  509. use_static_shape);
  510. std::vector<VarNode*> outputs;
  511. outputs.reserve(soutputs.size());
  512. for (auto i : soutputs) {
  513. outputs.push_back(i.node());
  514. }
  515. return outputs;
  516. };
  517. m.def("make_shared", [](cg::ComputingGraph* graph, const DeviceTensorND& data) {
  518. return opr::SharedDeviceTensor::make(
  519. *graph, std::make_shared<DeviceTensorND>(data))
  520. .node();
  521. });
  522. m.def(
  523. "make_const",
  524. [](cg::ComputingGraph* graph, py::array data, CompNode cn, DType dtype,
  525. std::optional<std::string> name) {
  526. if (!cn.valid()) {
  527. cn = CompNode::load(get_default_device());
  528. }
  529. OperatorNodeConfig config(cn);
  530. if (name) {
  531. config.name(*name);
  532. }
  533. auto hv = npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype);
  534. return opr::ImmutableTensor::make(*graph, hv, config).node();
  535. },
  536. py::arg(), py::arg(), py::arg(), py::arg(), py::arg() = py::none());
  537. m.def(
  538. "make_h2d",
  539. [](cg::ComputingGraph& graph, CompNode cn, DType dtype, TensorShape shape,
  540. std::optional<std::string> name) {
  541. if (!cn.valid()) {
  542. throw py::type_error("device must be valid");
  543. }
  544. if (!dtype.valid()) {
  545. throw py::type_error("dtype must be valid");
  546. }
  547. OperatorNodeConfig config;
  548. if (name) {
  549. config.name(*name);
  550. }
  551. return opr::Host2DeviceCopy::make(
  552. graph, std::make_shared<HostTensorND>(cn, shape, dtype),
  553. config)
  554. .node();
  555. },
  556. py::arg(), py::arg(), py::arg(), py::arg() = py::none(),
  557. py::arg() = py::none());
  558. m.def("_replace_vars", &_replace_vars, py::arg(), py::arg(), py::arg());
  559. m.def("_replace_oprs", &_replace_oprs, py::arg(), py::arg(), py::arg());
  560. m.def("_set_priority_to_id", &_set_priority_to_id, py::arg());
  561. m.def(
  562. "input_callback",
  563. [input_callback](
  564. std::function<DeviceTensorND(void)> callback,
  565. const CompNode& comp_node, const DType& dtype,
  566. const TensorShape& shape, const std::vector<cg::VarNode*>& inputs,
  567. cg::ComputingGraph* graph, bool use_static_shape) {
  568. return input_callback(
  569. [f = std::move(callback)]() {
  570. py::gil_scoped_acquire _;
  571. return f();
  572. },
  573. comp_node, dtype, shape, inputs, graph, use_static_shape);
  574. },
  575. py::arg(), py::arg(), py::arg(), py::arg() = py::none(),
  576. py::arg() = py::tuple(), py::arg("graph") = py::none(),
  577. py::arg("use_static_shape") = false);
  578. m.def(
  579. "input_callback",
  580. [input_callback](
  581. std::shared_ptr<Rendezvous<DeviceTensorND>> p,
  582. const CompNode& comp_node, const DType& dtype,
  583. const TensorShape& shape, const std::vector<cg::VarNode*>& inputs,
  584. cg::ComputingGraph* graph, bool use_static_shape) {
  585. auto f = [p]() -> DeviceTensorND { return p->get(); };
  586. return input_callback(
  587. std::move(f), comp_node, dtype, shape, inputs, graph,
  588. use_static_shape);
  589. },
  590. py::arg(), py::arg(), py::arg(), py::arg() = py::none(),
  591. py::arg() = py::tuple(), py::arg("graph") = py::none(),
  592. py::arg("use_static_shape") = false);
  593. auto output_callback = [](auto callback, const std::vector<cg::VarNode*>& inputs,
  594. std::shared_ptr<RendezvousBase> r = {},
  595. bool borrow = false, bool prefer_host_value = false) {
  596. if (r) {
  597. mgb_assert(inputs.size());
  598. auto cg = inputs[0]->owner_graph();
  599. cg->options()
  600. .user_data.get_user_data_or_create<WeakRendezvousArray>()
  601. ->emplace_back(r);
  602. }
  603. SymbolVarArray sinputs;
  604. for (auto i : inputs) {
  605. sinputs.emplace_back(i);
  606. }
  607. static_assert(!std::is_reference<decltype(callback)>::value);
  608. opr::OutputCallback::Param param{
  609. std::move(callback), borrow, prefer_host_value};
  610. auto output = opr::OutputCallback::make(std::move(param), sinputs);
  611. return output.node();
  612. };
  613. m.def("output_callback", [output_callback](
  614. std::function<void(DeviceTensorND)> callback,
  615. std::vector<cg::VarNode*> inputs) {
  616. auto f = [f = std::move(callback)](DeviceTensorND dv) {
  617. auto task = [f = std::move(f), dv = std::move(dv)]() { f(dv); };
  618. py_task_q.add_task(std::move(task));
  619. };
  620. return output_callback(std::move(f), std::move(inputs));
  621. });
  622. m.def("output_callback", [output_callback](
  623. std::shared_ptr<Rendezvous<DeviceTensorND>> p,
  624. std::vector<cg::VarNode*> inputs) {
  625. auto f = [p](DeviceTensorND dv) { p->set(std::move(dv)); };
  626. return output_callback(std::move(f), std::move(inputs), p);
  627. });
  628. m.def("value_output_callback",
  629. [output_callback](
  630. std::shared_ptr<Rendezvous<HostNDWithEvent>> p,
  631. std::vector<cg::VarNode*> inputs) {
  632. auto f = [p](DeviceTensorND dv) {
  633. HostNDWithEvent hv_with_event;
  634. hv_with_event.first.copy_from(dv);
  635. hv_with_event.second = dv.comp_node().create_event();
  636. hv_with_event.second->record();
  637. p->set(std::move(hv_with_event));
  638. };
  639. return output_callback(std::move(f), std::move(inputs), p, true, true);
  640. });
  641. m.def("attr_output_callback", [output_callback](
  642. std::shared_ptr<Rendezvous<TensorAttr>> p,
  643. std::vector<cg::VarNode*> inputs) {
  644. auto f = [p](DeviceTensorND dv) {
  645. p->set(TensorAttr{TensorLayout{dv.shape(), dv.dtype()}, dv.comp_node()});
  646. };
  647. return output_callback(std::move(f), std::move(inputs), p, true);
  648. });
  649. m.def("virtual_dep", [](std::vector<cg::VarNode*> inputs, std::string device) {
  650. auto&& graph = inputs[0]->owner_graph();
  651. VarNodeArray inps(inputs.begin(), inputs.end());
  652. cg::OperatorNodeConfig config;
  653. if (device.length() > 0) {
  654. config.comp_node(CompNode::load(device));
  655. }
  656. cg::OperatorNodeBase* opr =
  657. graph->insert_opr(std::make_unique<mgb::opr::VirtualDep>(inps, config));
  658. return opr;
  659. });
  660. }

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台