
megbrain_wrap.h

/**
 * \file python_module/src/cpp/megbrain_wrap.h
 *
 * This file is part of MegBrain, a deep learning framework developed by
 * Megvii.
 *
 * \brief wrappers for basic functionalities
 *
 * \copyright Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 */

#pragma once

#include "./python_helper.h"
#include "./megbrain_pubapi.h"

#include "megbrain/graph.h"
#include "megbrain/opr/io.h"
#include "megbrain/plugin/opr_footprint.h"

#include <map>
#include <string>

class CompGraph;
class CompGraphCallbackValueProxy;

/*!
 * \brief proxy a mgb::DeviceTensorND or a SymbolVar
 */
class SharedND {
    mgb::pubapi::DeviceTensor m_pubapi_dev_tensor;
    std::shared_ptr<mgb::DeviceTensorND> m_dev_tensor;
    mgb::HostTensorND m_async_copy_refkeeper;
    mgb::VarNode* m_var = nullptr;
    bool m_copy_sync = true;

    bool sync(mgb::DeviceTensorND& dv);
    inline void _check_before_share_memory(const SharedND& rhs);

public:
    SharedND() = default;

    SharedND(mgb::CompNode node, PyObject* dtype)
            : m_dev_tensor(std::make_shared<mgb::DeviceTensorND>(
                      node, npy::dtype_np2mgb(dtype))) {}

    SharedND(const std::shared_ptr<mgb::DeviceTensorND>& dv)
            : m_dev_tensor(dv) {}

    //! set init shape; can only be called once
    void _set_init_shape(const std::vector<size_t>& shape);

    //! resize to given shape
    void _resize(const std::vector<size_t>& shape);

    //! reset dev_tensor to zeros
    void _reset_zero();

    /*!
     * \brief assign a dev tensor to this proxy; used by craniotome
     */
    void assign(const mgb::DeviceTensorND& dv) {
        mgb_assert(!m_dev_tensor && !m_var);
        m_dev_tensor = std::make_shared<mgb::DeviceTensorND>(dv);
    }

    /*!
     * \brief assign a var node to this proxy; used by craniotome
     */
    void assign(mgb::VarNode* var) {
        mgb_assert(!m_dev_tensor && !m_var);
        m_var = var;
    }

    /*!
     * \brief share memory from another SharedND; only used in ParamPack
     */
    void _share_memory_from(const SharedND& rhs, size_t begin);

    /*!
     * \brief reset dev_tensor to that of another SharedND
     */
    void _reset_dev_tensor(const SharedND& rhs);

    uintptr_t _pubapi_dev_tensor_ptr(int version);

    mgb::SymbolVar _as_sym_var(CompGraph& cg, const std::string& name,
                               bool volatile_);

    mgb::CompNode _get_comp_node() {
        return m_dev_tensor->comp_node();
    }

    void _set_copy_sync(bool flag) {
        m_copy_sync = flag;
    }

    //! get dev buffer from shared nd
    const std::shared_ptr<mgb::DeviceTensorND>& dev_tensor() {
        return m_dev_tensor;
    }

    void _copy_from_npyarr(PyObject* npyarr);
    void _copy_from_value_proxy(CompGraphCallbackValueProxy& value);
    void _share_from_value_proxy(CompGraphCallbackValueProxy& value);
    static SharedND _from_symvar(mgb::SymbolVar symvar);

    //! get numpy ndarray that contains a copy of the value; returns a new ref
    PyObject* _get_npyarr();
    PyObject* _get_dtype();
    std::vector<size_t> _get_shape();

    /*!
     * \brief copy into a sub of this from another SharedND
     * \param axis axis for sub, or -1 to work on the flattened array
     */
    void copy_to_sub_from_shared(
            int axis, ptrdiff_t begin, ptrdiff_t end, ptrdiff_t step,
            const SharedND& rhs);

    /*!
     * \brief copy from a sub of another SharedND into this
     * \param axis axis for sub, or -1 to work on the flattened array, -2 to
     *      copy the whole tensor, -3 to copy the whole tensor with fixed
     *      layout
     */
    void copy_from_shared_sub(const SharedND& rhs,
            int axis, ptrdiff_t begin, ptrdiff_t end, ptrdiff_t step);
};
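
// Hypothetical usage sketch, not part of this header: assuming `cn` is a
// valid mgb::CompNode and `dtype` / `arr` are PyObject* handles to a numpy
// dtype and ndarray, a SharedND is filled from host data and read back
// roughly like this:
//
//     SharedND nd{cn, dtype};
//     nd._set_init_shape({16, 32});       // shape can only be set once
//     nd._copy_from_npyarr(arr);          // copy host data to the device tensor
//     PyObject* out = nd._get_npyarr();   // new reference holding a host copy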
/*!
 * \brief wraps around a shared pointer to mgb::HostTensorND
 */
class _HostSharedND {
    bool m_own_storage = false, m_borrow_on_cpu = false;
    std::shared_ptr<mgb::HostTensorND> m_tensor;

    //! set to non-null if this _HostSharedND is set to proxy a var
    mgb::opr::Host2DeviceCopy* m_proxied_opr = nullptr;

    void ensure_own_storage();

public:
    _HostSharedND() = default;

    _HostSharedND(const _HostSharedND& rhs)
            : m_own_storage{false},
              m_tensor{rhs.m_tensor},
              m_proxied_opr{rhs.m_proxied_opr} {}

    _HostSharedND(mgb::CompNode node, mgb::DType dtype)
            : m_own_storage{true},
              m_tensor{std::make_shared<mgb::HostTensorND>(node, dtype)} {}

    _HostSharedND(mgb::CompNode node, PyObject* dtype)
            : _HostSharedND(node, npy::dtype_np2mgb(dtype)) {}

    _HostSharedND& operator=(const _HostSharedND&) = delete;

    /*!
     * \brief make a _HostSharedND by proxying a var produced by
     *      Host2DeviceCopy
     */
    static _HostSharedND make_proxy(mgb::SymbolVar var);

    mgb::SymbolVar _as_sym_var(CompGraph& cg, bool enable_static_infer,
                               const std::string& name);

    void _resize(const std::vector<size_t>& shape);
    void _copy_from_npyarr(PyObject* npyarr, bool borrow);

    void _enable_borrow_on_cpu(bool flag) {
        m_borrow_on_cpu = flag;
    }

    std::string __repr__() const;
    PyObject* _get_dtype();
};
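
// Hypothetical usage sketch, not part of this header: make_proxy attaches
// this wrapper to the host tensor behind an existing Host2DeviceCopy var, so
// host-side updates feed the next graph execution. Assuming `var` is a
// mgb::SymbolVar produced by Host2DeviceCopy and `arr` is a PyObject*
// ndarray:
//
//     _HostSharedND host = _HostSharedND::make_proxy(var);
//     host._copy_from_npyarr(arr, false);   // refill the host buffer before the next run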
/*!
 * \brief proxy a value to be passed to a computing graph callback
 */
class CompGraphCallbackValueProxy {
    mgb::pubapi::DeviceTensor m_pubapi_dev_tensor;
    bool m_is_active = false;  //!< setup called but on_finished not called
    bool m_use_raw_hv = false;
    bool m_value_used, m_eager_copy;
    mgb::HostTensorND m_hv;
    std::shared_ptr<mgb::CompNode::Event> m_copy_event;

    //! original dev value
    mgb::DeviceTensorND m_dev_value;

    //! perform D2H copy
    void do_copy();

public:
    static CompGraphCallbackValueProxy make_raw_host_value_proxy(
            const mgb::HostTensorND& hv);

    bool eager_copy() const {
        return m_eager_copy;
    }

    mgb::DeviceTensorND& dev_tensor() {
        return m_dev_value;
    }

    void setup(const mgb::DeviceTensorND& val, bool eager_copy);
    void sync();

    /*!
     * \brief called after the python callback has returned
     */
    void on_finished();

    //! get numpy ndarray that contains a copy of the value; returns a new ref
    PyObject* _get_npyarr();
    PyObject* _get_dtype();
    std::vector<size_t> _get_shape();
    uintptr_t _pubapi_dev_tensor_ptr(int version);
    mgb::CompNode _get_comp_node();
};
class AsyncExec {
public:
    class Core;

    AsyncExec() = default;
    ~AsyncExec();
    AsyncExec(std::unique_ptr<mgb::cg::AsyncExecutable> f);

    void _execute();
    void _wait();
    double _get_prev_exec_time();
    void clear_device_memory();

    std::vector<std::pair<mgb::CompNode, size_t>>
    _update_static_alloc_plan_and_get_size();

    std::string _to_json_str();

    /*!
     * \brief find all Host2DeviceCopy input vars that are mutable (i.e.
     *      used as func args)
     */
    mgb::SymbolVarArray _find_mutable_input();

    Core* core() const;

    void set_multi_part_par_graph(std::shared_ptr<mgb::ComputingGraph> g) {
        m_multi_part_par_graph = std::move(g);
    }

private:
    std::shared_ptr<Core> m_core;

    //! parent graph in multi-part compiling
    std::shared_ptr<mgb::ComputingGraph> m_multi_part_par_graph;
};
/*!
 * \brief callback wrapper for computing graph
 */
class _CompGraphCallback {
    bool m_cb_created = false, m_eager_copy = false;
    AsyncExec::Core* m_ae_core = nullptr;
    std::vector<CompGraphCallbackValueProxy> m_value_proxies;

public:
    /*!
     * \brief set the AsyncExec associated with this callback; if it is set,
     *      eager value copy would be enabled
     */
    void set_async_exec(const AsyncExec& ae);

    /*!
     * \brief set whether to enable eager copy
     *
     * If eager copy is enabled, the device-to-host copy starts immediately
     * and asynchronously when this callback is executed by megbrain.
     */
    void set_eager_copy(bool flag);

    virtual ~_CompGraphCallback() = default;

    std::function<void(mgb::SmallVector<mgb::DeviceTensorND>&)>
    make_multi_input_callback();

    std::function<void(mgb::DeviceTensorND&)> make_callback();

    /*!
     * \brief call the python callback
     */
    void call_pycb();

    /*!
     * \brief python callback to be overridden
     */
    virtual void call(std::vector<CompGraphCallbackValueProxy>&) = 0;
};
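
// Hypothetical subclass sketch, not part of this header: `call` is the hook
// that receives the value proxies; a C++-side callback would look roughly
// like this:
//
//     class ShapeLogger final : public _CompGraphCallback {
//         void call(std::vector<CompGraphCallbackValueProxy>& vals) override {
//             for (auto&& v : vals) {
//                 auto shape = v._get_shape();   // shape of the computed value
//                 (void)shape;                   // e.g. log or validate it here
//             }
//         }
//     };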
/*!
 * \brief wrap around a shared mgb::ComputingGraph
 */
class CompGraph {
    class PyUserData;

    mgb::SmallVector<mgb::ComputingGraph::OutputSpec> m_out_specs;
    //! (callback, output spec part)
    mgb::SmallVector<std::pair<_CompGraphCallback*, size_t>> m_raw_callbacks;
    std::shared_ptr<mgb::ComputingGraph> m_comp_graph_own;
    std::weak_ptr<mgb::ComputingGraph> m_comp_graph_borrow;

    explicit CompGraph(const std::shared_ptr<mgb::ComputingGraph>& cg)
            : m_comp_graph_own{cg} {}

    explicit CompGraph(const std::weak_ptr<mgb::ComputingGraph>& cg)
            : m_comp_graph_borrow{cg} {}

public:
    CompGraph() : m_comp_graph_own(mgb::ComputingGraph::make()) {}

    // A mgb::cg::ComputingGraph may be wrapped in a CompGraph in two ways:
    // 1. borrowing a ComputingGraph;
    // 2. owning a shared_ptr to a ComputingGraph.
    // The constructors are private; factory functions make the choice
    // explicit at the call site (the "named constructor" idiom).

    /*!
     * \brief wrap a ComputingGraph by borrowing a reference
     */
    static CompGraph make_from_weak_ptr(
            const std::weak_ptr<mgb::ComputingGraph>& cg) {
        return CompGraph{cg};
    }

    /*!
     * \brief wrap a ComputingGraph by owning one of its references
     */
    static CompGraph make_from_shared_ptr(
            const std::shared_ptr<mgb::ComputingGraph>& cg) {
        return CompGraph{cg};
    }

    CompGraph(const mgb::cg::SymbolVarArray& dest_symbol_vars) {
        m_comp_graph_own = mgb::ComputingGraph::make();
        mgb::cg::replace_vars_comp_graph(dest_symbol_vars,
                                         m_comp_graph_own.get());
    }

    void clear_device_memory();

    //! get the underlying ComputingGraph instance
    mgb::ComputingGraph& get() const;

    CompGraph& share_device_memory_with(CompGraph& other) {
        get().share_device_memory_with(other.get());
        return *this;
    }

    //! get a dict to store arbitrary user data
    PyObject* _user_data();

    AsyncExec _do_compile(bool copy, bool optimize_for_inference);
    std::vector<AsyncExec> _do_compile_multi_part();

    /*!
     * \brief add an output spec
     * \param callback callback to be invoked, or nullptr to compute the
     *      output var only
     */
    void _add_output_spec(mgb::cg::SymbolVar& var,
                          _CompGraphCallback* callback);

    //! mark the currently added output specs as one part in a multi-part
    //! compile
    void _add_multi_part_endpoint() {
        m_out_specs.emplace_back();
    }

    void _clear_output_spec() {
        m_raw_callbacks.clear();
        m_out_specs.resize(1);
        m_out_specs[0].clear();
    }

    size_t _release() {
        if (m_comp_graph_own) {
            auto ret = m_comp_graph_own.use_count();
            m_comp_graph_own.reset();
            return ret;
        }
        m_comp_graph_borrow.reset();
        return 0;
    }
};
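
// Usage sketch for the named constructors above, not part of this header:
// ownership is explicit at the call site:
//
//     auto g = mgb::ComputingGraph::make();
//     CompGraph owned = CompGraph::make_from_shared_ptr(g);    // keeps g alive
//     CompGraph borrowed = CompGraph::make_from_weak_ptr(g);   // does not extend g's lifetime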
//! wrap shared_ptr<DTypeScalar>
class SharedScalar {
    bool m_dtype_locked = false;
    std::shared_ptr<mgb::DTypeScalar> m_val;
    mgb::HostTensorND m_val_as_host_nd;
    mgb::CompNode::UnorderedMap<std::shared_ptr<mgb::DeviceTensorND>>
            m_dev_val;

    mgb::HostTensorND& val_as_host_nd();

public:
    SharedScalar(PyObject* val);
    void _set(PyObject* val);
    PyObject* _get();
    mgb::SymbolVar _as_sym_var(CompGraph& cg, mgb::CompNode& cn);

    void _lock_dtype() {
        m_dtype_locked = true;
    }

    bool _dtype_locked() {
        return m_dtype_locked;
    }

    const std::shared_ptr<mgb::DTypeScalar>& get_val() const {
        return m_val;
    }
};
/*!
 * \brief wrap around a mgb::cg::OperatorNodeBase
 */
class Operator {
    mgb::cg::OperatorNodeBase* m_operator_node;
    std::string m_params;
    static const std::unique_ptr<mgb::OprFootprint> sm_opr_footprint_ptr;

public:
    Operator() : m_operator_node(nullptr) {}

    Operator(mgb::cg::OperatorNodeBase* operator_node)
            : m_operator_node(operator_node),
              m_params(sm_opr_footprint_ptr->calc_footprint(m_operator_node)
                               .param->to_string()) {}

    size_t id() const { return m_operator_node->id(); }

    const std::string& name() const { return m_operator_node->name(); }

    const std::string& params() const { return m_params; }

    std::shared_ptr<mgb::ComputingGraph> get_owner_graph() const {
        return m_operator_node->owner_graph()->shared_from_this();
    }

    mgb::SymbolVarArray inputs() const {
        return mgb::cg::to_symbol_var_array(m_operator_node->input());
    }

    mgb::SymbolVarArray outputs() const {
        return mgb::cg::to_symbol_var_array(m_operator_node->output());
    }

    mgb::cg::OperatorNodeBase* node() const { return m_operator_node; }
};
//! get inferred value as numpy ndarray, or None
PyObject* get_symvar_inferred_value(mgb::SymbolVar var);

mgb::SymbolVar fill_retain_dtype(mgb::SymbolVar var, PyObject* value);

//! whether _mgb_global_finalize() has been called
bool global_finalized();

#ifndef SWIG
void mark_as_input(mgb::cg::ComputingGraph* cg, mgb::cg::SymbolVar var);
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine installation package bundles the CUDA environment required to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has a GPU device and that its driver is installed. If you would like to try deep learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.