You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

opr_defs.cpp 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. /**
  2. * \file python_module/src/cpp/opr_defs.cpp
  3. *
  4. * This file is part of MegBrain, a deep learning framework developed by Megvii.
  5. *
  6. * \copyright Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  7. *
  8. */
  9. #include "./opr_defs.h"
  10. #include "./opr_helper.h"
  11. #include "./python_helper.h"
  12. #if MGB_ENABLE_OPR_MM
  13. #include "mm_handler.h"
  14. #endif
  15. #include "megbrain/opr/io.h"
  16. #include "megbrain/serialization/extern_c_opr_io.h"
  17. using namespace mgb;
  18. using namespace mgb::opr;
  19. SymbolVar _Opr::_axis_add_remove(SymbolVar src,
  20. const std::vector<int>& axis, bool is_add,
  21. const OperatorNodeConfig &config) {
  22. using ADR = mgb::opr::AxisAddRemove;
  23. std::vector<ADR::AxisDesc> desc;
  24. mgb_assert(!axis.empty());
  25. for (auto i: axis) {
  26. if (is_add) {
  27. desc.emplace_back(ADR::AxisDesc::make_add(i));
  28. } else {
  29. desc.emplace_back(ADR::AxisDesc::make_remove(i));
  30. }
  31. }
  32. return ADR::make(src, desc, config);
  33. }
  34. SymbolVarArray _Opr::param_pack_split(
  35. SymbolVar src, SymbolVar table,
  36. const std::vector<std::vector<size_t>>& shapes,
  37. const OperatorNodeConfig& config) {
  38. auto size = shapes.size();
  39. mgb::TensorShapeArray shapearr(size);
  40. for (size_t i = 0; i < size; i++) {
  41. shapearr[i] = npy::vec2shape(shapes[i]);
  42. }
  43. if (!table.node()) {
  44. auto cn = src.node()->comp_node();
  45. if (config.has_comp_node_set()) {
  46. cn = config.get_single_comp_node();
  47. }
  48. auto table_val = megdnn::ParamPackSplit::gen_table(
  49. shapearr, cn.get_mem_addr_alignment(), src.dtype().size());
  50. HostTensorND hv{cn, TensorShape{table_val.size()}, dtype::Int32{}};
  51. memcpy(hv.raw_ptr(), table_val.data(), table_val.size() * sizeof(int));
  52. table = opr::ImmutableTensor::make(*src.node()->owner_graph(), hv);
  53. }
  54. return mgb::opr::ParamPackSplit::make(src, table, shapearr, config);
  55. }
  56. #if MGB_ENABLE_OPR_MM
  57. #include "megbrain/opr/lock.h"
  58. #include "megbrain/opr/io_remote.h"
//! create a LockAcquire opr: downstream oprs run only after the
//! (lock_id, group_id) lock has been taken
SymbolVar _Opr::lock_acquire(SymbolVar var, size_t lock_id, size_t group_id,
        const OperatorNodeConfig &config) {
    return mgb::opr::LockAcquire::make(var, {lock_id, group_id}, config);
}
//! create a LockRelease opr: releases the (lock_id, group_id) lock after
//! \p var is computed
SymbolVar _Opr::lock_release(SymbolVar var, size_t lock_id, size_t group_id,
        const OperatorNodeConfig &config) {
    return mgb::opr::LockRelease::make(var, {lock_id, group_id}, config);
}
  67. SymbolVar _Opr::remote_send(
  68. const std::string& server_addr, const int port,
  69. const std::string& key, SymbolVar var,
  70. const bool is_grad,
  71. const OperatorNodeConfig& config) {
  72. return RemoteSend::make({key, RemoteIOBase::Type::SEND, is_grad}, var,
  73. std::make_shared<GroupClientProxy>(ssprintf(
  74. "%s:%d", server_addr.c_str(), port)),
  75. config);
  76. }
  77. SymbolVar _Opr::remote_recv(const std::string& server_addr, const int port,
  78. const std::string& key, CompGraph& graph,
  79. const std::vector<size_t>& shape, PyObject* dtype,
  80. const OperatorNodeConfig& config) {
  81. const TensorShape ishape = npy::vec2shape(shape);
  82. const DType idtype = npy::dtype_np2mgb(dtype);
  83. return RemoteRecv::make({key, RemoteIOBase::Type::RECV, false},
  84. graph.get(),
  85. std::make_shared<GroupClientProxy>(
  86. ssprintf("%s:%d", server_addr.c_str(), port)),
  87. config, ishape, idtype);
  88. }
  89. SymbolVar _Opr::collective_comm_with_input(
  90. SymbolVar inpvar, const std::string& key,
  91. const size_t nr_devices, const uint32_t rank, const uint32_t root,
  92. const std::string& server_addr, const int port,
  93. PyObject* params, PyObject* dtype,
  94. const std::string& backend, SharedND* output_buf,
  95. const OperatorNodeConfig& config, const SharedScalar& disable) {
  96. SymbolVarArray inputs(1, inpvar);
  97. ComputingGraph* graph = inpvar.node()->owner_graph();
  98. auto group_mgr = std::make_shared<GroupClientProxy>(
  99. ssprintf("%s:%d", server_addr.c_str(), port));
  100. SmallVector<std::shared_ptr<mgb::DeviceTensorND>> dev_buffer_arr(1, nullptr);
  101. if (output_buf)
  102. dev_buffer_arr[0] = output_buf->dev_tensor();
  103. CollectiveComm::Param param = load_collective_comm_params(params, graph);
  104. mgb::DType _dtype = DType();
  105. if (dtype != Py_None) {
  106. _dtype = npy::dtype_np2mgb(dtype);
  107. }
  108. return CollectiveComm::make(inputs, graph, key, nr_devices, rank, root, group_mgr,
  109. dev_buffer_arr, param, _dtype, backend, config, disable.get_val())[0];
  110. }
  111. SymbolVar _Opr::collective_comm_without_input(
  112. CompGraph& cg, const std::string& key,
  113. const size_t nr_devices, const uint32_t rank, const uint32_t root,
  114. const std::string& server_addr, const int port,
  115. PyObject* params, PyObject* dtype,
  116. const std::string& backend, SharedND* output_buf,
  117. const OperatorNodeConfig& config, const SharedScalar& disable) {
  118. SymbolVarArray inputs;
  119. auto& graph = cg.get();
  120. auto group_mgr = std::make_shared<GroupClientProxy>(
  121. ssprintf("%s:%d", server_addr.c_str(), port));
  122. SmallVector<std::shared_ptr<mgb::DeviceTensorND>> dev_buffer_arr(1, nullptr);
  123. if (output_buf)
  124. dev_buffer_arr[0] = output_buf->dev_tensor();
  125. CollectiveComm::Param param = load_collective_comm_params(params, &graph);
  126. mgb::DType _dtype = DType();
  127. if (dtype != Py_None) {
  128. _dtype = npy::dtype_np2mgb(dtype);
  129. }
  130. return CollectiveComm::make(inputs, &graph, key, nr_devices, rank, root, group_mgr,
  131. dev_buffer_arr, param, _dtype, backend, config, disable.get_val())[0];
  132. }
  133. #else
namespace {
//! common failure path for all opr-mm entry points when MGB_ENABLE_OPR_MM
//! is disabled at compile time
[[noreturn]] void on_opr_mm() {
    mgb_throw(MegBrainError, "opr-mm disabled at compile time");
}
} // anonymous namespace
//! stub: opr-mm disabled at compile time; always throws
SymbolVar _Opr::lock_acquire(SymbolVar var, size_t lock_id, size_t group_id,
        const OperatorNodeConfig &config) {
    on_opr_mm();
}
//! stub: opr-mm disabled at compile time; always throws
SymbolVar _Opr::lock_release(SymbolVar var, size_t lock_id, size_t group_id,
        const OperatorNodeConfig &config) {
    on_opr_mm();
}
//! stub: opr-mm disabled at compile time; always throws
SymbolVar _Opr::remote_send(
        const std::string& server_addr, const int port,
        const std::string& key, SymbolVar var,
        const bool is_grad,
        const OperatorNodeConfig& config) {
    on_opr_mm();
}
//! stub: opr-mm disabled at compile time; always throws
SymbolVar _Opr::remote_recv(const std::string& server_addr, const int port,
        const std::string& key, CompGraph& graph,
        const std::vector<size_t>& shape, PyObject* dtype,
        const OperatorNodeConfig& config) {
    on_opr_mm();
}
//! stub: opr-mm disabled at compile time; always throws
SymbolVar _Opr::collective_comm_with_input(
        SymbolVar inpvar, const std::string& key,
        const size_t nr_devices, const uint32_t rank, const uint32_t root,
        const std::string& server_addr, const int port, PyObject* params,
        PyObject* dtype, const std::string& backend, SharedND* output_buf,
        const OperatorNodeConfig& config, const SharedScalar& disable) {
    on_opr_mm();
}
//! stub: opr-mm disabled at compile time; always throws
SymbolVar _Opr::collective_comm_without_input(
        CompGraph& cg, const std::string& key,
        const size_t nr_devices, const uint32_t rank, const uint32_t root,
        const std::string& server_addr, const int port, PyObject* params,
        PyObject* dtype, const std::string& backend, SharedND* output_buf,
        const OperatorNodeConfig& config, const SharedScalar& disable) {
    on_opr_mm();
}
  176. #endif // MGB_ENABLE_OPR_MM
  177. SymbolVarArray _Opr::extern_c_opr_placeholder(
  178. const SymbolVarArray& inputs,
  179. const std::vector<std::vector<size_t>>& output_shapes,
  180. PyObject* output_dtypes, const char* dump_name, PyObject* data_bytes,
  181. const OperatorNodeConfig& config) {
  182. mgb_assert(PyBytes_Check(data_bytes));
  183. if (output_dtypes != Py_None) {
  184. mgb_assert(PyTuple_Check(output_dtypes));
  185. mgb_assert(output_shapes.size() ==
  186. static_cast<size_t>(PyTuple_Size(output_dtypes)));
  187. }
  188. TensorShapeArray cpp_output_shapes(output_shapes.size());
  189. for (size_t i = 0; i < output_shapes.size(); ++i) {
  190. cpp_output_shapes[i] = npy::vec2shape(output_shapes[i]);
  191. }
  192. SmallVector<DType> cpp_output_dtypes;
  193. if (output_dtypes != Py_None) {
  194. size_t dtype_size = PyTuple_Size(output_dtypes);
  195. for (size_t i = 0; i < dtype_size; ++i) {
  196. cpp_output_dtypes.push_back(
  197. npy::dtype_np2mgb(PyTuple_GetItem(output_dtypes, i)));
  198. }
  199. }
  200. auto opr = opr::ExternCOprRunner::make_placeholder(
  201. inputs, cpp_output_shapes, dump_name, PyBytes_AsString(data_bytes),
  202. PyBytes_Size(data_bytes), config, cpp_output_dtypes);
  203. SymbolVarArray ret;
  204. ret.reserve(opr->output().size());
  205. for (auto i: opr->output())
  206. ret.emplace_back(i);
  207. return ret;
  208. }
  209. #if MGB_ENABLE_TENSOR_RT
  210. #include "megbrain/tensorrt/tensorrt_runtime_opr.h"
  211. SymbolVarArray _Opr::tensor_rt_runtime(const SymbolVarArray& inputs,
  212. PyObject* data_bytes,
  213. const OperatorNodeConfig& config) {
  214. mgb_assert(PyBytes_Check(data_bytes));
  215. auto size = PyBytes_Size(data_bytes);
  216. mgb_assert(size, "trt data bytes should not be empty");
  217. return opr::TensorRTRuntimeOpr::make(PyBytes_AsString(data_bytes),
  218. size, inputs,
  219. config);
  220. }
  221. #else
//! stub when TensorRT support is compiled out; always throws
SymbolVarArray _Opr::tensor_rt_runtime(const SymbolVarArray& inputs,
        PyObject* data_bytes,
        const OperatorNodeConfig& config) {
    mgb_throw(MegBrainError, "TensorRT disabled at compile time");
}
  227. #endif
  228. SymbolVar _Opr::timestamp(SymbolVar input, PyObject* dest, size_t dest_off,
  229. const OperatorNodeConfig& config) {
  230. auto tensor = std::make_shared<HostTensorND>(
  231. npy::np2tensor(dest, npy::Meth::must_borrow(), dtype::Float32{}));
  232. return opr::Timestamp::make(input, std::move(tensor), dest_off, config);
  233. }
//! create a VirtualLoss var that associates vars \p ys with their
//! externally-computed gradients \p y_grads, so graph grad can proceed
SymbolVar _Opr::virtual_loss(const SymbolVarArray& ys,
        const SymbolVarArray& y_grads,
        const OperatorNodeConfig& config) {
    return opr::VirtualLoss::make(ys, y_grads, {}, config);
}
//! create a VirtualDep var depending on all of \p symvars without using
//! their values
SymbolVar _Opr::virtual_dep(const SymbolVarArray& symvars,
        const OperatorNodeConfig& config) {
    return opr::VirtualDep::make(symvars, config);
}
  243. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台

Contributors (1)