
imperative_rt.cpp 4.7 kB

/**
 * \file imperative/python/src/imperative_rt.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "./imperative_rt.h"

#include <future>
#include <unordered_map>
#include <variant>

#include <pybind11/numpy.h>
#include <pybind11/operators.h>

#include "megbrain/imperative.h"
#include "megbrain/imperative/interpreter.h"
#include "megbrain/imperative/ops/opr_attr.h"

#include "./common.h"
#include "./helper.h"

namespace py = pybind11;

using namespace mgb;
using namespace imperative;
using namespace interpreter;

namespace {

// Wraps OpDef::make_backward_graph for Python: converts std::vector arguments
// to the SmallVector types the core API expects, and maps the "no backward
// graph" case to an empty optional.
std::optional<std::tuple<std::shared_ptr<OpDef>, std::vector<bool>, std::vector<bool>>>
make_backward_graph(
        const OpDef& opdef, std::vector<LogicalTensorDesc> inputs,
        std::vector<bool> input_requires_grad,
        std::vector<bool> output_has_grad) {
    auto res = OpDef::make_backward_graph(opdef,
            SmallVector<LogicalTensorDesc>(inputs.begin(), inputs.end()),
            SmallVector<bool>(input_requires_grad.begin(), input_requires_grad.end()),
            SmallVector<bool>(output_has_grad.begin(), output_has_grad.end()));
    if (res.backward) {
        return std::optional<std::tuple<std::shared_ptr<OpDef>, std::vector<bool>, std::vector<bool>>>{
                std::in_place, res.backward, res.save_for_backward, res.input_has_grad};
    } else {
        return {};
    }
}

} // namespace

void init_imperative_rt(py::module m) {
    py::class_<Interpreter::Channel>(m, "Interpreter")
            .def("put", [](Interpreter::Channel& self, py::array data, DType dtype, CompNode cn) {
                if (!cn.valid()) {
                    cn = CompNode::load(get_default_device());
                }
                // Borrow large arrays from numpy to avoid a copy; copy small
                // ones into a fresh HostTensorND.
                constexpr int size_threshold = TensorShape::MAX_NDIM;
                if (data.size() > size_threshold) {
                    return self.put(npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype));
                } else {
                    HostTensorND ret(cn);
                    return self.put(npy::np2tensor(data.ptr(), npy::Meth::copy_into(&ret), dtype));
                }
            }, py::arg(), py::arg("dtype") = py::none(), py::arg("device") = py::none())
            .def("put", py::overload_cast<const DeviceTensorND&>(&Interpreter::Channel::put))
            .def("delete", [](Interpreter::Channel& self, Interpreter::Handle handle) {
                return self.del(handle);
            })
            .def("get_value", [](Interpreter::Channel& self, Interpreter::Handle handle) {
                PyObject* optr = npy::ndarray_from_tensor(self.get_value(handle), npy::ShareType::TRY_SHARE);
                return py::reinterpret_steal<py::object>(optr);
            })
            .def("get_dtype", &Interpreter::Channel::get_dtype)
            .def("get_device", &Interpreter::Channel::get_device)
            .def("get_shape", &Interpreter::Channel::get_shape)
            .def("_get_dev_tensor", &Interpreter::Channel::get_dev_tensor)
            .def("apply_op", &Interpreter::Channel::apply_op)
            .def("config_async_level", &Interpreter::Channel::config_async_level)
            .def("get_async_level", &Interpreter::Channel::get_async_level)
            .def("sync", &Interpreter::Channel::sync, py::call_guard<py::gil_scoped_release>());

    // Create one global channel and re-export its bound methods at module level.
    std::unique_ptr<Interpreter::Channel> ch = Interpreter::inst().create_channel();
    m.attr("interpreter") = py::detail::make_caster<decltype(ch)>::cast(
            std::move(ch), py::return_value_policy::move, {});
    for (auto name : {"put", "delete", "get_value", "get_dtype", "get_device", "get_shape",
                      "_get_dev_tensor", "apply_op", "config_async_level", "get_async_level"}) {
        m.attr(name) = m.attr("interpreter").attr(name);
    }

    m.def("sync", [m]() {
        m.attr("interpreter").attr("sync")();
        py::gil_scoped_release _;
        py_task_q.wait_all_task_finish();
    });

    m.def("make_backward_graph", &make_backward_graph);

    py::class_<OpDef, std::shared_ptr<OpDef>>(m, "OpDef")
            .def("ctype", [](const OpDef& opdef) {
                // OprAttr-backed ops report their wrapped operator type;
                // all other ops report their own typeinfo name.
                if (auto attr = opdef.try_cast_final<OprAttr>()) {
                    return attr->type.c_str();
                }
                return opdef.dyn_typeinfo()->name;
            })
            .def("__eq__", [](const OpDef& lhs, const OpDef& rhs) {
                return lhs.is_same(rhs);
            })
            .def("__hash__", &OpDef::hash);
}
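
Since this file only defines the bindings, a short sketch of how they might be driven from Python may help. This is a hypothetical sketch, not part of the file: it assumes the compiled extension is importable as megengine.core._imperative_rt (the actual packaging path may differ), and it exercises only the module-level re-exports (put, get_value, sync, delete) that the loop above creates from the global interpreter channel.

    # Hypothetical usage sketch; assumes the module built from this file is
    # importable as megengine.core._imperative_rt.
    import numpy as np
    from megengine.core import _imperative_rt as rt

    # "put" uploads a numpy array into the channel and returns an opaque handle.
    # dtype/device default to None, so the default device is chosen (see the
    # CompNode::load(get_default_device()) fallback in the binding).
    x = rt.put(np.ones((2, 3), dtype="float32"))

    # Metadata queries all take the same handle.
    print(rt.get_shape(x), rt.get_dtype(x), rt.get_device(x))

    # "get_value" brings the tensor back as a numpy array, sharing memory
    # when possible (TRY_SHARE in the binding above).
    print(rt.get_value(x))

    # Execution is asynchronous; module-level "sync" waits for queued kernels
    # and for the Python task queue to drain.
    rt.sync()

    # Handles are released explicitly via "delete" (bound to Channel::del).
    rt.delete(x)

Note the asymmetry the sketch relies on: everything between put and sync may run asynchronously at the async level configured via config_async_level, so get_value is the point where the caller actually blocks on a result.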
