You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

helper.cpp 26 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720
  1. /**
  2. * \file imperative/python/src/helper.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "./helper.h"
  12. #include <pybind11/eval.h>
  13. #include "megbrain/graph/exc_extra_info.h"
  14. #include "megbrain/graph/event.h"
  15. #include "megbrain/graph/cg.h"
  16. #include "megbrain/tensor.h"
  17. #include "megbrain/utils/mempool.h"
  18. #include "./numpy_dtypes.h"
  // Short alias for the pybind11 namespace, used throughout this file.
  19. namespace py = pybind11;
  // Namespace-scope task dispatcher object, value-initialized. The
  // PyTaskDipatcher type is not defined in this file (presumably it is declared
  // in ./helper.h -- TODO confirm).
  20. PyTaskDipatcher py_task_q = {};
  21. py::module submodule(py::module parent, const char* name, const char* doc) {
  22. auto m = parent.def_submodule(name, doc);
  23. m.attr("__package__") = parent.attr("__name__");
  24. m.attr("__builtins__") = py::module::import("builtins");
  25. return m;
  26. }
  27. py::module rel_import(py::str name, py::module m, int level) {
  28. py::object import = py::module::import("builtins").attr("__import__");
  29. return import(name, m.attr("__dict__"), py::arg("level")=level);
  30. }
  /*
   * Turn the implementation-defined string returned by std::type_info::name()
   * into a readable type name, see
   * http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
   */
  #ifdef __GNUG__
  #include <cxxabi.h>
  #include <cstdlib>
  #include <memory>
  namespace {
  //! demangle \p name with abi::__cxa_demangle(); \p name itself is returned
  //! when the demangler does not report success
  std::string demangle_typeid(const char* name) {
      // written by __cxa_demangle(); the initial value is arbitrary and only
      // there to silence the "maybe uninitialized" compiler warning
      int rc = -4;
      // the result buffer is handed to std::free() when it goes out of scope
      using DemangledBuf = std::unique_ptr<char, void (*)(void*)>;
      const DemangledBuf demangled{
              abi::__cxa_demangle(name, nullptr, nullptr, &rc), std::free};
      if (rc != 0) {
          return name;
      }
      return demangled.get();
  }
  }  // namespace
  #else
  namespace {
  //! no demangling is attempted when __GNUG__ is not defined; \p name is
  //! returned as is
  std::string demangle_typeid(const char* name) {
      return std::string{name};
  }
  }  // namespace
  #endif
  58. using namespace mgb;
  59. using namespace cg;
  60. namespace {
  61. std::string repr_pyobj(PyObject *obj) {
  62. if (!obj)
  63. return "<null PyObject>";
  64. PYTHON_GIL;
  65. auto str = PyObject_Repr(obj);
  66. if (!str)
  67. return ssprintf("<PyObject at %p (repr failed)>", obj);
  68. std::string ret{PyUnicode_AsUTF8(str)};
  69. Py_DECREF(str);
  70. return ret;
  71. }
  72. template<typename T>
  73. std::string typeid_name(const T &t) {
  74. return demangle_typeid(typeid(t).name());
  75. }
  76. } // anonymous namespace
  77. /* ============== PyExceptionForward ============== */
  // Destructor: hands each of the three stored exception objects (type, value,
  // traceback) to PyObjRefKeeper::deleter. PYTHON_GIL is a macro defined outside
  // this file (presumably it holds the interpreter lock for the rest of the
  // scope -- TODO confirm).
  // NOTE(review): restore() resets the three members to null, so deleter is
  // expected to accept a null pointer -- confirm against its definition.
  78. PyExceptionForward::~PyExceptionForward() {
  79. PYTHON_GIL;
  80. PyObjRefKeeper::deleter(m_type);
  81. PyObjRefKeeper::deleter(m_value);
  82. PyObjRefKeeper::deleter(m_traceback);
  83. }
  84. void PyExceptionForward::restore() {
  85. PyErr_Restore(m_type, m_value, m_traceback);
  86. m_type = m_value = m_traceback = nullptr;
  87. }
  88. void PyExceptionForward::throw_() {
  89. PyObject *etype, *obj, *trace;
  90. PyErr_Fetch(&etype, &obj, &trace);
  91. PyErr_NormalizeException(&etype, &obj, &trace);
  92. std::string msg{"python exception"};
  93. bool succ = false;
  94. if (etype && obj && trace) {
  95. auto run = [&]() {
  96. #define DEF(name, expr) \
  97. PyObjRefKeeper name{expr}; \
  98. if (!name.get()) \
  99. return
  100. DEF(mod, PyImport_ImportModule("traceback"));
  101. DEF(result, PyObject_CallMethod(mod.get(), "format_exception",
  102. "(OOO)", etype, obj, trace));
  103. if (!PyList_Check(result.get()))
  104. return;
  105. auto size = PyList_Size(result.get());
  106. msg.append(":\n");
  107. for (Py_ssize_t i = 0; i < size; ++i) {
  108. msg.append(" ");
  109. msg.append(PyUnicode_AsUTF8(PyList_GetItem(result.get(), i)));
  110. }
  111. msg.pop_back(); // remove last \n
  112. succ = true;
  113. #undef DEF
  114. };
  115. run();
  116. }
  117. if (!succ) {
  118. PyObject* obj_str_py;
  119. if (obj && (obj_str_py = PyObject_Repr(obj))) {
  120. msg.append(" with message ");
  121. msg.append(PyUnicode_AsUTF8(obj_str_py));
  122. Py_DECREF(obj_str_py);
  123. } else {
  124. msg.append(" with unknown message");
  125. }
  126. }
  127. // throwing exception may cause abort due to unknown reasons; so we first
  128. // log the message
  129. mgb_log_error("caught exception from python callback: %s", msg.c_str());
  130. fflush(stdout);
  131. fflush(stderr);
  132. throw PyExceptionForward{etype, obj, trace, msg};
  133. }
  134. /* ============== namespace npy ============== */
  135. namespace {
  136. int to_mgb_supported_dtype_raw(int dtype) {
  137. if (dtype == NPY_INT64)
  138. return NPY_INT32;
  139. if (dtype == NPY_FLOAT64)
  140. return NPY_FLOAT32;
  141. return dtype;
  142. }
  // X-macro listing the (megbrain dtype name, numpy type number) pairs that are
  // spelled out in this file; `cb` is invoked once per pair.
  143. #define FOREACH_NPY_DTYPE_PAIR(cb) \
  144. cb(Uint8, NPY_UINT8) \
  145. cb(Int8, NPY_INT8) \
  146. cb(Int16, NPY_INT16) \
  147. cb(Int32, NPY_INT32) \
  148. cb(Float16, NPY_FLOAT16) \
  149. cb(Float32, NPY_FLOAT32) \
  150. cb(Bool, NPY_BOOL)
  // The pairs above followed by those of FOREACH_MGB_DTYPE_PAIR, which is not
  // defined in this file (presumably provided by ./numpy_dtypes.h -- TODO
  // confirm).
  151. #define FOREACH_NPY_MGB_DTYPE_PAIR(cb) \
  152. FOREACH_NPY_DTYPE_PAIR(cb) \
  153. FOREACH_MGB_DTYPE_PAIR(cb)
  154. //! convert megbrain dtype to numpy dtype
  155. int dtype_mgb2np_raw(DType dtype) {
  156. mgb_assert(dtype.valid(), "attempt to convert from invalid dtype");
  157. switch (dtype.enumv()) {
  158. #define cb(_m, _n) \
  159. case DTypeEnum::_m: \
  160. return _n;
  161. FOREACH_NPY_MGB_DTYPE_PAIR(cb)
  162. #undef cb
  163. default:
  164. break;
  165. }
  166. throw ConversionError(ssprintf(
  167. "can not convert dtype %s to numpy dtype", dtype.name()));
  168. }
  169. struct PyArrayDescrDeleter {
  170. void operator()(PyArray_Descr* obj) {
  171. Py_XDECREF(obj);
  172. }
  173. };
  174. //! Convert MegBrain DType to NumPy DType descriptor, the caller receives a new
  175. //! reference to the descriptor.
  176. std::unique_ptr<PyArray_Descr, PyArrayDescrDeleter> dtype_mgb2np_descr(
  177. DType dtype) {
  178. PYTHON_GIL;
  179. mgb_assert(dtype.valid(), "attempt to convert from invalid dtype");
  180. auto build_mgb_dtype_dict =
  181. [](const char* name,
  182. const std::vector<std::pair<const char*, PyObject*>>& data) {
  183. PyObject* metadata = PyDict_New();
  184. PyObject* mgb_dtype_metadata = PyDict_New();
  185. PyDict_SetItemString(mgb_dtype_metadata, "name",
  186. PyUnicode_FromString(name));
  187. for (const auto& d : data) {
  188. PyDict_SetItemString(mgb_dtype_metadata, d.first, d.second);
  189. }
  190. PyDict_SetItemString(metadata, "mgb_dtype", mgb_dtype_metadata);
  191. return metadata;
  192. };
  193. if (dtype.has_param()) {
  194. PyArray_Descr* type_descr;
  195. switch (dtype.enumv()) {
  196. case DTypeEnum::Quantized4Asymm: {
  197. auto& param = dtype.param<dtype::Quantized4Asymm>();
  198. type_descr = PyArray_DescrNewFromType(NPY_UINT8);
  199. type_descr->metadata = build_mgb_dtype_dict(
  200. DTypeTrait<dtype::Quantized4Asymm>::name,
  201. {{"scale", PyFloat_FromDouble(param.scale)},
  202. {"zero_point", PyLong_FromLong(param.zero_point)}});
  203. break;
  204. }
  205. case DTypeEnum::QuantizedS4: {
  206. auto& param = dtype.param<dtype::QuantizedS4>();
  207. type_descr = PyArray_DescrNewFromType(NPY_INT8);
  208. type_descr->metadata = build_mgb_dtype_dict(
  209. DTypeTrait<dtype::QuantizedS4>::name,
  210. {{"scale", PyFloat_FromDouble(param.scale)}});
  211. break;
  212. }
  213. case DTypeEnum::Quantized8Asymm: {
  214. auto& param = dtype.param<dtype::Quantized8Asymm>();
  215. type_descr = PyArray_DescrNewFromType(NPY_UINT8);
  216. type_descr->metadata = build_mgb_dtype_dict(
  217. DTypeTrait<dtype::Quantized8Asymm>::name,
  218. {{"scale", PyFloat_FromDouble(param.scale)},
  219. {"zero_point", PyLong_FromLong(param.zero_point)}});
  220. break;
  221. }
  222. case DTypeEnum::QuantizedS8: {
  223. auto& param = dtype.param<dtype::QuantizedS8>();
  224. type_descr = PyArray_DescrNewFromType(NPY_INT8);
  225. type_descr->metadata = build_mgb_dtype_dict(
  226. DTypeTrait<dtype::QuantizedS8>::name,
  227. {{"scale", PyFloat_FromDouble(param.scale)}});
  228. break;
  229. }
  230. case DTypeEnum::QuantizedS32: {
  231. auto& param = dtype.param<dtype::QuantizedS32>();
  232. type_descr = PyArray_DescrNewFromType(NPY_INT32);
  233. type_descr->metadata = build_mgb_dtype_dict(
  234. DTypeTrait<dtype::QuantizedS32>::name,
  235. {{"scale", PyFloat_FromDouble(param.scale)}});
  236. break;
  237. }
  238. default:
  239. mgb_throw(ConversionError, "unhandled parameterized DType %s",
  240. dtype.name());
  241. }
  242. return std::unique_ptr<PyArray_Descr, PyArrayDescrDeleter>(type_descr);
  243. }
  244. PyArray_Descr* basic_descr = PyArray_DescrFromType(dtype_mgb2np_raw(dtype));
  245. mgb_assert(basic_descr != nullptr,
  246. "failed to convert expected dtype to numpy type descriptor");
  247. return std::unique_ptr<PyArray_Descr, PyArrayDescrDeleter>(basic_descr);
  248. }
  249. DType dtype_np2mgb_raw(int npt) {
  250. switch (npt) {
  251. #define cb(_m, _n) \
  252. case _n: \
  253. return dtype::_m();
  254. FOREACH_NPY_DTYPE_PAIR(cb)
  255. #undef cb
  256. }
  257. #define cb(_m, _n) \
  258. if (_n == npt) return dtype::_m();
  259. FOREACH_MGB_DTYPE_PAIR(cb)
  260. #undef cb
  261. PYTHON_GIL;
  262. std::string msg;
  263. auto py_obj = PyArray_TypeObjectFromType(npt);
  264. if (!py_obj) {
  265. msg = ssprintf("unknown numpy dtype enum %d", npt);
  266. } else {
  267. msg = ssprintf("unsupported numpy dtype %s",
  268. repr_pyobj(py_obj).c_str());
  269. }
  270. Py_DECREF(py_obj);
  271. throw ConversionError(msg);
  272. }
  273. DType dtype_np2mgb_descr(PyArray_Descr* descr) {
  274. PYTHON_GIL;
  275. auto handle_parameterized_dtype = [](PyObject* metadata) -> DType {
  276. mgb_assert(PyDict_Check(metadata),
  277. "Invalid parameterized DType metadata: should be a dict");
  278. PyObject* dtype_name_py = PyDict_GetItemString(metadata, "name");
  279. mgb_assert(
  280. PyUnicode_Check(dtype_name_py),
  281. "Invalid parameterized DType metadata: name should be a str");
  282. std::string dtype_name(PyUnicode_AsUTF8(dtype_name_py));
  283. if (dtype_name == "Quantized8Asymm") {
  284. PyObject* scale_py = PyDict_GetItemString(metadata, "scale");
  285. PyObject* zero_point_py =
  286. PyDict_GetItemString(metadata, "zero_point");
  287. mgb_assert(scale_py && zero_point_py,
  288. "Invalid Quantized8Asymm metadata: missing scale or "
  289. "zero_point.");
  290. mgb_assert(
  291. PyFloat_Check(scale_py),
  292. "Invalid Quantized8Asymm metadata: scale should be float");
  293. mgb_assert(PyLong_Check(zero_point_py),
  294. "Invalid Quantized8Asymm metadata: zero_point should be "
  295. "integer");
  296. auto zero_point = PyLong_AS_LONG(zero_point_py);
  297. mgb_assert(zero_point >= 0 && zero_point < 256,
  298. "Invalid Quantized8Asymm metadata: zero_point should be "
  299. "in [0, 256)");
  300. return dtype::Quantized8Asymm(
  301. static_cast<float>(PyFloat_AS_DOUBLE(scale_py)),
  302. static_cast<uint8_t>(zero_point));
  303. }
  304. if (dtype_name == "Quantized4Asymm") {
  305. PyObject* scale_py = PyDict_GetItemString(metadata, "scale");
  306. PyObject* zero_point_py =
  307. PyDict_GetItemString(metadata, "zero_point");
  308. mgb_assert(scale_py && zero_point_py,
  309. "Invalid Quantized4Asymm metadata: missing scale or "
  310. "zero_point.");
  311. mgb_assert(
  312. PyFloat_Check(scale_py),
  313. "Invalid Quantized4Asymm metadata: scale should be float");
  314. mgb_assert(PyLong_Check(zero_point_py),
  315. "Invalid Quantized4Asymm metadata: zero_point should be "
  316. "integer");
  317. auto zero_point = PyLong_AS_LONG(zero_point_py);
  318. mgb_assert(zero_point >= 0 && zero_point < 15,
  319. "Invalid Quantized4Asymm metadata: zero_point should be "
  320. "in [0, 15)");
  321. return dtype::Quantized4Asymm(
  322. static_cast<float>(PyFloat_AS_DOUBLE(scale_py)),
  323. static_cast<uint8_t>(zero_point));
  324. }
  325. if (dtype_name == "QuantizedS32" || dtype_name == "QuantizedS8" ||
  326. dtype_name == "QuantizedS4") {
  327. PyObject* scale_py = PyDict_GetItemString(metadata, "scale");
  328. mgb_assert(scale_py, "Invalid metadata: missing scale");
  329. mgb_assert(PyFloat_Check(scale_py),
  330. "Invalid metadata: scale should be float");
  331. float scale = static_cast<float>(PyFloat_AS_DOUBLE(scale_py));
  332. if (dtype_name == "QuantizedS32") {
  333. return dtype::QuantizedS32(scale);
  334. } else if (dtype_name == "QuantizedS8"){
  335. return dtype::QuantizedS8(scale);
  336. } else {
  337. return dtype::QuantizedS4(scale);
  338. }
  339. }
  340. throw ConversionError(
  341. ssprintf("Unknown parameterized DType: %s", dtype_name.c_str())
  342. .c_str());
  343. };
  344. PyObject* dtype_metadata;
  345. if (descr->metadata && PyDict_Check(descr->metadata) &&
  346. (dtype_metadata = PyDict_GetItemString(descr->metadata, "mgb_dtype"))) {
  347. return handle_parameterized_dtype(dtype_metadata);
  348. }
  349. return dtype_np2mgb_raw(descr->type_num);
  350. }
  351. HostTensorND lowbit_ndarray_to_host_tensor(
  352. CompNode comp_node, TensorLayout &layout, PyArrayObject *input) {
  353. auto src_ptr = reinterpret_cast<dt_byte*>(PyArray_DATA(input));
  354. if (!layout.ndim) {
  355. // numpy scalar
  356. mgb_assert(src_ptr, "can not convert from null numpy array");
  357. layout.init_contiguous_stride({1});
  358. } else {
  359. mgb_assert(layout.ndim && layout.ndim <= TensorShape::MAX_NDIM,
  360. "unsupported ndim %zu", layout.ndim);
  361. for (size_t i = 0; i < layout.ndim; ++ i) {
  362. layout.shape[i] = PyArray_SHAPE(input)[i];
  363. layout.stride[i] = PyArray_STRIDE(input, i);
  364. mgb_assert(layout.shape[i], "zero shape not supported");
  365. }
  366. mgb_assert(layout.is_contiguous());
  367. }
  368. HostTensorND ret{comp_node, layout};
  369. lowbit_memcpy_byte2compact(layout.dtype, ret.raw_ptr(), src_ptr,
  370. layout.total_nr_elems());
  371. return ret;
  372. }
  373. /*!
  374. * \brief convert a python object to tensor and try to borrow memory if the
  375. * original object is a contiguous numpy array
  376. * \param dtype see np2tensor
  377. * \return the megbrain tensor, and whether memory is borrowed
  378. */
  379. std::pair<HostTensorND, bool> np2tensor_try_borrow(
  380. PyObject *obj, const npy::Meth& meth, DType dtype) {
  381. auto dest_cn = meth.dest_cn_;
  382. mgb_assert(dest_cn.valid());
  383. PYTHON_GIL;
  384. PyArray_Descr* expected_descr = nullptr;
  385. if (dtype.valid()) {
  386. // The reference to expected_descr will be stealed later.
  387. expected_descr = dtype_mgb2np_descr(dtype).release();
  388. }
  389. // make result from PyArrayObject; its reference may be stolen
  390. auto make_from_arr = [&](PyArrayObject *input, bool allow_borrow) {
  391. TensorLayout layout;
  392. layout.dtype = dtype_np2mgb_descr(PyArray_DESCR(input));
  393. if (dtype.valid())
  394. mgb_assert(dtype == layout.dtype);
  395. layout.ndim = PyArray_NDIM(input);
  396. if (layout.dtype.is_low_bit()) {
  397. auto ret = lowbit_ndarray_to_host_tensor(dest_cn, layout, input);
  398. if (meth.dest_tensor_) {
  399. meth.dest_tensor_->copy_from(ret);
  400. ret = *meth.dest_tensor_;
  401. }
  402. return std::make_pair(ret, false);
  403. }
  404. auto data = reinterpret_cast<dt_byte*>(PyArray_DATA(input));
  405. if (!layout.ndim) {
  406. // numpy scalar
  407. mgb_assert(data, "can not convert from null numpy array");
  408. layout.init_contiguous_stride({1});
  409. } else {
  410. mgb_assert(layout.ndim && layout.ndim <= TensorShape::MAX_NDIM,
  411. "unsupported ndim %zu", layout.ndim);
  412. auto dsize = layout.dtype.size();
  413. bool is_empty = false;
  414. for (size_t i = 0; i < layout.ndim; ++ i) {
  415. layout.shape[i] = PyArray_SHAPE(input)[i];
  416. layout.stride[i] = PyArray_STRIDE(input, i);
  417. if (!layout.shape[i]) {
  418. is_empty = true;
  419. }
  420. mgb_assert(layout.stride[i] % dsize == 0,
  421. "bad stride %zd", layout.stride[i]);
  422. layout.stride[i] /= dsize;
  423. }
  424. mgb_assert(is_empty || layout.is_contiguous());
  425. }
  426. if (!meth.dest_tensor_ && allow_borrow) {
  427. Py_INCREF(input);
  428. PyObjRefKeeper ref_obj_cvt{reinterpret_cast<PyObject*>(input)};
  429. HostTensorStorage storage;
  430. auto input_ptr = ref_obj_cvt.make_shared(data);
  431. storage.reset(dest_cn, layout.span().high_byte, input_ptr);
  432. HostTensorND ret;
  433. ret.reset(storage, layout);
  434. return std::make_pair(ret, true);
  435. } else {
  436. auto storage = HostTensorStorage(dest_cn);
  437. storage.ensure_size(layout.span().dist_byte());
  438. memcpy(storage.ptr(), data, layout.span().dist_byte());
  439. HostTensorND ret{dest_cn, layout.dtype};
  440. if (meth.dest_tensor_) {
  441. meth.dest_tensor_->reset(storage, layout);
  442. return std::make_pair(*meth.dest_tensor_, false);
  443. } else {
  444. HostTensorND ret;
  445. ret.reset(storage, layout);
  446. return std::make_pair(ret, false);
  447. }
  448. }
  449. };
  450. PyArrayObject *obj_as_arr = nullptr;
  451. do {
  452. // check contiguous and dtype, and borrow mem if ok
  453. if (!PyArray_Check(obj))
  454. break;
  455. obj_as_arr = reinterpret_cast<PyArrayObject*>(obj);
  456. int typenum = PyArray_DTYPE(obj_as_arr)->type_num;
  457. // We have to check dtype.valid() and typenum first to avoid
  458. // accidentally trigger ConversionError on incompatible dtypes which can
  459. // be automatically converted into comptaible ones (e.g. float64).
  460. if (dtype.valid() &&
  461. (expected_descr->type_num != typenum ||
  462. dtype_np2mgb_descr(PyArray_DTYPE(obj_as_arr)) != dtype))
  463. break;
  464. if (typenum != to_mgb_supported_dtype_raw(typenum)) {
  465. mgb_assert(!dtype.valid() && expected_descr == nullptr);
  466. expected_descr =
  467. PyArray_DescrFromType(to_mgb_supported_dtype_raw(typenum));
  468. break;
  469. }
  470. if (PyArray_ISCARRAY_RO(obj_as_arr)) {
  471. return make_from_arr(obj_as_arr, true);
  472. }
  473. } while(0);
  474. constexpr auto NP_FLAGS = NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_FORCECAST;
  475. PyObject *obj_cvt;
  476. if (obj_as_arr) {
  477. obj_cvt = PyArray_FromArray(obj_as_arr, expected_descr, NP_FLAGS);
  478. } else {
  479. obj_cvt = PyArray_FromAny(obj, expected_descr, 0, 0, NP_FLAGS, nullptr);
  480. }
  481. if (obj_cvt) {
  482. // convert to mgb supported dtype
  483. auto arr = reinterpret_cast<PyArrayObject*>(obj_cvt);
  484. int dt0 = PyArray_TYPE(arr), dt1 = to_mgb_supported_dtype_raw(dt0);
  485. if (dt0 != dt1) {
  486. mgb_assert(expected_descr == nullptr);
  487. expected_descr = PyArray_DescrFromType(dt1);
  488. mgb_assert(expected_descr);
  489. auto obj_cvt_new = PyArray_FromAny(
  490. obj_cvt, expected_descr, 0, 0, NP_FLAGS, nullptr);
  491. Py_DECREF(obj_cvt);
  492. obj_cvt = obj_cvt_new;
  493. }
  494. }
  495. if (!obj_cvt) {
  496. if (PyErr_Occurred()) {
  497. PyExceptionForward::throw_();
  498. }
  499. throw ConversionError(ssprintf("can not convert to numpy array from %s",
  500. repr_pyobj(obj).c_str()));
  501. }
  502. auto ret = make_from_arr(reinterpret_cast<PyArrayObject*>(obj_cvt), false);
  503. Py_DECREF(obj_cvt);
  504. return ret;
  505. }
  506. //! hold a reference to HostTensorND
  507. class HostTensorNDRefHolder final: public NonCopyableObj {
  508. HostTensorND m_val;
  509. static MemPool<HostTensorNDRefHolder> sm_mem_pool;
  510. friend class MemPool<HostTensorNDRefHolder>;
  511. HostTensorNDRefHolder(const HostTensorND &v):
  512. m_val{v}
  513. {
  514. }
  515. public:
  516. static HostTensorNDRefHolder* alloc(const HostTensorND &v) {
  517. return sm_mem_pool.alloc(v);
  518. }
  519. static void free(HostTensorNDRefHolder *p) {
  520. return sm_mem_pool.free(p);
  521. }
  522. };
  523. MemPool<HostTensorNDRefHolder> HostTensorNDRefHolder::sm_mem_pool;
  524. void ndarray_shared_from_tensor_py_capsule_dtor(PyObject *cap) {
  525. auto ptr = PyCapsule_GetPointer(cap, "HostTensorND");
  526. mgb_assert(ptr, "not a PyCapsule: %s", repr_pyobj(cap).c_str());
  527. HostTensorNDRefHolder::free(static_cast<HostTensorNDRefHolder*>(ptr));
  528. }
  529. } // anonymous namespace
  530. PyObject* npy::ndarray_from_tensor(
  531. const HostTensorND &val, ShareType share_type) {
  532. if (!val.layout().is_contiguous() && !val.shape().is_empty()) {
  533. mgb_assert(share_type != ShareType::MUST_SHARE);
  534. HostTensorND contig;
  535. contig.copy_from(val);
  536. return ndarray_from_tensor(contig, ShareType::TRY_SHARE);
  537. }
  538. PYTHON_GIL;
  539. npy_intp dims[TensorLayout::MAX_NDIM];
  540. for (size_t i = 0; i < val.layout().ndim; ++ i)
  541. dims[i] = val.shape()[i];
  542. PyObject* ret = nullptr;
  543. auto alloc_new_ret = [&]() {
  544. mgb_assert(!ret);
  545. ret = PyArray_NewFromDescr(
  546. &PyArray_Type, dtype_mgb2np_descr(val.dtype()).release(),
  547. val.layout().ndim, dims, nullptr, nullptr, 0, nullptr);
  548. mgb_assert(ret, "failed to allocate array");
  549. mgb_assert(PyArray_Check(ret));
  550. return PyArray_DATA(reinterpret_cast<PyArrayObject*>(ret));
  551. };
  552. if (val.dtype().is_low_bit()) {
  553. mgb_assert(share_type != ShareType::MUST_SHARE,
  554. "can not share memory for lowbit dtype");
  555. lowbit_memcpy_compact2byte(val.dtype(), alloc_new_ret(), val.raw_ptr(),
  556. val.layout().total_nr_elems());
  557. } else if (share_type == ShareType::MUST_UNSHARE) {
  558. memcpy(alloc_new_ret(), val.raw_ptr(), val.layout().span().dist_byte());
  559. } else {
  560. // share data
  561. ret = PyArray_NewFromDescr(
  562. &PyArray_Type, dtype_mgb2np_descr(val.dtype()).release(),
  563. val.layout().ndim, dims, nullptr,
  564. const_cast<dt_byte*>(val.raw_ptr()), 0, nullptr);
  565. mgb_assert(ret, "failed to alloc ndarray");
  566. auto capsule = PyCapsule_New(HostTensorNDRefHolder::alloc(val),
  567. "HostTensorND", ndarray_shared_from_tensor_py_capsule_dtor);
  568. mgb_assert(capsule, "failed to create PyCapsule");
  569. auto err = PyArray_SetBaseObject(
  570. reinterpret_cast<PyArrayObject*>(ret), capsule);
  571. mgb_assert(!err);
  572. }
  573. return ret;
  574. }
  575. HostTensorND npy::np2tensor(PyObject* obj, const Meth& meth, DType dtype) {
  576. auto ret_full = np2tensor_try_borrow(obj, meth, dtype);
  577. if (meth.must_borrow_) {
  578. mgb_assert(ret_full.second,
  579. "can not borrow from numpy array as contig array with dtype "
  580. "%s; src=%s",
  581. dtype.name(), repr_pyobj(obj).c_str());
  582. }
  583. return ret_full.first;
  584. }
  585. PyObject* npy::dtype_mgb2np(mgb::DType dtype) {
  586. PYTHON_GIL;
  587. // According to
  588. // https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.PyArray_TypeObjectFromType
  589. // the following is equivalent to PyArray_TypeObjectFromType for built-in
  590. // types.
  591. if(!dtype.valid()){
  592. Py_XINCREF(Py_None);
  593. return Py_None;
  594. }
  595. auto descr = dtype_mgb2np_descr(dtype);
  596. if (descr == nullptr) {
  597. Py_XINCREF(Py_None);
  598. return Py_None;
  599. }
  600. if (dtype.has_param()) {
  601. return reinterpret_cast<PyObject*>(descr.release());
  602. }
  603. PyObject* typeobj = reinterpret_cast<PyObject*>(descr->typeobj);
  604. Py_XINCREF(typeobj);
  605. return typeobj;
  606. }
  607. mgb::DType npy::dtype_np2mgb(PyObject *obj) {
  608. mgb_assert(obj && obj != Py_None,
  609. "can not convert null PyObject to numpy dtype");
  610. // see
  611. // http://stackoverflow.com/questions/8477122/numpy-c-api-convert-type-object-to-type-number
  612. PYTHON_GIL;
  613. PyArray_Descr* dtype;
  614. if(!PyArray_DescrConverter(obj, &dtype)) {
  615. throw ConversionError(ssprintf("can not convert to np.dtype from %s",
  616. repr_pyobj(obj).c_str()));
  617. }
  618. mgb::DType result = dtype_np2mgb_descr(dtype);
  619. Py_DECREF(dtype);
  620. return result;
  621. }
  622. PyObject* npy::to_mgb_supported_dtype(PyObject* dtype) {
  623. PYTHON_GIL;
  624. PyArray_Descr* descr;
  625. if (!PyArray_DescrConverter(dtype, &descr)) {
  626. throw ConversionError(ssprintf("can not convert to np.dtype from %s",
  627. repr_pyobj(dtype).c_str()));
  628. }
  629. mgb_assert(!descr->metadata,
  630. "unexpected metadata in dtype: "
  631. "dtype_obj=%s metadata=%s",
  632. repr_pyobj(dtype).c_str(), repr_pyobj(descr->metadata).c_str());
  633. int type_num = to_mgb_supported_dtype_raw(descr->type_num);
  634. return PyArray_TypeObjectFromType(type_num);
  635. }
  636. TensorShape npy::vec2shape(const std::vector<size_t> &vec) {
  637. TensorShape shape;
  638. mgb_assert(vec.size() <= TensorShape::MAX_NDIM,
  639. "dim too large: %zd (max %zd)",
  640. vec.size(), TensorShape::MAX_NDIM);
  641. shape.ndim = vec.size();
  642. for (size_t i = 0; i < vec.size(); i ++) {
  643. if (!vec[i]) {
  644. shape.ndim = 0;
  645. break;
  646. }
  647. shape[i] = vec[i];
  648. }
  649. mgb_assert(shape.ndim, "shape should not be empty");
  650. return shape;
  651. }

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台