You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

extern_c_opr.cpp 18 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. /**
  2. * \file src/serialization/impl/extern_c_opr.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/comp_node_env.h"
  12. #include "megbrain/serialization/extern_c_opr.h"
  13. #include "megbrain/serialization/extern_c_opr_io.h"
  14. #include "megbrain/serialization/opr_load_dump.h"
  15. #include <cstdlib>
  16. using namespace mgb;
  17. using namespace serialization;
  18. using namespace opr;
  19. namespace {
  20. const char PLACEHOLDER_TYPE_NAME[] = "placeholder";
  21. typedef MGBOprDesc* (*opr_desc_transformer_t)(void* input);
  22. using LoaderMap =
  23. std::unordered_map<std::string,
  24. std::pair<MGBOprLoader, opr_desc_transformer_t>>;
  25. //! singleton LoaderMap
  26. LoaderMap& loader_map() {
  27. static LoaderMap ret;
  28. return ret;
  29. }
//! adapter that lets an MGBOprDesc participate in graph deduplication:
//! hashing and equality are delegated to the C descriptor's own callbacks
class MGBOprDescHash final : public HashableVD {
    MGB_DYN_TYPE_OBJ_FINAL_DECL;
    MGBOprDesc* const m_desc;  //!< not owned; owner is the ExternCOprRunner

    //! delegate equality to the descriptor's is_same() callback
    bool is_same_st(const Hashable& rhs) const override {
        return m_desc->is_same(m_desc,
                               static_cast<const MGBOprDescHash&>(rhs).m_desc);
    }

public:
    MGBOprDescHash(MGBOprDesc* desc) : m_desc{desc} {}

    //! delegate hashing to the descriptor's hash() callback
    size_t hash() const override { return m_desc->hash(m_desc); }
};
MGB_DYN_TYPE_OBJ_FINAL_IMPL(MGBOprDescHash);
  42. MGBDType dtype_cpp2c(DType dtype) {
  43. switch (dtype.enumv()) {
  44. case DTypeEnum::Float32:
  45. return MGB_DTYPE_FLOAT32;
  46. case DTypeEnum::Int32:
  47. return MGB_DTYPE_INT32;
  48. case DTypeEnum::Int16:
  49. return MGB_DTYPE_INT16;
  50. case DTypeEnum::Uint8:
  51. return MGB_DTYPE_UINT8;
  52. #if !MEGDNN_DISABLE_FLOAT16
  53. case DTypeEnum::Float16:
  54. return MGB_DTYPE_FLOAT16;
  55. #endif
  56. default:
  57. mgb_throw(InternalError, "unsupported dtype for extern C API: %s",
  58. dtype.name());
  59. }
  60. }
  61. DType dtype_c2cpp(MGBDType dtype) {
  62. switch (dtype) {
  63. case MGB_DTYPE_UINT8:
  64. return dtype::Uint8{};
  65. case MGB_DTYPE_INT16:
  66. return dtype::Int16{};
  67. case MGB_DTYPE_INT32:
  68. return dtype::Int32{};
  69. case MGB_DTYPE_FLOAT32:
  70. return dtype::Float32{};
  71. #if !MEGDNN_DISABLE_FLOAT16
  72. case MGB_DTYPE_FLOAT16:
  73. return dtype::Float16{};
  74. #endif
  75. default:
  76. mgb_throw(SerializationError, "bad dtype value: %d",
  77. static_cast<int>(dtype));
  78. }
  79. }
//! build a non-owning C view (MGBTensor) of a MegBrain tensor; the const_cast
//! is required because the C struct has a single non-const data pointer used
//! for both input and output tensors — callbacks must not write input views
template <typename S>
MGBTensor tensor_to_c(const TensorND<S>& src) {
    MGBTensor ret;
    ret.data = const_cast<void*>(static_cast<const void*>(src.raw_ptr()));
    ret.layout.dtype = dtype_cpp2c(src.dtype());
    ret.layout.shape = ExternCOprRunner::tensor_shape_to_c(src.shape());
    return ret;
}
//! layout of the version-0x23 descriptor ABI; kept so loader libraries built
//! against the old API can still be registered and adapted to MGBOprDesc
struct MGBOprDescV23 {
    size_t nr_input, nr_output;

    //! operator type name
    const char* type_name;

    //! release this descriptor
    void (*release)(MGBOprDescV23* self);

    //! compute hash
    size_t (*hash)(const MGBOprDescV23* self);

    //! equality check
    int (*is_same)(const MGBOprDescV23* self, const MGBOprDescV23* rhs);

    //! perform the computation
    void (*execute)(const MGBOprDescV23* self, const MGBTensor* input,
                    const MGBTensor* output);

    //! infer output shapes from input shapes
    void (*infer_shape)(const MGBOprDescV23* self, const MGBTensorShape* input,
                        MGBTensorShape* output);

    //! custom user data to be associated with this descriptor
    void* user_data;

    //! adapt a V23 descriptor into a current-version MGBOprDesc: the V23
    //! struct is stashed in the new desc's user_data, and each callback is
    //! forwarded through a capture-less lambda (convertible to a plain
    //! function pointer, as the C ABI requires)
    static MGBOprDesc* as_opr_desc(void* v23_raw) {
        auto release = [](MGBOprDesc* self) {
            auto p = static_cast<MGBOprDescV23*>(self->user_data);
            p->release(p);  // release the wrapped V23 descriptor first
            delete self;    // then the adapter allocated below
        };
        auto hash = [](const MGBOprDesc* self) {
            auto p = static_cast<MGBOprDescV23*>(self->user_data);
            return p->hash(p);
        };
        auto is_same = [](const MGBOprDesc* self, const MGBOprDesc* rhs) {
            auto p0 = static_cast<MGBOprDescV23*>(self->user_data);
            auto p1 = static_cast<MGBOprDescV23*>(rhs->user_data);
            return p0->is_same(p0, p1);
        };
        auto execute = [](const MGBOprDesc* self, const MGBTensor* input,
                          const MGBTensor* output) {
            auto p = static_cast<MGBOprDescV23*>(self->user_data);
            p->execute(p, input, output);
        };
        auto infer_shape = [](const MGBOprDesc* self,
                              const MGBTensorShape* input,
                              MGBTensorShape* output) {
            auto p = static_cast<MGBOprDescV23*>(self->user_data);
            p->infer_shape(p, input, output);
        };
        auto v23 = static_cast<MGBOprDescV23*>(v23_raw);
        auto ret = std::make_unique<MGBOprDesc>();
        mgb_init_opr_desc(ret.get(), v23->nr_output, v23->type_name);
        ret->user_data = v23;
        // assign each forwarding lambda to the member function pointer of the
        // same name; the macro expands over all required callbacks
#define ASSIGN(name) ret->name = name;
        MGB_OPR_DESC_FOREACH_MEM_FN(ASSIGN);
#undef ASSIGN
        return ret.release();
    }
};
//! impl MGBOprDesc for ExternCOprRunner::make_placeholder
//! A placeholder stores only the metadata needed for serialization (loader
//! name, opaque payload, declared output shapes/dtypes); it can be dumped
//! but never executed.
class PlaceholderMGBOprDesc {
    struct UserData {
        std::string name;                  // loader name written at dump time
        TensorShapeArray output_shapes;    // pre-declared output shapes
        SmallVector<DType> output_dtypes;  // optional; empty disables dtype inference
        std::unique_ptr<uint8_t[]> data;   // opaque payload copied from caller
        size_t data_len;
    };

    static UserData* user_data(const MGBOprDesc* self) {
        return static_cast<UserData*>(self->user_data);
    }

    //! desc and its UserData share one malloc'd block (see make()), so only
    //! the UserData destructor runs explicitly before freeing the whole block
    static void release(MGBOprDesc* self) {
        user_data(self)->~UserData();
        ::free(self);
    }

    static size_t hash(const MGBOprDesc* self) {
        return reinterpret_cast<size_t>(self); // hash disabled
    }

    static int is_same(const MGBOprDesc* self, const MGBOprDesc* rhs) {
        return self == rhs;
    }

    //! perform the computation
    static void execute(const MGBOprDesc*, const MGBTensor*, const MGBTensor*) {
        mgb_throw(MegBrainError,
                  "placeholder ExternCOprRunner can not be executed");
    }

    static void infer_shape(const MGBOprDesc* self, const MGBTensorShape* input,
                            MGBTensorShape* output);

    static void infer_dtype(const struct MGBOprDesc* self,
                            const MGBDType* input, MGBDType* output);

public:
    static MGBOprDesc* make(size_t nr_input, const char* name,
                            const TensorShapeArray& output_shapes,
                            const SmallVector<DType>& output_dtypes,
                            const void* data, size_t data_len);

    static void dump(OprDumpContext& ctx, MGBOprDesc* desc);
};
  180. } // anonymous namespace
  181. /* ===================== PlaceholderMGBOprDesc ===================== */
  182. void PlaceholderMGBOprDesc::infer_shape(const MGBOprDesc* self,
  183. const MGBTensorShape* input,
  184. MGBTensorShape* output) {
  185. auto ud = user_data(self);
  186. for (size_t i = 0; i < ud->output_shapes.size(); ++i) {
  187. output[i] = ExternCOprRunner::tensor_shape_to_c(ud->output_shapes[i]);
  188. }
  189. }
  190. void PlaceholderMGBOprDesc::infer_dtype(const struct MGBOprDesc* self,
  191. const MGBDType* input,
  192. MGBDType* output) {
  193. auto ud = user_data(self);
  194. for (size_t i = 0; i < ud->output_dtypes.size(); ++i) {
  195. output[i] = dtype_cpp2c(ud->output_dtypes[i]);
  196. }
  197. }
MGBOprDesc* PlaceholderMGBOprDesc::make(size_t nr_input, const char* name,
                                        const TensorShapeArray& output_shapes,
                                        const SmallVector<DType>& output_dtypes,
                                        const void* data, size_t data_len) {
    // single allocation holding MGBOprDesc followed by UserData; desc_size is
    // sizeof(MGBOprDesc) rounded up so the UserData that follows is aligned
    constexpr size_t align = std::max(alignof(MGBOprDesc), alignof(UserData)),
                     desc_size = ((sizeof(MGBOprDesc) - 1) / align + 1) * align;
    std::unique_ptr<uint8_t, void (*)(void*)> ptr(
            static_cast<uint8_t*>(malloc(desc_size + sizeof(UserData))),
            ::free);
    mgb_assert(ptr);
    // placement-new UserData into the tail; the guard runs its destructor if
    // anything below throws before ownership is handed over
    auto del_ud = [](UserData* p) { p->~UserData(); };
    std::unique_ptr<UserData, decltype(del_ud)> ud(
            new (ptr.get() + desc_size) UserData, del_ud);
    ud->name = name;
    ud->output_shapes = output_shapes;
    ud->output_dtypes = output_dtypes;
    ud->data.reset(new uint8_t[data_len]);
    ud->data_len = data_len;
    // NOTE(review): assumes data is non-null whenever data_len > 0 — caller's
    // contract; memcpy with a null src is UB even at length 0 — confirm
    memcpy(ud->data.get(), data, data_len);
    // placement-new the desc at the head and install the static callbacks
    auto desc = new (ptr.get()) MGBOprDesc;
    mgb_init_opr_desc(desc, output_shapes.size(), PLACEHOLDER_TYPE_NAME);
    desc->user_data = ud.release();
#define s(n) desc->n = &PlaceholderMGBOprDesc::n;
    MGB_OPR_DESC_FOREACH_MEM_FN(s);
    // dtype inference is optional: only installed when dtypes were supplied
    if (!output_dtypes.empty()) {
        desc->infer_dtype = &PlaceholderMGBOprDesc::infer_dtype;
    }
#undef s
    // ownership of the whole block transfers to the descriptor; release()
    // (above) undoes both placement constructions and frees the block
    return reinterpret_cast<MGBOprDesc*>(ptr.release());
}
//! serialize a placeholder descriptor: loader name, then the opaque payload
//! (the exact format ExternCOprRunner::load() reads back)
void PlaceholderMGBOprDesc::dump(OprDumpContext& ctx, MGBOprDesc* desc) {
    // deliberate POINTER comparison: placeholder descs are created with
    // exactly this static string (see make()), so identity identifies them
    mgb_assert(desc->type_name == PLACEHOLDER_TYPE_NAME,
               "only placeholder ExternCOprRunner can be dumped; got type %s",
               desc->type_name);
    auto ud = user_data(desc);
    ctx.dump_buf_with_len(ud->name.c_str(), ud->name.size());
    ctx.dump_buf_with_len(ud->data.get(), ud->data_len);
}
/* ===================== ExternCOprRunner ===================== */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ExternCOprRunner);

//! construct the graph operator around a C descriptor; the descriptor must
//! outlive the operator (shared ownership via m_desc)
ExternCOprRunner::ExternCOprRunner(const VarNodeArray& inputs,
                                   std::shared_ptr<MGBOprDesc> desc,
                                   const OperatorNodeConfig& config)
        : Super{inputs[0]->owner_graph(), config, desc->type_name, inputs},
          m_desc{std::move(desc)} {
    // size field doubles as an ABI check between the loader lib and this build
    mgb_assert(m_desc->size == sizeof(MGBOprDesc),
               "invalid MGBOprDesc size: expect=%zu got=%u", sizeof(MGBOprDesc),
               m_desc->size);
    for (auto i : inputs) {
        add_input({i});
    }
    auto nr_out = m_desc->nr_output;
    if (nr_out > 1) {
        // multiple outputs get synthetic names o0, o1, ...
        for (size_t i = 0, it = nr_out; i < it; ++i)
            add_output(ssprintf("o%zu", i));
    } else {
        // zero outputs is a descriptor bug; exactly one uses default naming
        mgb_assert(nr_out == 1,
                   "could not create an operator with %u outputs: %s", nr_out,
                   cname());
        add_output(None);
    }
    // graph dedup identity delegates to the descriptor's hash/is_same hooks
    add_equivalence_component<MGBOprDescHash>(m_desc.get());
}
  261. void ExternCOprRunner::get_output_var_shape(const TensorShapeArray& inp_shape,
  262. TensorShapeArray& out_shape) const {
  263. SmallVector<MGBTensorShape> c_inp(inp_shape.size()),
  264. c_out(out_shape.size());
  265. for (size_t i = 0; i < inp_shape.size(); ++i) {
  266. c_inp[i] = tensor_shape_to_c(inp_shape[i]);
  267. }
  268. m_desc->infer_shape(m_desc.get(), c_inp.data(), c_out.data());
  269. for (size_t i = 0; i < out_shape.size(); ++i) {
  270. out_shape[i] = tensor_shape_from_c(c_out[i]);
  271. }
  272. }
  273. void ExternCOprRunner::init_output_dtype() {
  274. if (!m_desc->infer_dtype) {
  275. Super::init_output_dtype();
  276. return;
  277. }
  278. SmallVector<MGBDType> inp_dtypes, out_dtypes(output().size());
  279. inp_dtypes.reserve(input().size());
  280. for (auto i : input()) {
  281. inp_dtypes.push_back(dtype_cpp2c(i->dtype()));
  282. }
  283. m_desc->infer_dtype(m_desc.get(), inp_dtypes.data(), out_dtypes.data());
  284. for (size_t i = 0; i < out_dtypes.size(); ++i) {
  285. output(i)->dtype(dtype_c2cpp(out_dtypes[i]));
  286. }
  287. }
//! run the C callback: directly on CPU buffers when possible, otherwise
//! staged through host copies since the callback only sees plain memory
void ExternCOprRunner::scn_do_execute() {
    SmallVector<MGBTensor> c_inp(input().size()), c_out(output().size());
    SmallVector<HostTensorND> cpu_inp, cpu_out;
    bool need_copy = false;
    if (comp_node().device_type() == CompNode::DeviceType::CPU) {
        // CPU: expose device buffers to the C callback directly, no copies
        for (size_t i = 0; i < input().size(); ++i) {
            c_inp[i] = tensor_to_c(input(i)->dev_tensor());
        }
        for (size_t i = 0; i < output().size(); ++i) {
            c_out[i] = tensor_to_c(output(i)->dev_tensor());
        }
    } else {
        // non-CPU: copy inputs to host and allocate host outputs
        need_copy = true;
        mgb_log_debug(
                "copy is needed to execute extern C "
                "opr `%s' on comp node `%s'",
                cname(), comp_node().to_string().c_str());
        cpu_inp.resize(input().size());
        cpu_out.resize(output().size());
        for (size_t i = 0; i < input().size(); ++i) {
            cpu_inp[i].copy_from(input(i)->dev_tensor());
            c_inp[i] = tensor_to_c(cpu_inp[i]);
        }
        for (size_t i = 0; i < output().size(); ++i) {
            cpu_out[i]
                    .comp_node(comp_node())
                    .dtype(output(i)->dtype())
                    .resize(output(i)->shape());
            c_out[i] = tensor_to_c(cpu_out[i]);
        }
    }
    if (need_copy) {
        // wait for the async host copies, run synchronously, copy back
        comp_node().sync();
        m_desc->execute(m_desc.get(), c_inp.data(), c_out.data());
        for (size_t i = 0; i < output().size(); ++i)
            output(i)->dev_tensor().copy_from_fixlayout(cpu_out[i]);
    } else {
        // CPU path: enqueue on the comp node's dispatcher; c_inp/c_out are
        // captured by value so they outlive this stack frame
        CompNodeEnv::from_comp_node(comp_node())
                .cpu_env()
                .dispatch([this, c_inp, c_out]() mutable {
                    m_desc->execute(m_desc.get(), c_inp.data(), c_out.data());
                });
    }
}
  332. void ExternCOprRunner::add_input_layout_constraint() {
  333. for (auto i : input())
  334. i->add_layout_constraint_contiguous();
  335. }
  336. cg::OperatorNodeBase* ExternCOprRunner::make_placeholder(
  337. const SymbolVarArray& inputs, const TensorShapeArray& output_shapes,
  338. const char* name, const void* data, size_t data_len,
  339. const OperatorNodeConfig& config,
  340. const SmallVector<DType>& output_dtypes) {
  341. auto desc = PlaceholderMGBOprDesc::make(inputs.size(), name, output_shapes,
  342. output_dtypes, data, data_len);
  343. VarNodeArray var_inp(inputs.size());
  344. for (size_t i = 0; i < inputs.size(); ++i) {
  345. var_inp[i] = inputs[i].node();
  346. }
  347. return make_from_desc(var_inp, desc, config);
  348. }
  349. cg::OperatorNodeBase* ExternCOprRunner::make_from_desc(
  350. const VarNodeArray& inputs, MGBOprDesc* desc,
  351. const OperatorNodeConfig& config) {
  352. auto desc_del = [](MGBOprDesc* ptr) { ptr->release(ptr); };
  353. return make_from_desc_shared(inputs, {desc, desc_del}, config);
  354. }
//! insert an ExternCOprRunner into the owner graph of inputs[0], validating
//! the descriptor before construction
cg::OperatorNodeBase* ExternCOprRunner::make_from_desc_shared(
        const VarNodeArray& inputs, std::shared_ptr<MGBOprDesc> desc,
        const OperatorNodeConfig& config) {
    mgb_assert(!inputs.empty() && desc->nr_output);
    // reject descriptors with any missing mandatory callback up-front
#define CHECK(name) mgb_assert(desc->name, #name " is not given");
    MGB_OPR_DESC_FOREACH_MEM_FN(CHECK);
#undef CHECK
    auto opr = inputs[0]->owner_graph()->insert_opr(
            std::make_unique<ExternCOprRunner>(inputs, std::move(desc),
                                               config));
    // checked cast: assert the node returned by the graph is the right type
    return &opr->cast_final_safe<ExternCOprRunner>();
}
  367. bool ExternCOprRunner::unregister_loader(const char* name) {
  368. return loader_map().erase(name);
  369. }
  370. void ExternCOprRunner::dump(OprDumpContext& ctx,
  371. const cg::OperatorNodeBase& opr_) {
  372. auto&& opr = opr_.cast_final<ExternCOprRunner>();
  373. PlaceholderMGBOprDesc::dump(ctx, opr.m_desc.get());
  374. }
//! deserialize: read loader name + opaque payload (the exact format written
//! by PlaceholderMGBOprDesc::dump) and ask the registered loader for a desc
cg::OperatorNodeBase* ExternCOprRunner::load(OprLoadContext& ctx,
                                             const cg::VarNodeArray& inputs,
                                             const OperatorNodeConfig& config) {
    auto name = ctx.load_buf_with_len();
    auto&& map = loader_map();
    auto iter = map.find(name);
    mgb_assert(iter != map.end(),
               "can not find loader for ExternCOprRunner `%s'", name.c_str());
    auto data = ctx.load_shared_buf_with_len();
    auto desc = iter->second.first.create_desc(inputs.size(), data.data(),
                                               data.size());
    // loaders registered via the 0x23 API carry a transformer that adapts
    // their V23 descriptor to the current ABI; current loaders store nullptr
    if (auto trans = iter->second.second) {
        desc = trans(desc);
    }
    return make_from_desc(inputs, desc, config);
}
  391. cg::OperatorNodeBase* ExternCOprRunner::shallow_copy(
  392. const serialization::OprShallowCopyContext& ctx,
  393. const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs,
  394. const OperatorNodeConfig& config) {
  395. auto&& opr = opr_.cast_final_safe<ExternCOprRunner>();
  396. return make_from_desc_shared(inputs, opr.m_desc, config);
  397. }
  398. MGBTensorShape ExternCOprRunner::tensor_shape_to_c(const TensorShape& shape) {
  399. mgb_assert(shape.ndim <= MGB_TENSOR_MAX_NDIM, "shape ndim too large: %zu",
  400. shape.ndim);
  401. MGBTensorShape ret;
  402. ret.ndim = shape.ndim;
  403. for (size_t i = 0; i < shape.ndim; ++i) {
  404. ret.shape[i] = shape[i];
  405. }
  406. return ret;
  407. }
  408. TensorShape ExternCOprRunner::tensor_shape_from_c(const MGBTensorShape& shape) {
  409. mgb_assert(shape.ndim <= TensorShape::MAX_NDIM, "shape ndim too large: %u",
  410. shape.ndim);
  411. TensorShape ret;
  412. ret.ndim = shape.ndim;
  413. for (size_t i = 0; i < shape.ndim; ++i) {
  414. ret.shape[i] = shape.shape[i];
  415. }
  416. return ret;
  417. }
/* ===================== public APIs ===================== */
//! entry point for loader libraries: returns the register/unregister vtable
//! matching the caller's compiled-against API version, or nullptr for an
//! unsupported version
const MGBExternCOprApi* mgb_get_extern_c_opr_api_versioned(int version) {
    // unregistration is version-independent
    auto unreg = [](const char* name) -> int {
        return ExternCOprRunner::unregister_loader(name);
    };
    if (version == 0x23) {
        // legacy API: register with the transformer that adapts V23
        // descriptors to the current MGBOprDesc ABI
        auto reg23 = [](const MGBOprLoader* loader) -> int {
            return loader_map()
                    .insert({loader->name,
                             {*loader, MGBOprDescV23::as_opr_desc}})
                    .second;
        };
        static const MGBExternCOprApi ret = {reg23, unreg};
        return &ret;
    }
    // any other mismatched version is unsupported
    if (version != MGB_EXTERN_C_OPR_VERSION)
        return nullptr;
    // current API: no transformer needed (nullptr)
    auto reg = [](const MGBOprLoader* loader) -> int {
        return loader_map().insert({loader->name, {*loader, nullptr}}).second;
    };
    static const MGBExternCOprApi ret = {reg, unreg};
    return &ret;
}
  441. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台