You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

extern_c_opr.cpp 17 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
#include <cstring>
#include <memory>
#include <vector>

#include "megbrain/graph/extern_copr_api.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/utility.h"
#include "megbrain/serialization/extern_c_opr_io.h"
#include "megbrain/serialization/serializer.h"
#include "megbrain/test/helper.h"
#include "megbrain/utils/debug.h"
  9. using namespace mgb;
  10. using namespace serialization;
  11. namespace {
  12. DType dtype_c2cpp(MGBDType dtype) {
  13. switch (dtype) {
  14. case MGB_DTYPE_UINT8:
  15. return dtype::Uint8{};
  16. case MGB_DTYPE_INT32:
  17. return dtype::Int32{};
  18. case MGB_DTYPE_FLOAT32:
  19. return dtype::Float32{};
  20. #if !MEGDNN_DISABLE_FLOAT16
  21. case MGB_DTYPE_FLOAT16:
  22. return dtype::Float16{};
  23. #endif
  24. default:
  25. mgb_throw(
  26. SerializationError, "bad dtype value: %d", static_cast<int>(dtype));
  27. }
  28. }
//! record of the most recent desc buffer handed to create_desc(); tests use
//! these to verify that serialized opr payloads are forwarded unmodified
const void* prev_desc_buf_addr;
size_t prev_desc_buf_size;
//! a custom opr to compute x + bias
//!
//! Implements the MGBOprDesc plain-C interface: every member is a static
//! callback that make() wires into the C struct.  The out_dtype template
//! parameter selects the output dtype so one implementation serves the
//! f32/i32/f16 test cases.
template <MGBDType out_dtype = MGB_DTYPE_FLOAT32>
class MGBOprDescImpl {
    //! per-instance payload stored in MGBOprDesc::user_data
    struct UserData {
        float bias;
    };
    static UserData* user_data(const MGBOprDesc* self) {
        return static_cast<UserData*>(self->user_data);
    }
    //! destructor callback; keeps nr_inst balanced with make()
    static void release(MGBOprDesc* self) {
        delete user_data(self);
        delete self;
        --nr_inst;
    }
    //! hash/is_same depend only on bias, so descs with equal bias dedup
    //! (exercised by the Dedup test below)
    static size_t hash(const MGBOprDesc* self) {
        return mgb::hash<float>(user_data(self)->bias);
    }
    static int is_same(const MGBOprDesc* self, const MGBOprDesc* rhs) {
        return user_data(self)->bias == user_data(rhs)->bias;
    }
    //! compute output[0] = input[0] + bias on a 1-d f32 tensor; when the
    //! dynamic param supplies extern input/output buffers, use those instead
    //! of the graph-provided tensor data
    static void execute(
            const MGBOprDesc* self, const MGBTensor* input, const MGBTensor* output) {
        if (self->dynamic_param) {
            auto device_id = self->dynamic_param->device_id;
            // 8 is the magic device id set by the ExternCOprParam test below
            mgb_assert(0 == device_id || 8 == device_id);
        }
        bool use_extern_input =
                (self->dynamic_param && self->dynamic_param->nr_input > 0) ? true
                                                                           : false;
        bool use_extern_output =
                (self->dynamic_param && self->dynamic_param->nr_output > 0) ? true
                                                                            : false;
        auto&& i = input[0].layout;
        auto&& o = output[0].layout;
        mgb_assert(
                i.shape.ndim == 1 && o.shape.ndim == 1 &&
                i.shape.shape[0] == o.shape.shape[0]);
        mgb_assert(i.dtype == MGB_DTYPE_FLOAT32 && o.dtype == out_dtype);
        auto input_p = static_cast<float*>(input[0].data);
        if (use_extern_input)
            input_p = static_cast<float*>(self->dynamic_param->input[0].device_ptr);
        auto bias = user_data(self)->bias;
        if (out_dtype == MGB_DTYPE_FLOAT32) {
            auto output_p = static_cast<float*>(output[0].data);
            if (use_extern_output)
                output_p =
                        static_cast<float*>(self->dynamic_param->output[0].device_ptr);
            for (size_t x = 0; x < i.shape.shape[0]; ++x) {
                output_p[x] = input_p[x] + bias;
            }
        } else if (DNN_FLOAT16_SELECT(out_dtype == MGB_DTYPE_FLOAT16, false)) {
#if !MEGDNN_DISABLE_FLOAT16
            // NOTE(review): the f16 and i32 paths ignore use_extern_output and
            // always write the graph tensor -- presumably extern outputs are
            // only exercised with f32; confirm if that ever changes
            auto output_p = static_cast<dt_float16*>(output[0].data);
            for (size_t x = 0; x < i.shape.shape[0]; ++x) {
                output_p[x] = input_p[x] + bias;
            }
#endif
        } else {
            mgb_assert(out_dtype == MGB_DTYPE_INT32);
            auto output_p = static_cast<int32_t*>(output[0].data);
            for (size_t x = 0; x < i.shape.shape[0]; ++x) {
                output_p[x] = input_p[x] + bias;
            }
        }
    }
    //! shape inference: output shape equals input shape
    static void infer_shape(
            const MGBOprDesc*, const MGBTensorShape* input, MGBTensorShape* output) {
        output[0] = input[0];
    }
    //! dtype inference: output dtype is fixed by the template parameter
    static void infer_dtype(
            const struct MGBOprDesc* self, const MGBDType* input, MGBDType* output) {
        output[0] = out_dtype;
    }
    //! display name of the desc
    //! NOTE(review): "bias_adder_f23" looks like a typo for "f32", but the
    //! string may be relied on elsewhere -- confirm before renaming
    static const char* name() {
        return out_dtype == MGB_DTYPE_FLOAT32
                       ? "bias_adder_f23"
                       : (out_dtype == MGB_DTYPE_INT32 ? "bias_adder_int32"
                                                       : "bias_addr_float16");
    }

public:
    //! number of live desc instances; tests use this to verify release()
    static int nr_inst;
    //! build a heap-allocated desc computing x + bias; ownership passes to
    //! the caller (released via the release() callback)
    static MGBOprDesc* make(float bias) {
        ++nr_inst;
        auto ud = std::make_unique<UserData>();
        ud->bias = bias;
        auto desc = std::make_unique<MGBOprDesc>();
        mgb_init_opr_desc(desc.get(), 1, name());
        desc->user_data = ud.release();
        // wire the static members above into the matching C callback slots
#define s(n) desc->n = &MGBOprDescImpl::n;
        MGB_OPR_DESC_FOREACH_MEM_FN(s);
#undef s
        // infer_dtype is only installed when the output dtype differs from
        // the default f32
        if (out_dtype != MGB_DTYPE_FLOAT32) {
            desc->infer_dtype = infer_dtype;
        }
        return desc.release();
    }
};
template <MGBDType out_dtype>
int MGBOprDescImpl<out_dtype>::nr_inst = 0;
  130. template <MGBDType out_dtype = MGBDType::MGB_DTYPE_FLOAT32>
  131. class MGBOprLoaderImpl {
  132. static MGBOprDesc* create_desc(size_t nr_input, const void* buf, size_t buf_len) {
  133. mgb_assert(buf_len == sizeof(float));
  134. prev_desc_buf_addr = buf;
  135. prev_desc_buf_size = buf_len;
  136. float fv;
  137. memcpy(&fv, buf, buf_len);
  138. return MGBOprDescImpl<out_dtype>::make(fv);
  139. }
  140. public:
  141. static MGBOprLoader make() { return {name(), &create_desc}; }
  142. static const char* name() {
  143. return out_dtype == MGB_DTYPE_FLOAT32
  144. ? "bias_adder_dump"
  145. : (out_dtype == MGB_DTYPE_INT32 ? "bias_adder_dump_i32"
  146. : "bias_adder_dump_f16");
  147. }
  148. };
  149. template <MGBDType out_dtype>
  150. class MGBOprLoaderReg {
  151. public:
  152. MGBOprLoaderReg() {
  153. auto api = mgb_get_extern_c_opr_api();
  154. auto loader = MGBOprLoaderImpl<out_dtype>::make();
  155. auto succ = api->register_loader(&loader);
  156. mgb_assert(succ);
  157. }
  158. };
//! global registrations: loaders must exist before any dump is loaded
MGBOprLoaderReg<MGB_DTYPE_FLOAT32> loader_reg_f32;
MGBOprLoaderReg<MGB_DTYPE_INT32> loader_reg_i32;
#if !MEGDNN_DISABLE_FLOAT16
MGBOprLoaderReg<MGB_DTYPE_FLOAT16> loader_reg_f16;
#endif
//! build a tiny graph (x -> [Sleep] -> extern bias adder -> [* extra_scale])
//! and serialize it
//!
//! \param bias payload serialized into the extern opr (read back by
//!     MGBOprLoaderImpl::create_desc)
//! \param extra_scale when nonzero, append a multiply by this factor
//! \param sleep when nonzero, prepend a Sleep opr of this duration
//! \param dtype output dtype of the extern opr
//! \return the serialized graph as a byte vector
std::vector<uint8_t> create_graph_dump(
        float bias, float extra_scale, float sleep, MGBDType dtype) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1}, "cpux");
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    if (sleep)
        x = opr::Sleep::make(x, sleep);
    // the ":test" suffix exercises "name:extra_info" parsing of the
    // placeholder name; presumably intentional that only f32 uses it -- the
    // ExternCOprParam test below matches against "bias_adder_dump:test"
    x = opr::ExternCOprRunner::make_placeholder(
                {x}, {TensorShape{1}},
                dtype == MGB_DTYPE_FLOAT32
                        ? "bias_adder_dump:test"
                        : (dtype == MGB_DTYPE_INT32 ? "bias_adder_dump_i32"
                                                    : "bias_adder_dump_f16"),
                &bias, sizeof(bias), {}, {dtype_c2cpp(dtype)})
                ->output(0);
    if (extra_scale)
        x = x * extra_scale;
    std::vector<uint8_t> ret;
    auto dumper = GraphDumper::make(OutputFile::make_vector_proxy(&ret));
    dumper->dump({x});
    return ret;
}
//! load a dump produced by create_graph_dump, run it on a fresh 23-element
//! input and compare against a host-computed expectation
//!
//! \param cn comp node every var in the dump is remapped onto
//! \param bias,scale must match the values used when dumping
void check_dump_by_compute(
        std::unique_ptr<serialization::InputFile> input_file, CompNode cn,
        MGBDType dtype, float bias, float scale) {
    GraphLoadConfig config;
    // remap all comp nodes in the dump onto the requested device
    config.comp_node_mapper = [loc = cn.locator()](CompNode::Locator& t) { t = loc; };
    auto loader = GraphLoader::make(std::move(input_file));
    auto load_ret = loader->load(config);
    load_ret.graph->options().var_sanity_check_first_run = false;
    SymbolVar y;
    unpack_vector(load_ret.output_var_list, y);
    HostTensorGenerator<> gen;
    // the dump has a single input; overwrite it with a fresh 23-element tensor
    auto host_x = load_ret.tensor_map.begin()->second;
    *host_x = *gen({23}, cn);
    HostTensorND y_expect;
    y_expect.copy_from(*host_x);
    {
        // compute the expectation on host, emulating the opr's dtype
        // truncation before the final scale
        auto py = y_expect.ptr<float>();
        for (int i = 0; i < 23; ++i) {
            auto t = py[i] + bias;
            if (dtype == MGB_DTYPE_INT32) {
                t = int(t);
#if !MEGDNN_DISABLE_FLOAT16
            } else if (dtype == MGB_DTYPE_FLOAT16) {
                t = dt_float16(t);
#endif
            }
            py[i] = t * scale;
        }
    }
    HostTensorND host_y;
    auto func = load_ret.graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(y_expect, host_y);
}
//! like check_dump_by_compute, but attaches an ExternCOprParam to the
//! compiled func and verifies extern input/output buffers are honored
void check_dump_by_compute_with_param(
        std::unique_ptr<serialization::InputFile> input_file, CompNode cn,
        MGBDType dtype, float bias, std::shared_ptr<ExternCOprParam> param) {
    GraphLoadConfig config;
    config.comp_node_mapper = [loc = cn.locator()](CompNode::Locator& t) { t = loc; };
    auto loader = GraphLoader::make(std::move(input_file));
    auto load_ret = loader->load(config);
    load_ret.graph->options().var_sanity_check_first_run = false;
    SymbolVar y;
    unpack_vector(load_ret.output_var_list, y);
    HostTensorGenerator<> gen;
    auto host_x = load_ret.tensor_map.begin()->second;
    *host_x = *gen({23}, cn);
    HostTensorND y_expect;
    y_expect.copy_from(*host_x);
    {
        // compute the expectation: read from the extern input buffer when the
        // param provides one, otherwise from the graph input
        auto py = y_expect.ptr<float>();
        float* extern_input_device_ptr = nullptr;
        if (param->nr_input && param->input && param->input->device_ptr) {
            extern_input_device_ptr = static_cast<float*>(param->input->device_ptr);
        }
        for (int i = 0; i < 23; ++i) {
            float t = 0;
            //! this expectation code runs before
            //! config_extern_c_opr_dynamic_param, so double-check that the
            //! child member pointers are valid before dereferencing them
            if (param->nr_input && param->input && param->input->device_ptr) {
                t = extern_input_device_ptr[i] + bias;
            } else {
                t = py[i] + bias;
            }
            if (dtype == MGB_DTYPE_INT32) {
                t = int(t);
#if !MEGDNN_DISABLE_FLOAT16
            } else if (dtype == MGB_DTYPE_FLOAT16) {
                t = dt_float16(t);
#endif
            }
            py[i] = t;
        }
    }
    HostTensorND host_y;
    auto func = load_ret.graph->compile({make_callback_copy(y, host_y)});
    config_extern_c_opr_dynamic_param(func, param);
    func->execute();
    if (param->nr_output) {
        // when an extern output buffer is configured, the result lands there
        // instead of host_y; copy it in so the comparison below still works
        auto ph = host_y.ptr<float>();
        auto outp = static_cast<float*>(param->output->device_ptr);
        for (int i = 0; i < 23; ++i) {
            ph[i] = outp[i];
        }
    }
    MGB_ASSERT_TENSOR_EQ(y_expect, host_y);
}
  274. void run_compute_test(CompNode cn, MGBDType dtype) {
  275. float bias = 1.2, scale = -2.1;
  276. auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype);
  277. check_dump_by_compute(
  278. InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn, dtype,
  279. bias, scale);
  280. }
  281. void run_compute_test_with_param(
  282. CompNode cn, MGBDType dtype, std::shared_ptr<ExternCOprParam> param) {
  283. float bias = 1.2, scale = 0;
  284. auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype);
  285. check_dump_by_compute_with_param(
  286. InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn, dtype,
  287. bias, param);
  288. }
  289. } // namespace
  290. TEST(TestExternCOpr, ExternCOprParam) {
  291. //! same with check_dump_by_compute_with_param
  292. constexpr int input_output_size = 23;
  293. auto c_opr_param = std::make_shared<ExternCOprParam>();
  294. MGBTensorLayout input_layput, output_layput;
  295. ExternDeviceTensor input, output;
  296. float* input_device_ptr = (float*)malloc(input_output_size * sizeof(float));
  297. float* output_device_ptr = (float*)malloc(input_output_size * sizeof(float));
  298. auto reset = [&] {
  299. memset(c_opr_param.get(), 0, sizeof(ExternCOprParam));
  300. memset(&input_layput, 0, sizeof(MGBTensorLayout));
  301. memset(&input, 0, sizeof(ExternDeviceTensor));
  302. memset(&output_layput, 0, sizeof(MGBTensorLayout));
  303. memset(&output, 0, sizeof(ExternDeviceTensor));
  304. memset(input_device_ptr, 0, input_output_size * sizeof(float));
  305. memset(output_device_ptr, 0, input_output_size * sizeof(float));
  306. for (size_t i = 0; i < input_output_size; i++) {
  307. input_device_ptr[i] = i;
  308. }
  309. };
  310. auto run_test = [&] {
  311. run_compute_test_with_param(
  312. CompNode::load("cpux"), MGB_DTYPE_FLOAT32, c_opr_param);
  313. };
  314. auto init_param = [&] {
  315. reset();
  316. c_opr_param->nr_input = 1;
  317. input_layput.shape = {1, {input_output_size}};
  318. input.layout = input_layput;
  319. input.device_ptr = input_device_ptr;
  320. c_opr_param->input = &input;
  321. c_opr_param->nr_output = 1;
  322. output_layput.shape = {1, {input_output_size}};
  323. output.layout = output_layput;
  324. output.device_ptr = output_device_ptr;
  325. c_opr_param->output = &output;
  326. };
  327. //! run with null param
  328. reset();
  329. run_test();
  330. //! run with full param
  331. init_param();
  332. run_test();
  333. //! run with a right index
  334. init_param();
  335. c_opr_param->extern_c_opr_dump_name = "bias_adder_dump:test";
  336. run_test();
  337. //! set a wrong index
  338. init_param();
  339. c_opr_param->extern_c_opr_dump_name = "bias_adder_dump";
  340. ASSERT_THROW(run_test(), MegBrainError);
  341. //! set a wrong index
  342. init_param();
  343. c_opr_param->extern_c_opr_dump_name = "sdfsdfs";
  344. ASSERT_THROW(run_test(), MegBrainError);
  345. //! set wrong input
  346. init_param();
  347. c_opr_param->input = nullptr;
  348. ASSERT_THROW(run_test(), MegBrainError);
  349. //! set wrong nr_input
  350. init_param();
  351. c_opr_param->nr_input = 3;
  352. ASSERT_THROW(run_test(), MegBrainError);
  353. //! set wrong input device_ptr
  354. init_param();
  355. c_opr_param->input->device_ptr = nullptr;
  356. ASSERT_THROW(run_test(), MegBrainError);
  357. //! set wrong input shape
  358. init_param();
  359. c_opr_param->input->layout.shape.shape[0] = input_output_size - 2;
  360. ASSERT_THROW(run_test(), MegBrainError);
  361. //! set wrong output
  362. init_param();
  363. c_opr_param->output = nullptr;
  364. ASSERT_THROW(run_test(), MegBrainError);
  365. //! set wrong nr_output
  366. init_param();
  367. c_opr_param->nr_output = 3;
  368. ASSERT_THROW(run_test(), MegBrainError);
  369. //! set wrong output device_ptr
  370. init_param();
  371. c_opr_param->output->device_ptr = nullptr;
  372. ASSERT_THROW(run_test(), MegBrainError);
  373. //! set wrong output shape
  374. init_param();
  375. c_opr_param->output->layout.shape.shape[0] = input_output_size - 2;
  376. ASSERT_THROW(run_test(), MegBrainError);
  377. //! set wrong dtype(test MGB_DTYPE_FLOAT32)
  378. init_param();
  379. c_opr_param->input[0].layout.dtype = MGB_DTYPE_INT32;
  380. ASSERT_THROW(run_test(), MegBrainError);
  381. //! test only device_id
  382. reset();
  383. c_opr_param->device_id = 8;
  384. run_test();
  385. //! free
  386. free(input_device_ptr);
  387. free(output_device_ptr);
  388. }
//! basic f32 compute on CPU
TEST(TestExternCOpr, CPUCompute) {
    run_compute_test(CompNode::load("cpux"), MGB_DTYPE_FLOAT32);
}
//! basic f32 compute on GPU; skipped when no GPU is available
TEST(TestExternCOpr, GPUCompute) {
    REQUIRE_GPU(1);
    run_compute_test(CompNode::load("gpux"), MGB_DTYPE_FLOAT32);
}
#if MGB_OPENCL
#include "megcore_opencl.h"

//! skip the enclosing test when no OpenCL device is present
#define REQUIRE_OPENCL()                                                 \
    do {                                                                 \
        if (!CompNode::get_device_count(CompNode::DeviceType::OPENCL)) { \
            return;                                                      \
        }                                                                \
    } while (0)

//! basic f32 compute on OpenCL
TEST(TestExternCOpr, OPENCLCompute) {
    REQUIRE_OPENCL();
    run_compute_test(CompNode::load("openclx"), MGB_DTYPE_FLOAT32);
}
#endif
//! non-f32 output dtypes: i32 always, f16 only when built with f16 support
TEST(TestExternCOpr, CPUComputeMultiDtype) {
    run_compute_test(CompNode::load("cpux"), MGB_DTYPE_INT32);
#if !MEGDNN_DISABLE_FLOAT16
    run_compute_test(CompNode::load("cpux"), MGB_DTYPE_FLOAT16);
#endif
}
//! loader names must be unique: re-registering fails, and unregistering a
//! name that is no longer present fails
TEST(TestExternCOpr, Register) {
    auto api = mgb_get_extern_c_opr_api();
    // the f32 loader was registered by the file-scope MGBOprLoaderReg global
    ASSERT_TRUE(api->unregister_loader("bias_adder_dump"));
    ASSERT_FALSE(api->unregister_loader("bias_adder_dump"));
    auto loader = MGBOprLoaderImpl<MGB_DTYPE_FLOAT32>::make();
    ASSERT_TRUE(api->register_loader(&loader));
    ASSERT_FALSE(api->register_loader(&loader));
}
//! descs with equal bias hash/compare equal (see MGBOprDescImpl::hash and
//! is_same), so the graph dedups them; also verifies release() is balanced
TEST(TestExternCOpr, Dedup) {
    ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst);
    {
        HostTensorGenerator<> gen;
        auto host_x = gen({1});
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto make_opr = [x](float bias) {
            std::string name = "test";
            return opr::ExternCOprRunner::make_from_desc(
                    name, {x.node()}, MGBOprDescImpl<>::make(bias));
        };
        // y0 and y2 share the same bias, so they must dedup to the same var
        auto y0 = make_opr(0.5), y1 = make_opr(0.6), y2 = make_opr(0.5);
        ASSERT_EQ(y0, y2);
        ASSERT_NE(y0, y1);
        // 3 descs were made, but the duplicate one is released immediately
        ASSERT_EQ(2, MGBOprDescImpl<>::nr_inst);
    }
    // graph destruction releases the remaining descs
    ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst);
}
  442. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}