
test_network.cpp

/**
 * \file test/test_network.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "lite_build_config.h"

#if LITE_BUILD_WITH_MGE
#include "./test_common.h"
#include "megbrain/tensor.h"

#include <chrono>
#include <memory>
#include <random>
#include <unordered_map>

using namespace lite;

namespace {
class CheckAllocator : public lite::Allocator {
public:
    //! allocate memory of size in the given device with the given align
    void* allocate(LiteDeviceType device, int, size_t size, size_t align) override {
        LITE_ASSERT(device == LiteDeviceType::LITE_CPU);
        m_nr_left++;
        m_nr_allocated++;
#ifdef WIN32
        return _aligned_malloc(size, align);
#elif defined(__ANDROID__) || defined(ANDROID)
        return memalign(align, size);
#else
        void* ptr = nullptr;
        auto err = posix_memalign(&ptr, align, size);
        mgb_assert(!err, "failed to malloc %zu bytes with align %zu", size, align);
        return ptr;
#endif
    };

    //! free the memory pointed by ptr in the given device
    void free(LiteDeviceType device, int, void* ptr) override {
        m_nr_left--;
        LITE_ASSERT(device == LiteDeviceType::LITE_CPU);
#ifdef WIN32
        _aligned_free(ptr);
#else
        ::free(ptr);
#endif
    };

    std::atomic_size_t m_nr_left{0};
    std::atomic_size_t m_nr_allocated{0};
};
}  // namespace

TEST(TestNetWork, Basic) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_lite = mgelite_lar(model_path, config, "data", lite_tensor);
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, SetDeviceId) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->set_device_id(4);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    network->forward();
    network->wait();
    ASSERT_EQ(input_tensor->get_device_id(), 4);
    ASSERT_EQ(output_tensor->get_device_id(), 4);
}

TEST(TestNetWork, GetAllName) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    auto input_names = network->get_all_input_name();
    auto output_names = network->get_all_output_name();
    ASSERT_EQ(input_names.size(), 1);
    ASSERT_EQ(output_names.size(), 1);
    ASSERT_TRUE(input_names[0] == "data");
    ASSERT_TRUE(output_names[0] == "TRUE_DIV(EXP[12065],reduce0[12067])[12077]");
}

TEST(TestNetWork, BasicInplaceAndSingleThreadAffinity) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_inplace_mode(network);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    int affinity_set = false;
    Runtime::set_runtime_thread_affinity(network, [&affinity_set](int id) {
        ASSERT_EQ(id, 0);
        affinity_set = true;
    });
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    //! inplace mode does not support async mode
    ASSERT_THROW(network->set_async_callback([]() {}), std::exception);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    ASSERT_EQ(affinity_set, true);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, NetworkShareWeights) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    std::shared_ptr<Network> network2 = std::make_shared<Network>(config);
    Runtime::set_cpu_inplace_mode(network2);
    Runtime::shared_weight_with_network(network2, network);
    std::shared_ptr<Tensor> input_tensor2 = network2->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    input_tensor2->reset(src_ptr, src_layout);
    ASSERT_NE(input_tensor, input_tensor2);
    network->forward();
    network->wait();
    network2->forward();
    network2->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    std::shared_ptr<Tensor> output_tensor2 = network2->get_output_tensor(0);
    ASSERT_NE(output_tensor->get_memory_ptr(), output_tensor2->get_memory_ptr());
    compare_lite_tensor<float>(output_tensor, result_mgb);
    compare_lite_tensor<float>(output_tensor2, result_mgb);
}

TEST(TestNetWork, SharedRuntimeMem) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network_src = std::make_shared<Network>(config);
    std::shared_ptr<Network> network_dst = std::make_shared<Network>(config);
    Runtime::share_runtime_memory_with(network_dst, network_src);
    network_src->load_model(model_path);
    network_dst->load_model(model_path);
}

TEST(TestNetWork, UserAllocator) {
    auto allocator = std::make_shared<CheckAllocator>();
    {
        Config config;
        auto lite_tensor = get_input_data("./input_data.npy");
        std::string model_path = "./shufflenet.mge";
        auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
        std::shared_ptr<Network> network = std::make_shared<Network>(config);
        Runtime::set_memory_allocator(network, allocator);
        network->load_model(model_path);
        std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
        auto src_ptr = lite_tensor->get_memory_ptr();
        auto src_layout = lite_tensor->get_layout();
        input_tensor->reset(src_ptr, src_layout);
        network->forward();
        network->wait();
        ASSERT_GE(allocator->m_nr_allocated, 1);
        std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
        compare_lite_tensor<float>(output_tensor, result_mgb);
    }
    ASSERT_EQ(allocator->m_nr_left, 0);
}

TEST(TestNetWork, BasicMultiThread) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_threads_number(network, 2);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ThreadAffinity) {
    size_t nr_threads = 4;
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_threads_number(network, nr_threads);
    ASSERT_THROW(
            Runtime::set_runtime_thread_affinity(network, [](int) {}), std::exception);
    network->load_model(model_path);
    std::vector<std::thread::id> thread_ids(nr_threads);
    auto affinity = [&](int id) { thread_ids[id] = std::this_thread::get_id(); };
    Runtime::set_runtime_thread_affinity(network, affinity);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    for (size_t i = 0; i < nr_threads; i++) {
        for (size_t j = i + 1; j < nr_threads; j++) {
            ASSERT_NE(thread_ids[i], thread_ids[j]);
        }
    }
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, BasicCryptAes) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_aes.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "AES_default";
    auto result_lite = mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, BasicCryptRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_rc4.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "RC4_default";
    auto result_lite = mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, PackedCryptRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./test_packed_model_rc4.lite";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    auto result_lite = mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, BasicCryptSfRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_sfrc4.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "SIMPLE_FAST_RC4_default";
    auto result_lite = mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, ResetInput) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ChangeInputShape) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_layout = Layout{{2, 3, 200, 200}, 4, LiteDataType::LITE_FLOAT};
    input_tensor->set_layout(src_layout);
    std::shared_ptr<Tensor> input_tensor2 = network->get_io_tensor(input_name);
    //! Check memory is equal
    ASSERT_EQ(input_tensor->get_memory_ptr(), input_tensor2->get_memory_ptr());
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto output_layout = output_tensor->get_layout();
    ASSERT_EQ(output_layout.shapes[0], 2);
    ASSERT_EQ(output_layout.shapes[1], 1000);
}

TEST(TestNetWork, ResetOutput) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU,
            Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());
    network->forward();
    network->wait();
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, AsyncExec) {
    Config config;
    config.options.var_sanity_check_first_run = false;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU,
            Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());
    //! set async mode and callback
    volatile bool finished = false;
    network->set_async_callback([&finished]() { finished = true; });
    network->forward();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    ASSERT_GT(count, 0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, CPUDeviceInput) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    std::shared_ptr<Network> network = std::make_shared<Network>(IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    input_tensor->reset(src_ptr, layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ShareTensorWith) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, InputCallBack) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO ios;
    bool is_host = false;
    ios.inputs.push_back({input_name, is_host});
    std::shared_ptr<Network> network = std::make_shared<Network>(ios);
    network->load_model(model_path);
    volatile bool finised_check_input = false;
    auto input_callback =
            [&tensor, &finised_check_input, input_name](
                    const std::unordered_map<
                            std::string, std::pair<IO, std::shared_ptr<Tensor>>>&
                            input_map) {
                ASSERT_EQ(input_map.size(), 1);
                auto tensor_input = input_map.at(input_name).second;
                compare_lite_tensor<float>(tensor_input, tensor);
                finised_check_input = true;
            };
    network->set_start_callback(input_callback);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);
    network->forward();
    network->wait();
    ASSERT_TRUE(finised_check_input);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, OutputCallBack) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(model_path);
    auto output_name = network->get_output_name(0);
    volatile bool finised_check_output = false;
    auto output_callback =
            [&result_mgb, &finised_check_output, output_name](
                    const std::unordered_map<
                            std::string, std::pair<IO, std::shared_ptr<Tensor>>>&
                            output_map) {
                ASSERT_EQ(output_map.size(), 1);
                auto tensor_output = output_map.at(output_name).second;
                compare_lite_tensor<float>(tensor_output, result_mgb);
                finised_check_output = true;
            };
    network->set_finish_callback(output_callback);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);
    network->forward();
    network->wait();
    ASSERT_TRUE(finised_check_output);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, OutputShapeOnly) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    NetworkIO IO;
    bool is_host = true;
    IO.outputs.push_back({output_name, is_host, LiteIOType::LITE_IO_SHAPE});
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    std::shared_ptr<Tensor> output_tensor = network->get_io_tensor(output_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    ASSERT_EQ(output_tensor->get_tensor_total_size_in_byte() / sizeof(float), 1000);
}

TEST(TestNetWork, ProfileIOdump) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    NetworkIO IO;
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->enable_profile_performance("./profile.json");
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    ASSERT_TRUE(fopen("./profile.json", "r"));
    Runtime::enable_io_txt_dump(network, "./io_txt_dump.txt");
    network->forward();
    network->wait();
    ASSERT_TRUE(fopen("./io_txt_dump.txt", "r"));
}

TEST(TestNetWork, LoadPackedModel) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./test_packed_model.lite";
    std::string input_name = "data";
    NetworkIO IO;
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
}

TEST(TestNetWork, GetDeviceType) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    ASSERT_TRUE(network->get_device_type() == LiteDeviceType::LITE_CPU);
}

TEST(TestNetWork, GetModelExtraInfo) {
    std::string model_path = "./track_640_320_pack_model_rc4_with_info.lite";
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    auto& extra_info = network->get_model_extra_info();
    ASSERT_TRUE(extra_info.size() > 0);
    printf("extra_info %s \n", extra_info.c_str());
}

#if LITE_WITH_CUDA
TEST(TestNetWork, BasicDevice) {
    auto lite_tensor = get_input_data("./input_data.npy");
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::string model_path = "./shufflenet.mge";
    auto result_lite = mgelite_lar(model_path, config, "data", lite_tensor);
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, DeviceInput) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto tensor_cuda = Tensor(LiteDeviceType::LITE_CUDA, layout);
    tensor_cuda.copy_from(*tensor);
    auto src_ptr = tensor_cuda.get_memory_ptr();
    input_tensor->reset(src_ptr, layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ChangeInputShapeDevice) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_layout = Layout{{2, 3, 200, 200}, 4, LiteDataType::LITE_FLOAT};
    input_tensor->set_layout(src_layout);
    std::shared_ptr<Tensor> input_tensor2 = network->get_io_tensor(input_name);
    //! Check memory is equal
    ASSERT_EQ(input_tensor->get_memory_ptr(), input_tensor2->get_memory_ptr());
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto output_layout = output_tensor->get_layout();
    ASSERT_EQ(output_layout.shapes[0], 2);
    ASSERT_EQ(output_layout.shapes[1], 1000);
}

TEST(TestNetWork, DeviceOutput) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO IO;
    bool is_host = false;
    IO.outputs.push_back({output_name, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    std::shared_ptr<Tensor> output_tensor_cuda = network->get_io_tensor(output_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    auto output_tensor = std::make_shared<Tensor>();
    output_tensor->copy_from(*output_tensor_cuda);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, WrongIONameDevice) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string input_name_wrong = "data0";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    std::string output_name_wrong = "w_TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    IO.outputs.push_back({output_name, is_host});
    IO.outputs.push_back({output_name_wrong, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    auto tensor_cuda = Tensor(LiteDeviceType::LITE_CUDA, layout);
    tensor_cuda.copy_from(*tensor);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor_cuda.get_memory_ptr();
    auto src_layout = tensor_cuda.get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor_cuda = network->get_io_tensor(output_name);
    network->forward();
    network->wait();
    auto output_tensor = std::make_shared<Tensor>();
    output_tensor->copy_from(*output_tensor_cuda);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ConfigIONameDevice) {
    std::string model_path = "./model.mgb";
    NetworkIO IO;
    bool is_host = false;
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->compute_only_configured_output();
    network->load_model(model_path);
    ASSERT_EQ(network->get_all_output_name().size(), 1);
    ASSERT_EQ(network->get_all_output_name()[0], "clsfy");
    std::shared_ptr<Network> network2 = std::make_shared<Network>(config, IO);
    network2->load_model(model_path);
    ASSERT_EQ(network2->get_all_output_name().size(), 2);
}

TEST(TestNetWork, SetDeviceIdDeviceTest) {
#if LITE_WITH_CUDA
    if (get_device_count(LITE_CUDA) <= 1)
        return;
#endif
    std::string model_path = "./model.mgb";
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({"data", is_host});
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->set_device_id(1);
    network->load_model(model_path);
    auto inputs_names = network->get_all_input_name();
    for (auto name : inputs_names) {
        auto tensor = network->get_io_tensor(name);
        ASSERT_EQ(tensor->get_device_id(), 1);
        if (name == "idx") {
            int* index_ptr = static_cast<int*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23; i++) {
                index_ptr[i] = i % 3;
            }
        }
        if (name == "landmark") {
            float* landmakrk_ptr = static_cast<float*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23 * 18 * 2; i++) {
                landmakrk_ptr[i] = 0.1f;
            }
        }
    }
    auto outputs_names = network->get_all_output_name();
    for (auto name : outputs_names) {
        auto tensor = network->get_io_tensor(name);
        ASSERT_EQ(tensor->get_device_id(), 1);
    }
    network->forward();
    network->wait();
}

TEST(TestNetWork, SetStreamIdDeviceTest) {
    std::string model_path = "./model.mgb";
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({"data", is_host});
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->set_stream_id(1);
    network->load_model(model_path);
    auto inputs_names = network->get_all_input_name();
    for (auto name : inputs_names) {
        auto tensor = network->get_io_tensor(name);
        if (name == "idx") {
            int* index_ptr = static_cast<int*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23; i++) {
                index_ptr[i] = i % 3;
            }
        }
        if (name == "landmark") {
            float* landmakrk_ptr = static_cast<float*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23 * 18 * 2; i++) {
                landmakrk_ptr[i] = 0.1f;
            }
        }
    }
    network->forward();
    network->wait();
}

#if CUDART_VERSION >= 10000
TEST(TestNetWork, DeviceAsyncExec) {
    auto tensor = get_input_data("./input_data.npy");
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    config.options.var_sanity_check_first_run = false;
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU,
            Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());
    //! set async mode and callback
    volatile bool finished = false;
    network->set_async_callback([&finished]() { finished = true; });
    network->forward();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    ASSERT_GT(count, 0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}
#endif
#endif

#if MGB_ATLAS
TEST(TestNetWork, AtlasLoadNoDevice) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_DEVICE_DEFAULT;
    auto network = std::make_shared<lite::Network>(config);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadDeviceInput) {
    lite::NetworkIO networkio;
    lite::IO input_data_io = {};
    input_data_io.name = "data";
    input_data_io.is_host = false;
    networkio.inputs.emplace_back(input_data_io);
    lite::IO input_input0_io = {};
    input_input0_io.name = "input0";
    input_input0_io.is_host = false;
    networkio.inputs.emplace_back(input_input0_io);
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_DEVICE_DEFAULT;
    auto network = std::make_shared<lite::Network>(config, networkio);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadAtlas) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadAtlasDeviceInput) {
    lite::NetworkIO networkio;
    lite::IO input_data_io = {};
    input_data_io.name = "data";
    input_data_io.is_host = false;
    networkio.inputs.emplace_back(input_data_io);
    lite::IO input_input0_io = {};
    input_input0_io.name = "input0";
    input_input0_io.is_host = false;
    networkio.inputs.emplace_back(input_input0_io);
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config, networkio);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasDeviceID) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config);
    network->set_device_id(1);
    network->load_model("./model_atlas.mgb");
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    network->forward();
    network->wait();
    ASSERT_EQ(output_tensor->get_device_id(), 1);
}
#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has GPU hardware and that the driver is installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
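For reference, below is a minimal sketch of how application code could apply the same device guard the tests above use (the LITE_WITH_CUDA block combined with get_device_count), falling back to the CPU when no CUDA device is visible. The lite/network.h include path, the exact declaration of get_device_count, and the helper name make_runtime_config are assumptions for illustration, not the definitive API; the tests above obtain these declarations indirectly through test_common.h.

#include "lite_build_config.h"
#include "lite/network.h"  // assumed public header exposing lite::Config and LiteDeviceType

using namespace lite;

//! Hedged sketch: prefer CUDA only when the build has CUDA support and at
//! least one CUDA device is present; otherwise keep the CPU device, mirroring
//! the LITE_WITH_CUDA / get_device_count guard used in the tests above.
Config make_runtime_config() {
    Config config;
    config.device_type = LiteDeviceType::LITE_CPU;
#if LITE_WITH_CUDA
    if (get_device_count(LITE_CUDA) > 0) {
        config.device_type = LiteDeviceType::LITE_CUDA;
    }
#endif
    return config;
}

A network built from such a config is then loaded and run exactly as in the tests above: load_model, forward, wait.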