
test_network.cpp

/**
 * \file test/test_network.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "lite_build_config.h"

#if LITE_BUILD_WITH_MGE
#include "./test_common.h"
#include "megbrain/tensor.h"

#include <chrono>
#include <memory>
#include <random>
#include <unordered_map>

using namespace lite;

namespace {
class CheckAllocator : public lite::Allocator {
public:
    //! allocate memory of size in the given device with the given align
    void* allocate(LiteDeviceType device, int, size_t size, size_t align) override {
        LITE_ASSERT(device == LiteDeviceType::LITE_CPU);
        m_nr_left++;
        m_nr_allocated++;
#ifdef WIN32
        return _aligned_malloc(size, align);
#elif defined(__ANDROID__) || defined(ANDROID)
        return memalign(align, size);
#else
        void* ptr = nullptr;
        auto err = posix_memalign(&ptr, align, size);
        mgb_assert(!err, "failed to malloc %zu bytes with align %zu", size, align);
        return ptr;
#endif
    };

    //! free the memory pointed by ptr in the given device
    void free(LiteDeviceType device, int, void* ptr) override {
        m_nr_left--;
        LITE_ASSERT(device == LiteDeviceType::LITE_CPU);
#ifdef WIN32
        _aligned_free(ptr);
#else
        ::free(ptr);
#endif
    };

    std::atomic_size_t m_nr_left{0};
    std::atomic_size_t m_nr_allocated{0};
};
}  // namespace
TEST(TestNetWork, Basic) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_lite = mgelite_lar(model_path, config, "data", lite_tensor);
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, SetDeviceId) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->set_device_id(4);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    network->forward();
    network->wait();
    ASSERT_EQ(input_tensor->get_device_id(), 4);
    ASSERT_EQ(output_tensor->get_device_id(), 4);
}

TEST(TestNetWork, GetAllName) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    auto input_names = network->get_all_input_name();
    auto output_names = network->get_all_output_name();
    ASSERT_EQ(input_names.size(), 1);
    ASSERT_EQ(output_names.size(), 1);
    ASSERT_TRUE(input_names[0] == "data");
    ASSERT_TRUE(output_names[0] == "TRUE_DIV(EXP[12065],reduce0[12067])[12077]");
}
TEST(TestNetWork, BasicInplaceAndSingleThreadAffinity) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_inplace_mode(network);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    bool affinity_set = false;
    Runtime::set_runtime_thread_affinity(network, [&affinity_set](int id) {
        ASSERT_EQ(id, 0);
        affinity_set = true;
    });
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    //! inplace mode does not support async mode
    ASSERT_THROW(network->set_async_callback([]() {}), std::exception);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    ASSERT_EQ(affinity_set, true);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, NetworkShareWeights) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    std::shared_ptr<Network> network2 = std::make_shared<Network>(config);
    Runtime::set_cpu_inplace_mode(network2);
    Runtime::shared_weight_with_network(network2, network);
    std::shared_ptr<Tensor> input_tensor2 = network2->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    input_tensor2->reset(src_ptr, src_layout);
    ASSERT_NE(input_tensor, input_tensor2);
    network->forward();
    network->wait();
    network2->forward();
    network2->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    std::shared_ptr<Tensor> output_tensor2 = network2->get_output_tensor(0);
    ASSERT_NE(output_tensor->get_memory_ptr(), output_tensor2->get_memory_ptr());
    compare_lite_tensor<float>(output_tensor, result_mgb);
    compare_lite_tensor<float>(output_tensor2, result_mgb);
}

TEST(TestNetWork, SharedRuntimeMem) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network_src = std::make_shared<Network>(config);
    std::shared_ptr<Network> network_dst = std::make_shared<Network>(config);
    Runtime::share_runtime_memory_with(network_dst, network_src);
    network_src->load_model(model_path);
    network_dst->load_model(model_path);
}
TEST(TestNetWork, UserAllocator) {
    auto allocator = std::make_shared<CheckAllocator>();
    {
        Config config;
        auto lite_tensor = get_input_data("./input_data.npy");
        std::string model_path = "./shufflenet.mge";
        auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
        std::shared_ptr<Network> network = std::make_shared<Network>(config);
        Runtime::set_memory_allocator(network, allocator);
        network->load_model(model_path);
        std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
        auto src_ptr = lite_tensor->get_memory_ptr();
        auto src_layout = lite_tensor->get_layout();
        input_tensor->reset(src_ptr, src_layout);
        network->forward();
        network->wait();
        ASSERT_GE(allocator->m_nr_allocated, 1);
        std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
        compare_lite_tensor<float>(output_tensor, result_mgb);
    }
    ASSERT_EQ(allocator->m_nr_left, 0);
}

TEST(TestNetWork, BasicMultiThread) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_threads_number(network, 2);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ThreadAffinity) {
    size_t nr_threads = 4;
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_threads_number(network, nr_threads);
    //! setting the affinity callback before load_model should throw
    ASSERT_THROW(
            Runtime::set_runtime_thread_affinity(network, [](int) {}), std::exception);
    network->load_model(model_path);
    std::vector<std::thread::id> thread_ids(nr_threads);
    auto affinity = [&](int id) { thread_ids[id] = std::this_thread::get_id(); };
    Runtime::set_runtime_thread_affinity(network, affinity);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    for (size_t i = 0; i < nr_threads; i++) {
        for (size_t j = i + 1; j < nr_threads; j++) {
            ASSERT_NE(thread_ids[i], thread_ids[j]);
        }
    }
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}
TEST(TestNetWork, BasicCryptAes) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_aes.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "AES_default";
    auto result_lite = mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, BasicCryptRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_rc4.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "RC4_default";
    auto result_lite = mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, PackedCryptRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./test_packed_model_rc4.lite";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    auto result_lite = mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, BasicCryptSfRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_sfrc4.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "SIMPLE_FAST_RC4_default";
    auto result_lite = mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}
TEST(TestNetWork, ResetInput) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ChangeInputShape) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_layout = Layout{{2, 3, 200, 200}, 4, LiteDataType::LITE_FLOAT};
    input_tensor->set_layout(src_layout);
    std::shared_ptr<Tensor> input_tensor2 = network->get_io_tensor(input_name);
    //! Check memory is equal
    ASSERT_EQ(input_tensor->get_memory_ptr(), input_tensor2->get_memory_ptr());
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto output_layout = output_tensor->get_layout();
    ASSERT_EQ(output_layout.shapes[0], 2);
    ASSERT_EQ(output_layout.shapes[1], 1000);
}

TEST(TestNetWork, ResetOutput) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU, Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());
    network->forward();
    network->wait();
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, AsyncExec) {
    Config config;
    config.options.var_sanity_check_first_run = false;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU, Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());
    //! set async mode and callback
    volatile bool finished = false;
    network->set_async_callback([&finished]() { finished = true; });
    network->forward();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    ASSERT_GT(count, 0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}
TEST(TestNetWork, CPUDeviceInput) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    std::shared_ptr<Network> network = std::make_shared<Network>(IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    input_tensor->reset(src_ptr, layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ShareTensorWith) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}
TEST(TestNetWork, InputCallBack) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO ios;
    bool is_host = false;
    ios.inputs.push_back({input_name, is_host});
    std::shared_ptr<Network> network = std::make_shared<Network>(ios);
    network->load_model(model_path);
    volatile bool finished_check_input = false;
    auto input_callback =
            [&tensor, &finished_check_input,
             input_name](const std::unordered_map<
                         std::string, std::pair<IO, std::shared_ptr<Tensor>>>&
                                 input_map) {
                ASSERT_EQ(input_map.size(), 1);
                auto tensor_input = input_map.at(input_name).second;
                compare_lite_tensor<float>(tensor_input, tensor);
                finished_check_input = true;
            };
    network->set_start_callback(input_callback);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);
    network->forward();
    network->wait();
    ASSERT_TRUE(finished_check_input);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, OutputCallBack) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(model_path);
    auto output_name = network->get_output_name(0);
    volatile bool finished_check_output = false;
    auto output_callback =
            [&result_mgb, &finished_check_output,
             output_name](const std::unordered_map<
                          std::string, std::pair<IO, std::shared_ptr<Tensor>>>&
                                  output_map) {
                ASSERT_EQ(output_map.size(), 1);
                auto tensor_output = output_map.at(output_name).second;
                compare_lite_tensor<float>(tensor_output, result_mgb);
                finished_check_output = true;
            };
    network->set_finish_callback(output_callback);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);
    network->forward();
    network->wait();
    ASSERT_TRUE(finished_check_output);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}
TEST(TestNetWork, OutputShapeOnly) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    NetworkIO IO;
    bool is_host = true;
    IO.outputs.push_back({output_name, is_host, LiteIOType::LITE_IO_SHAPE});
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    std::shared_ptr<Tensor> output_tensor = network->get_io_tensor(output_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    ASSERT_EQ(output_tensor->get_tensor_total_size_in_byte() / sizeof(float), 1000);
}

TEST(TestNetWork, ProfileIOdump) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    NetworkIO IO;
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->enable_profile_performance("./profile.json");
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    ASSERT_TRUE(fopen("./profile.json", "r"));
    Runtime::enable_io_txt_dump(network, "./io_txt_dump.txt");
    network->forward();
    network->wait();
    ASSERT_TRUE(fopen("./io_txt_dump.txt", "r"));
}

TEST(TestNetWork, LoadPackedModel) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./test_packed_model.lite";
    std::string input_name = "data";
    NetworkIO IO;
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
}

TEST(TestNetWork, GetDeviceType) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    ASSERT_TRUE(network->get_device_type() == LiteDeviceType::LITE_CPU);
}

TEST(TestNetWork, GetModelExtraInfo) {
    std::string model_path = "./track_640_320_pack_model_rc4_with_info.lite";
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    auto& extra_info = network->get_model_extra_info();
    ASSERT_TRUE(extra_info.size() > 0);
    printf("extra_info %s \n", extra_info.c_str());
}
#if LITE_WITH_CUDA

TEST(TestNetWork, BasicDevice) {
    auto lite_tensor = get_input_data("./input_data.npy");
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::string model_path = "./shufflenet.mge";
    auto result_lite = mgelite_lar(model_path, config, "data", lite_tensor);
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, DeviceInput) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto tensor_cuda = Tensor(LiteDeviceType::LITE_CUDA, layout);
    tensor_cuda.copy_from(*tensor);
    auto src_ptr = tensor_cuda.get_memory_ptr();
    input_tensor->reset(src_ptr, layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ChangeInputShapeDevice) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_layout = Layout{{2, 3, 200, 200}, 4, LiteDataType::LITE_FLOAT};
    input_tensor->set_layout(src_layout);
    std::shared_ptr<Tensor> input_tensor2 = network->get_io_tensor(input_name);
    //! Check memory is equal
    ASSERT_EQ(input_tensor->get_memory_ptr(), input_tensor2->get_memory_ptr());
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto output_layout = output_tensor->get_layout();
    ASSERT_EQ(output_layout.shapes[0], 2);
    ASSERT_EQ(output_layout.shapes[1], 1000);
}

TEST(TestNetWork, DeviceOutput) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO IO;
    bool is_host = false;
    IO.outputs.push_back({output_name, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    std::shared_ptr<Tensor> output_tensor_cuda = network->get_io_tensor(output_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    auto output_tensor = std::make_shared<Tensor>();
    output_tensor->copy_from(*output_tensor_cuda);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}
TEST(TestNetWork, WrongIONameDevice) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string input_name_wrong = "data0";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    std::string output_name_wrong = "w_TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    IO.outputs.push_back({output_name, is_host});
    IO.outputs.push_back({output_name_wrong, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);
    auto tensor_cuda = Tensor(LiteDeviceType::LITE_CUDA, layout);
    tensor_cuda.copy_from(*tensor);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor_cuda.get_memory_ptr();
    auto src_layout = tensor_cuda.get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor_cuda = network->get_io_tensor(output_name);
    network->forward();
    network->wait();
    auto output_tensor = std::make_shared<Tensor>();
    output_tensor->copy_from(*output_tensor_cuda);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ConfigIONameDevice) {
    std::string model_path = "./model.mgb";
    NetworkIO IO;
    bool is_host = false;
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->compute_only_configured_output();
    network->load_model(model_path);
    ASSERT_EQ(network->get_all_output_name().size(), 1);
    ASSERT_EQ(network->get_all_output_name()[0], "clsfy");
    std::shared_ptr<Network> network2 = std::make_shared<Network>(config, IO);
    network2->load_model(model_path);
    ASSERT_EQ(network2->get_all_output_name().size(), 2);
}
TEST(TestNetWork, SetDeviceIdDeviceTest) {
#if LITE_WITH_CUDA
    if (get_device_count(LITE_CUDA) <= 1)
        return;
#endif
    std::string model_path = "./model.mgb";
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({"data", is_host});
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->set_device_id(1);
    network->load_model(model_path);
    auto inputs_names = network->get_all_input_name();
    for (auto name : inputs_names) {
        auto tensor = network->get_io_tensor(name);
        ASSERT_EQ(tensor->get_device_id(), 1);
        if (name == "idx") {
            int* index_ptr = static_cast<int*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23; i++) {
                index_ptr[i] = i % 3;
            }
        }
        if (name == "landmark") {
            float* landmark_ptr = static_cast<float*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23 * 18 * 2; i++) {
                landmark_ptr[i] = 0.1f;
            }
        }
    }
    auto outputs_names = network->get_all_output_name();
    for (auto name : outputs_names) {
        auto tensor = network->get_io_tensor(name);
        ASSERT_EQ(tensor->get_device_id(), 1);
    }
    network->forward();
    network->wait();
}

TEST(TestNetWork, SetStreamIdDeviceTest) {
    std::string model_path = "./model.mgb";
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({"data", is_host});
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->set_stream_id(1);
    network->load_model(model_path);
    auto inputs_names = network->get_all_input_name();
    for (auto name : inputs_names) {
        auto tensor = network->get_io_tensor(name);
        if (name == "idx") {
            int* index_ptr = static_cast<int*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23; i++) {
                index_ptr[i] = i % 3;
            }
        }
        if (name == "landmark") {
            float* landmark_ptr = static_cast<float*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23 * 18 * 2; i++) {
                landmark_ptr[i] = 0.1f;
            }
        }
    }
    network->forward();
    network->wait();
}
#if CUDART_VERSION >= 10000
TEST(TestNetWork, DeviceAsyncExec) {
    auto tensor = get_input_data("./input_data.npy");
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    config.options.var_sanity_check_first_run = false;
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU, Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());
    //! set async mode and callback
    volatile bool finished = false;
    network->set_async_callback([&finished]() { finished = true; });
    network->forward();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    ASSERT_GT(count, 0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}
#endif
#endif
#if MGB_ATLAS
TEST(TestNetWork, AtlasLoadNoDevice) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_DEVICE_DEFAULT;
    auto network = std::make_shared<lite::Network>(config);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadDeviceInput) {
    lite::NetworkIO networkio;
    lite::IO input_data_io = {};
    input_data_io.name = "data";
    input_data_io.is_host = false;
    networkio.inputs.emplace_back(input_data_io);
    lite::IO input_input0_io = {};
    input_input0_io.name = "input0";
    input_input0_io.is_host = false;
    networkio.inputs.emplace_back(input_input0_io);
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_DEVICE_DEFAULT;
    auto network = std::make_shared<lite::Network>(config, networkio);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadAtlas) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadAtlasDeviceInput) {
    lite::NetworkIO networkio;
    lite::IO input_data_io = {};
    input_data_io.name = "data";
    input_data_io.is_host = false;
    networkio.inputs.emplace_back(input_data_io);
    lite::IO input_input0_io = {};
    input_input0_io.name = "input0";
    input_input0_io.is_host = false;
    networkio.inputs.emplace_back(input_input0_io);
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config, networkio);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasDeviceID) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config);
    network->set_device_id(1);
    network->load_model("./model_atlas.mgb");
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    network->forward();
    network->wait();
    ASSERT_EQ(output_tensor->get_device_id(), 1);
}
#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine installation package already bundles the CUDA environment needed to run code on the GPU, so there is no separate CPU or GPU build to choose from. If you want to run GPU programs, make sure the machine has a GPU and that the driver is properly installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
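As the CUDA tests above show, moving a Lite network onto the GPU only requires setting the device type in the config before the model is loaded. Below is a minimal standalone sketch of that pattern; the include path and the use of the same local "./shufflenet.mge" fixture are assumptions for illustration, not part of this test file.

// Minimal sketch: run a model on the GPU with MegEngine Lite, assuming a
// CUDA-enabled build and a model file next to the executable.
#include "lite/network.h"  // assumed public header for lite::Network / lite::Config

#include <memory>

int main() {
    lite::Config config;
    // Select the GPU; when this line is omitted the network runs on LITE_CPU.
    config.device_type = LiteDeviceType::LITE_CUDA;
    auto network = std::make_shared<lite::Network>(config);
    network->load_model("./shufflenet.mge");
    network->forward();
    network->wait();
    return 0;
}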