
test_network.cpp

/**
 * \file test/test_network.cpp
 *
 * This file is part of MegEngine, a deep learning framework developed by
 * Megvii.
 *
 * \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
 */

#include "lite_build_config.h"

#if LITE_BUILD_WITH_MGE
#include "./test_common.h"
#include "megbrain/tensor.h"

#include <chrono>
#include <memory>
#include <random>
#include <unordered_map>

using namespace lite;

namespace {

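//! Allocator that counts allocations: m_nr_allocated records how many buffers
//! were handed out and m_nr_left how many are still outstanding, so the
//! UserAllocator test below can verify that every allocation is freed again.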
class CheckAllocator : public lite::Allocator {
public:
    //! allocate memory of size in the given device with the given align
    void* allocate(LiteDeviceType device, int, size_t size,
                   size_t align) override {
        LITE_ASSERT(device == LiteDeviceType::LITE_CPU);
        m_nr_left++;
        m_nr_allocated++;
#ifdef WIN32
        return _aligned_malloc(size, align);
#elif defined(__ANDROID__) || defined(ANDROID)
        return memalign(align, size);
#else
        void* ptr = nullptr;
        auto err = posix_memalign(&ptr, align, size);
        mgb_assert(!err, "failed to malloc %zu bytes with align %zu", size,
                   align);
        return ptr;
#endif
    };

    //! free the memory pointed by ptr in the given device
    void free(LiteDeviceType device, int, void* ptr) override {
        m_nr_left--;
        LITE_ASSERT(device == LiteDeviceType::LITE_CPU);
#ifdef WIN32
        _aligned_free(ptr);
#else
        ::free(ptr);
#endif
    };

    std::atomic_size_t m_nr_left{0};
    std::atomic_size_t m_nr_allocated{0};
};
}  // namespace

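//! Most of the tests below follow the same pattern: run the model once through
//! the plain MegBrain path (mgb_lar) to get a reference result, run it again
//! through a lite::Network configured for the feature under test, and compare
//! the two outputs with compare_lite_tensor<float>().
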
TEST(TestNetWork, Basic) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_lite = mgelite_lar(model_path, config, "data", lite_tensor);
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, SetDeviceId) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->set_device_id(4);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    network->forward();
    network->wait();
    ASSERT_EQ(input_tensor->get_device_id(), 4);
    ASSERT_EQ(output_tensor->get_device_id(), 4);
}

TEST(TestNetWork, GetAllName) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    auto input_names = network->get_all_input_name();
    auto output_names = network->get_all_output_name();
    ASSERT_EQ(input_names.size(), 1);
    ASSERT_EQ(output_names.size(), 1);
    ASSERT_TRUE(input_names[0] == "data");
    ASSERT_TRUE(output_names[0] ==
                "TRUE_DIV(EXP[12065],reduce0[12067])[12077]");
}

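//! Inplace CPU mode: the single-thread affinity callback should be invoked
//! with thread id 0, and registering an async callback is expected to throw,
//! since inplace mode does not support async execution.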
TEST(TestNetWork, BasicInplaceAndSingleThreadAffinity) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_inplace_mode(network);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    int affinity_set = false;
    Runtime::set_runtime_thread_affinity(network, [&affinity_set](int id) {
        ASSERT_EQ(id, 0);
        affinity_set = true;
    });
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    //! inplace mode does not support async mode
    ASSERT_THROW(network->set_async_callback([]() {}), std::exception);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    ASSERT_EQ(affinity_set, true);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, NetworkShareWeights) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);

    std::shared_ptr<Network> network2 = std::make_shared<Network>(config);
    Runtime::set_cpu_inplace_mode(network2);
    Runtime::shared_weight_with_network(network2, network);

    std::shared_ptr<Tensor> input_tensor2 = network2->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    input_tensor2->reset(src_ptr, src_layout);
    ASSERT_NE(input_tensor, input_tensor2);

    network->forward();
    network->wait();
    network2->forward();
    network2->wait();

    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    std::shared_ptr<Tensor> output_tensor2 = network2->get_output_tensor(0);
    ASSERT_NE(output_tensor->get_memory_ptr(),
              output_tensor2->get_memory_ptr());
    compare_lite_tensor<float>(output_tensor, result_mgb);
    compare_lite_tensor<float>(output_tensor2, result_mgb);
}

TEST(TestNetWork, SharedRuntimeMem) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network_src = std::make_shared<Network>(config);
    std::shared_ptr<Network> network_dst = std::make_shared<Network>(config);
    Runtime::share_runtime_memory_with(network_dst, network_src);
    network_src->load_model(model_path);
    network_dst->load_model(model_path);
}

TEST(TestNetWork, UserAllocator) {
    auto allocator = std::make_shared<CheckAllocator>();
    {
        Config config;
        auto lite_tensor = get_input_data("./input_data.npy");
        std::string model_path = "./shufflenet.mge";
        auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
        std::shared_ptr<Network> network = std::make_shared<Network>(config);
        Runtime::set_memory_allocator(network, allocator);
        network->load_model(model_path);
        std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
        auto src_ptr = lite_tensor->get_memory_ptr();
        auto src_layout = lite_tensor->get_layout();
        input_tensor->reset(src_ptr, src_layout);
        network->forward();
        network->wait();
        ASSERT_GE(allocator->m_nr_allocated, 1);
        std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
        compare_lite_tensor<float>(output_tensor, result_mgb);
    }
    ASSERT_EQ(allocator->m_nr_left, 0);
}

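//! Multi-thread CPU tests: setting the thread affinity callback before the
//! model is loaded is expected to throw, and after loading each worker thread
//! reported by the callback should have a distinct std::thread::id.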
TEST(TestNetWork, BasicMultiThread) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_threads_number(network, 2);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ThreadAffinity) {
    size_t nr_threads = 4;
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    Runtime::set_cpu_threads_number(network, nr_threads);

    ASSERT_THROW(Runtime::set_runtime_thread_affinity(network, [](int) {}),
                 std::exception);
    network->load_model(model_path);

    std::vector<std::thread::id> thread_ids(nr_threads);
    auto affinity = [&](int id) {
        thread_ids[id] = std::this_thread::get_id();
    };
    Runtime::set_runtime_thread_affinity(network, affinity);

    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto src_ptr = lite_tensor->get_memory_ptr();
    auto src_layout = lite_tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);

    network->forward();
    network->wait();

    for (size_t i = 0; i < nr_threads; i++) {
        for (size_t j = i + 1; j < nr_threads; j++) {
            ASSERT_NE(thread_ids[i], thread_ids[j]);
        }
    }
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

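//! Encrypted-model tests: a bare encrypted model is loaded by naming the
//! built-in decryption method in config.bare_model_cryption_name
//! ("AES_default", "RC4_default", "SIMPLE_FAST_RC4_default"), while the
//! packed .lite model is loaded without setting any cryption name.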
TEST(TestNetWork, BasicCryptAes) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_aes.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "AES_default";
    auto result_lite =
            mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, BasicCryptRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_rc4.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "RC4_default";
    auto result_lite =
            mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, PackedCryptRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./test_packed_model_rc4.lite";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    auto result_lite =
            mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, BasicCryptSfRc4) {
    Config config;
    auto lite_tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string model_crypt_path = "./shufflenet_crypt_sfrc4.mge";
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    config.bare_model_cryption_name = "SIMPLE_FAST_RC4_default";
    auto result_lite =
            mgelite_lar(model_crypt_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, ResetInput) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ChangeInputShape) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_layout = Layout{{2, 3, 200, 200}, 4, LiteDataType::LITE_FLOAT};
    input_tensor->set_layout(src_layout);
    std::shared_ptr<Tensor> input_tensor2 = network->get_io_tensor(input_name);
    //! Check memory is equal
    ASSERT_EQ(input_tensor->get_memory_ptr(), input_tensor2->get_memory_ptr());
    network->forward();
    network->wait();
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto output_layout = output_tensor->get_layout();
    ASSERT_EQ(output_layout.shapes[0], 2);
    ASSERT_EQ(output_layout.shapes[1], 1000);
}

TEST(TestNetWork, ResetOutput) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);

    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU,
            Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());

    network->forward();
    network->wait();
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

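//! Async execution: forward() should return before the computation finishes;
//! the callback registered with set_async_callback() sets `finished`, and the
//! busy-wait counter is asserted to be non-zero to show that forward() did
//! not block until completion.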
TEST(TestNetWork, AsyncExec) {
    Config config;
    config.options.var_sanity_check_first_run = false;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);

    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU,
            Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());

    //! set async mode and callback
    volatile bool finished = false;
    network->set_async_callback([&finished]() { finished = true; });

    network->forward();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    ASSERT_GT(count, 0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, CPUDeviceInput) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);

    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    std::shared_ptr<Network> network = std::make_shared<Network>(IO);
    network->load_model(model_path);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    input_tensor->reset(src_ptr, layout);

    network->forward();
    network->wait();

    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ShareTensorWith) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);

    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(model_path);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);

    network->forward();
    network->wait();

    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

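//! Start/finish callback tests: set_start_callback() receives the input
//! tensors and set_finish_callback() receives the output tensors as a map
//! from IO name to a {IO, Tensor} pair; the tensors passed to the callbacks
//! are compared against the expected data.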
TEST(TestNetWork, InputCallBack) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);

    NetworkIO ios;
    bool is_host = false;
    ios.inputs.push_back({input_name, is_host});
    std::shared_ptr<Network> network = std::make_shared<Network>(ios);
    network->load_model(model_path);

    volatile bool finished_check_input = false;
    auto input_callback =
            [&tensor, &finished_check_input,
             input_name](const std::unordered_map<
                         std::string, std::pair<IO, std::shared_ptr<Tensor>>>&
                                 input_map) {
                ASSERT_EQ(input_map.size(), 1);
                auto tensor_input = input_map.at(input_name).second;
                compare_lite_tensor<float>(tensor_input, tensor);
                finished_check_input = true;
            };
    network->set_start_callback(input_callback);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);

    network->forward();
    network->wait();

    ASSERT_TRUE(finished_check_input);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, OutputCallBack) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(model_path);
    auto output_name = network->get_output_name(0);

    volatile bool finished_check_output = false;
    auto output_callback =
            [&result_mgb, &finished_check_output,
             output_name](const std::unordered_map<
                          std::string, std::pair<IO, std::shared_ptr<Tensor>>>&
                                  output_map) {
                ASSERT_EQ(output_map.size(), 1);
                auto tensor_output = output_map.at(output_name).second;
                compare_lite_tensor<float>(tensor_output, result_mgb);
                finished_check_output = true;
            };
    network->set_finish_callback(output_callback);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    input_tensor->share_memory_with(*tensor);

    network->forward();
    network->wait();

    ASSERT_TRUE(finished_check_output);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, OutputShapeOnly) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";

    NetworkIO IO;
    bool is_host = true;
    IO.outputs.push_back({output_name, is_host, LiteIOType::LITE_IO_SHAPE});
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    std::shared_ptr<Tensor> output_tensor = network->get_io_tensor(output_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);

    network->forward();
    network->wait();

    ASSERT_EQ(output_tensor->get_tensor_total_size_in_byte() / sizeof(float),
              1000);
}

TEST(TestNetWork, ProfileIOdump) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";

    NetworkIO IO;
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->enable_profile_performance("./profile.json");
    network->load_model(model_path);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);

    network->forward();
    network->wait();
    ASSERT_TRUE(fopen("./profile.json", "r"));

    Runtime::enable_io_txt_dump(network, "./io_txt_dump.txt");
    network->forward();
    network->wait();
    ASSERT_TRUE(fopen("./io_txt_dump.txt", "r"));
}

TEST(TestNetWork, LoadPackedModel) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./test_packed_model.lite";
    std::string input_name = "data";

    NetworkIO IO;
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);

    network->forward();
    network->wait();
}

TEST(TestNetWork, GetDeviceType) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    ASSERT_TRUE(network->get_device_type() == LiteDeviceType::LITE_CPU);
}

TEST(TestNetWork, GetModelExtraInfo) {
    std::string model_path = "./track_640_320_pack_model_rc4_with_info.lite";
    Config config;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    auto& extra_info = network->get_model_extra_info();
    ASSERT_TRUE(extra_info.size() > 0);
    printf("extra_info %s \n", extra_info.c_str());
}

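//! CUDA-only tests (compiled when LITE_WITH_CUDA is set): device input/output
//! tensors, changing the input shape on device, per-network device and stream
//! ids, and asynchronous execution on CUDA.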
#if LITE_WITH_CUDA
TEST(TestNetWork, BasicDevice) {
    auto lite_tensor = get_input_data("./input_data.npy");
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::string model_path = "./shufflenet.mge";
    auto result_lite = mgelite_lar(model_path, config, "data", lite_tensor);
    auto result_mgb = mgb_lar(model_path, config, "data", lite_tensor);
    compare_lite_tensor<float>(result_lite, result_mgb);
}

TEST(TestNetWork, DeviceInput) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);

    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto tensor_cuda = Tensor(LiteDeviceType::LITE_CUDA, layout);
    tensor_cuda.copy_from(*tensor);
    auto src_ptr = tensor_cuda.get_memory_ptr();
    input_tensor->reset(src_ptr, layout);

    network->forward();
    network->wait();

    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ChangeInputShapeDevice) {
    Config config;
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);

    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_layout = Layout{{2, 3, 200, 200}, 4, LiteDataType::LITE_FLOAT};
    input_tensor->set_layout(src_layout);
    std::shared_ptr<Tensor> input_tensor2 = network->get_io_tensor(input_name);
    //! Check memory is equal
    ASSERT_EQ(input_tensor->get_memory_ptr(), input_tensor2->get_memory_ptr());

    network->forward();
    network->wait();

    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto output_layout = output_tensor->get_layout();
    ASSERT_EQ(output_layout.shapes[0], 2);
    ASSERT_EQ(output_layout.shapes[1], 1000);
}

TEST(TestNetWork, DeviceOutput) {
    auto tensor = get_input_data("./input_data.npy");
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);

    NetworkIO IO;
    bool is_host = false;
    IO.outputs.push_back({output_name, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);

    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    std::shared_ptr<Tensor> output_tensor_cuda =
            network->get_io_tensor(output_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);

    network->forward();
    network->wait();

    auto output_tensor = std::make_shared<Tensor>();
    output_tensor->copy_from(*output_tensor_cuda);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, WrongIONameDevice) {
    auto tensor = get_input_data("./input_data.npy");
    Layout layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    std::string input_name_wrong = "data0";
    std::string output_name = "TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    std::string output_name_wrong =
            "w_TRUE_DIV(EXP[12065],reduce0[12067])[12077]";
    auto result_mgb = mgb_lar(model_path, {}, input_name, tensor);

    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({input_name, is_host});
    IO.outputs.push_back({output_name, is_host});
    IO.outputs.push_back({output_name_wrong, is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->load_model(model_path);

    auto tensor_cuda = Tensor(LiteDeviceType::LITE_CUDA, layout);
    tensor_cuda.copy_from(*tensor);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor_cuda.get_memory_ptr();
    auto src_layout = tensor_cuda.get_layout();
    input_tensor->reset(src_ptr, src_layout);
    std::shared_ptr<Tensor> output_tensor_cuda =
            network->get_io_tensor(output_name);

    network->forward();
    network->wait();

    auto output_tensor = std::make_shared<Tensor>();
    output_tensor->copy_from(*output_tensor_cuda);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}

TEST(TestNetWork, ConfigIONameDevice) {
    std::string model_path = "./model.mgb";

    NetworkIO IO;
    bool is_host = false;
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->compute_only_configured_output();
    network->load_model(model_path);

    ASSERT_EQ(network->get_all_output_name().size(), 1);
    ASSERT_EQ(network->get_all_output_name()[0], "clsfy");

    std::shared_ptr<Network> network2 = std::make_shared<Network>(config, IO);
    network2->load_model(model_path);
    ASSERT_EQ(network2->get_all_output_name().size(), 2);
}

TEST(TestNetWork, SetDeviceIdDeviceTest) {
#if LITE_WITH_CUDA
    if (get_device_count(LITE_CUDA) <= 1)
        return;
#endif
    std::string model_path = "./model.mgb";
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({"data", is_host});
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->set_device_id(1);
    network->load_model(model_path);

    auto inputs_names = network->get_all_input_name();
    for (auto name : inputs_names) {
        auto tensor = network->get_io_tensor(name);
        ASSERT_EQ(tensor->get_device_id(), 1);
        if (name == "idx") {
            int* index_ptr = static_cast<int*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23; i++) {
                index_ptr[i] = i % 3;
            }
        }
        if (name == "landmark") {
            float* landmark_ptr =
                    static_cast<float*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23 * 18 * 2; i++) {
                landmark_ptr[i] = 0.1f;
            }
        }
    }
    auto outputs_names = network->get_all_output_name();
    for (auto name : outputs_names) {
        auto tensor = network->get_io_tensor(name);
        ASSERT_EQ(tensor->get_device_id(), 1);
    }
    network->forward();
    network->wait();
}

TEST(TestNetWork, SetStreamIdDeviceTest) {
    std::string model_path = "./model.mgb";
    NetworkIO IO;
    bool is_host = false;
    IO.inputs.push_back({"data", is_host});
    IO.outputs.push_back({"clsfy", is_host});
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
    network->set_stream_id(1);
    network->load_model(model_path);

    auto inputs_names = network->get_all_input_name();
    for (auto name : inputs_names) {
        auto tensor = network->get_io_tensor(name);
        if (name == "idx") {
            int* index_ptr = static_cast<int*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23; i++) {
                index_ptr[i] = i % 3;
            }
        }
        if (name == "landmark") {
            float* landmark_ptr =
                    static_cast<float*>(tensor->get_memory_ptr());
            for (int i = 0; i < 23 * 18 * 2; i++) {
                landmark_ptr[i] = 0.1f;
            }
        }
    }
    network->forward();
    network->wait();
}

#if CUDART_VERSION >= 10000
TEST(TestNetWork, DeviceAsyncExec) {
    auto tensor = get_input_data("./input_data.npy");
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    config.options.var_sanity_check_first_run = false;
    std::string model_path = "./shufflenet.mge";
    std::string input_name = "data";
    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(model_path);
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
    auto src_ptr = tensor->get_memory_ptr();
    auto src_layout = tensor->get_layout();
    input_tensor->reset(src_ptr, src_layout);

    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    auto result_tensor = std::make_shared<Tensor>(
            LiteDeviceType::LITE_CPU,
            Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
    void* out_data = result_tensor->get_memory_ptr();
    output_tensor->reset(out_data, result_tensor->get_layout());

    //! set async mode and callback
    volatile bool finished = false;
    network->set_async_callback([&finished]() { finished = true; });

    network->forward();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    ASSERT_GT(count, 0);
    compare_lite_tensor<float>(output_tensor, result_mgb);
}
#endif
#endif

#if MGB_ATLAS
TEST(TestNetWork, AtlasLoadNoDevice) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_DEVICE_DEFAULT;
    auto network = std::make_shared<lite::Network>(config);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadDeviceInput) {
    lite::NetworkIO networkio;
    lite::IO input_data_io = {};
    input_data_io.name = "data";
    input_data_io.is_host = false;
    networkio.inputs.emplace_back(input_data_io);

    lite::IO input_input0_io = {};
    input_input0_io.name = "input0";
    input_input0_io.is_host = false;
    networkio.inputs.emplace_back(input_input0_io);

    lite::Config config;
    config.device_type = LiteDeviceType::LITE_DEVICE_DEFAULT;
    auto network = std::make_shared<lite::Network>(config, networkio);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadAtlas) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasLoadAtlasDeviceInput) {
    lite::NetworkIO networkio;
    lite::IO input_data_io = {};
    input_data_io.name = "data";
    input_data_io.is_host = false;
    networkio.inputs.emplace_back(input_data_io);

    lite::IO input_input0_io = {};
    input_input0_io.name = "input0";
    input_input0_io.is_host = false;
    networkio.inputs.emplace_back(input_input0_io);

    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config, networkio);
    network->load_model("./model_atlas.mgb");
    network->forward();
    network->wait();
}

TEST(TestNetWork, AtlasDeviceID) {
    lite::Config config;
    config.device_type = LiteDeviceType::LITE_ATLAS;
    auto network = std::make_shared<lite::Network>(config);
    network->set_device_id(1);
    network->load_model("./model_atlas.mgb");
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    network->forward();
    network->wait();
    ASSERT_EQ(output_tensor->get_device_id(), 1);
}
#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
