
basic.cpp

#include <thread>
#include "example.h"
#if LITE_BUILD_WITH_MGE
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include "helper.h"

using namespace lite;
using namespace example;

namespace {
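//! helper: print the name and shape of every output tensor of the network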
void output_info(std::shared_ptr<Network> network, size_t output_size) {
    for (size_t index = 0; index < output_size; index++) {
        printf("output[%zu] name %s\n", index,
               network->get_all_output_name()[index].c_str());
        std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(index);
        size_t ndim = output_tensor->get_layout().ndim;
        for (size_t i = 0; i < ndim; i++) {
            printf("output[%zu] tensor.shape[%zu] %zu\n", index, i,
                   output_tensor->get_layout().shapes[i]);
        }
    }
}
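//! helper: for every output tensor, print its element count, the max/min
//! values with their indices, and the sum, dispatching on the element type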
void output_data_info(std::shared_ptr<Network> network, size_t output_size) {
    for (size_t index = 0; index < output_size; index++) {
        auto output_tensor = network->get_output_tensor(index);
        void* out_data = output_tensor->get_memory_ptr();
        size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                            output_tensor->get_layout().get_elem_size();
        LiteDataType dtype = output_tensor->get_layout().data_type;
        float max = -1000.0f;
        float min = 1000.0f;
        int max_idx = 0;
        int min_idx = 0;
        float sum = 0.0f;
#define cb(_dtype, _real_dtype)                                            \
    case LiteDataType::_dtype: {                                           \
        for (size_t i = 0; i < out_length; i++) {                          \
            _real_dtype data = static_cast<_real_dtype*>(out_data)[i];     \
            sum += data;                                                   \
            if (max < data) {                                              \
                max = data;                                                \
                max_idx = i;                                               \
            }                                                              \
            if (min > data) {                                              \
                min = data;                                                \
                min_idx = i;                                               \
            }                                                              \
        }                                                                  \
    } break;
        switch (dtype) {
            cb(LITE_FLOAT, float);
            cb(LITE_INT, int);
            cb(LITE_INT8, int8_t);
            cb(LITE_UINT8, uint8_t);
            default:
                printf("unknown datatype\n");
        }
#undef cb
        printf("output_length %zu index %zu max=%e, max_idx=%d, min=%e, "
               "min_idx=%d, sum=%e\n",
               out_length, index, max, max_idx, min, min_idx, sum);
    }
}
}  // namespace

namespace {
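//! the most basic usage: load a model from a file path, copy the input data
//! from a .npy file into the input tensor, run timed forwards, and dump
//! output statistics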
bool basic_load_from_path(const Args& args) {
    set_log_level(LiteLogLevel::DEBUG);
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(network_path);

    //! set input data to input tensor
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto layout = input_tensor->get_layout();
    for (size_t i = 0; i < layout.ndim; i++) {
        printf("model input shape[%zu]=%zu\n", i, layout.shapes[i]);
    }

    //! copy or forward data to network
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    auto layout0 = src_tensor->get_layout();
    for (size_t i = 0; i < layout0.ndim; i++) {
        printf("src shape[%zu]=%zu\n", i, layout0.shapes[i]);
    }
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);
    //! forward once as warmup, then time 10 iterations
    {
        lite_example_helper::Timer ltimer("warmup");
        network->forward();
        network->wait();
        ltimer.print_used_time(0);
    }
    lite_example_helper::Timer ltimer("forward_iter");
    for (int i = 0; i < 10; i++) {
        ltimer.reset_start();
        network->forward();
        network->wait();
        ltimer.print_used_time(i);
    }
    //! get the output data or read tensor set in network_in
    size_t output_size = network->get_all_output_name().size();
    output_info(network, output_size);
    output_data_info(network, output_size);
    return true;
}
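//! same flow as basic_load_from_path, but a custom loader library is
//! registered via set_loader_lib_path first, and the input tensor is reset
//! to share the user's memory instead of copying into it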
bool basic_load_from_path_with_loader(const Args& args) {
    set_log_level(LiteLogLevel::DEBUG);
    lite::set_loader_lib_path(args.loader_path);
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(network_path);

    //! set input data to input tensor
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto input_layout = input_tensor->get_layout();

    //! copy or forward data to network
    auto src_tensor = parse_npy(input_path);
    auto src_layout = src_tensor->get_layout();
    if (src_layout.ndim != input_layout.ndim) {
        printf("src ndim is not equal to model input ndim\n");
    }
    //! pay attention: the input shape may change
    for (size_t i = 0; i < input_layout.ndim; i++) {
        if (input_layout.shapes[i] != src_layout.shapes[i]) {
            printf("src shape is not equal to input shape\n");
        }
    }
    input_tensor->set_layout(src_tensor->get_layout());

    //! reset or forward data to network
    input_tensor->reset(src_tensor->get_memory_ptr(), src_tensor->get_layout());

    //! forward once as warmup, then time 10 iterations
    {
        lite_example_helper::Timer ltimer("warmup");
        network->forward();
        network->wait();
        ltimer.print_used_time(0);
    }
    lite_example_helper::Timer ltimer("forward_iter");
    for (int i = 0; i < 10; i++) {
        ltimer.reset_start();
        network->forward();
        network->wait();
        ltimer.print_used_time(i);
    }

    //! get the output data or read tensor set in network_in
    size_t output_size = network->get_all_output_name().size();
    output_info(network, output_size);
    output_data_info(network, output_size);
    return true;
}
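//! load the model from a caller-owned memory buffer (read here with fread)
//! instead of from a file path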
bool basic_load_from_memory(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>();
    FILE* fin = fopen(network_path.c_str(), "rb");
    if (!fin) {
        printf("failed to open %s.\n", network_path.c_str());
        return false;
    }
    fseek(fin, 0, SEEK_END);
    size_t size = ftell(fin);
    fseek(fin, 0, SEEK_SET);
    void* ptr = malloc(size);
    std::shared_ptr<void> buf{ptr, ::free};
    size_t len = fread(buf.get(), 1, size, fin);
    fclose(fin);
    if (len != size) {
        printf("read model file failed.\n");
        return false;
    }
    network->load_model(buf.get(), size);

    //! set input data to input tensor
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);

    //! copy or forward data to network
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! forward
    network->forward();
    network->wait();

    //! get the output data or read tensor set in network_in
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    printf("input length=%zu bytes\n", length);
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
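//! asynchronous execution: set_async_callback registers a callback that
//! fires on the worker thread when forward completes, so the caller can
//! spin (or do other work) instead of blocking in wait()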
bool async_forward(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;
    Config config;
    config.options.var_sanity_check_first_run = false;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(network_path);

    //! set input data to input tensor
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);

    //! copy or forward data to network
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! set async mode and callback
    volatile bool finished = false;
    network->set_async_callback([&finished]() {
#if !__DEPLOY_ON_XP_SP2__
        std::cout << "worker thread_id:" << std::this_thread::get_id() << std::endl;
#endif
        finished = true;
    });
#if !__DEPLOY_ON_XP_SP2__
    std::cout << "out thread_id:" << std::this_thread::get_id() << std::endl;
#endif

    //! forward, then busy-wait on the callback flag instead of calling wait()
    network->forward();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    printf("Forward finished, spin count is %zu\n", count);

    //! get the output data or read tensor set in network_in
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    printf("input length=%zu bytes\n", length);
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
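//! set_start_callback registers a callback invoked when execution starts;
//! it receives all input IOs and tensors, here used to print their names
//! and dims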
bool set_input_callback(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;
    Config config;
    config.options.var_sanity_check_first_run = false;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(network_path);

    //! set input data to input tensor
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);

    //! copy or forward data to network
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! set input callback
    volatile bool finished = false;
    network->set_start_callback(
            [&finished](const std::unordered_map<
                        std::string, std::pair<IO, std::shared_ptr<Tensor>>>&
                                inputs) {
#if !__DEPLOY_ON_XP_SP2__
                std::cout << "worker thread_id:" << std::this_thread::get_id()
                          << std::endl;
#endif
                for (auto&& item : inputs) {
                    std::cout << "input name: " << item.first
                              << ", input dim: "
                              << item.second.second->get_layout().ndim
                              << std::endl;
                }
                finished = true;
            });
#if !__DEPLOY_ON_XP_SP2__
    std::cout << "out thread_id:" << std::this_thread::get_id() << std::endl;
#endif

    //! forward
    network->forward();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    printf("Forward finished, spin count is %zu\n", count);
    //! the start callback only marks that execution has begun, so wait for
    //! the network to finish before touching the outputs
    network->wait();

    //! get the output data or read tensor set in network_in
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    printf("input length=%zu bytes\n", length);
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
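//! set_finish_callback registers a callback invoked when execution
//! finishes; it receives all output IOs and tensors, here used to print
//! their names and dims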
bool set_output_callback(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;
    Config config;
    config.options.var_sanity_check_first_run = false;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(network_path);

    //! set input data to input tensor
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);

    //! copy or forward data to network
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! set output callback
    volatile bool finished = false;
    network->set_finish_callback(
            [&finished](const std::unordered_map<
                        std::string, std::pair<IO, std::shared_ptr<Tensor>>>&
                                outputs) {
#if !__DEPLOY_ON_XP_SP2__
                std::cout << "worker thread_id:" << std::this_thread::get_id()
                          << std::endl;
#endif
                for (auto&& item : outputs) {
                    std::cout << "output name: " << item.first
                              << ", output dim: "
                              << item.second.second->get_layout().ndim
                              << std::endl;
                }
                finished = true;
            });
#if !__DEPLOY_ON_XP_SP2__
    std::cout << "out thread_id:" << std::this_thread::get_id() << std::endl;
#endif

    //! forward
    network->forward();
    network->wait();
    size_t count = 0;
    while (finished == false) {
        count++;
    }
    printf("Forward finished, spin count is %zu\n", count);

    //! get the output data or read tensor set in network_in
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    printf("input length=%zu bytes\n", length);
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
}  // namespace

REGIST_EXAMPLE("basic_load_from_path", basic_load_from_path);
REGIST_EXAMPLE("basic_load_from_path_with_loader", basic_load_from_path_with_loader);
REGIST_EXAMPLE("basic_load_from_memory", basic_load_from_memory);
REGIST_EXAMPLE("async_forward", async_forward);
REGIST_EXAMPLE("set_input_callback", set_input_callback);
REGIST_EXAMPLE("set_output_callback", set_output_callback);

#if LITE_WITH_CUDA
namespace {
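//! run the model on a CUDA device: LITE_CUDA is selected in the Config, the
//! input IO is marked device-side (is_host = false), the host .npy data is
//! copied into a device tensor, and the input tensor is reset to that
//! device memory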
bool load_from_path_run_cuda(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;
    set_log_level(LiteLogLevel::DEBUG);

    //! config the network to run on a CUDA device
    lite::Config config{false, -1, LiteDeviceType::LITE_CUDA};

    //! set NetworkIO
    NetworkIO network_io;
    std::string input_name = "img0_comp_fullface";
    bool is_host = false;
    IO device_input{input_name, is_host};
    network_io.inputs.push_back(device_input);

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>(config, network_io);
    network->load_model(network_path);
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    Layout input_layout = input_tensor->get_layout();

    //! read data from numpy data file
    auto src_tensor = parse_npy(input_path);

    //! malloc the device memory
    auto tensor_device = Tensor(LiteDeviceType::LITE_CUDA, input_layout);

    //! copy to the device memory
    tensor_device.copy_from(*src_tensor);

    //! now the device memory is filled with the user input data, set it to
    //! the input tensor
    input_tensor->reset(tensor_device.get_memory_ptr(), input_layout);

    //! forward once as warmup, then time 10 iterations
    {
        lite_example_helper::Timer ltimer("warmup");
        network->forward();
        network->wait();
        ltimer.print_used_time(0);
    }
    lite_example_helper::Timer ltimer("forward_iter");
    for (int i = 0; i < 10; i++) {
        ltimer.reset_start();
        network->forward();
        network->wait();
        ltimer.print_used_time(i);
    }

    //! get the output data or read tensor set in network_in
    size_t output_size = network->get_all_output_name().size();
    output_info(network, output_size);
    output_data_info(network, output_size);
    return true;
}
}  // namespace

REGIST_EXAMPLE("load_from_path_run_cuda", load_from_path_run_cuda);
#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}