test_tensor.cpp 20 kB

/**
 * \file test/test_tensor.cpp
 *
 * This file is part of MegEngine, a deep learning framework developed by
 * Megvii.
 *
 * \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
 */

#include "lite_build_config.h"

#if LITE_BUILD_WITH_MGE
#include "../src/misc.h"
#include "../src/mge/common.h"
#include "../src/mge/network_impl.h"
#include "lite/tensor.h"

#include <gtest/gtest.h>

#include <string.h>
#include <cmath>
#include <memory>
#include <vector>

using namespace lite;

TEST(TestTensor, Basic) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    //! the underlying mge tensors have been created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));
    ASSERT_TRUE(TensorHelper::implement(&tensor3));
    //! check members
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    ASSERT_EQ(tensor3.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
}

TEST(TestTensor, SetLayoutReAlloc) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! set a new layout
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor1.set_layout(layout1);
    tensor2.set_layout(layout1);
    tensor3.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .host_tensor()
                           ->layout();
    auto layout3 = TensorHelper::implement(&tensor3)
                           ->cast_final_safe<TensorImplDft>()
                           .host_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);
    ASSERT_EQ(to_lite_layout(layout3), layout1);
    auto new_ptr2 = tensor2.get_memory_ptr();
    auto new_ptr3 = tensor3.get_memory_ptr();
    ASSERT_EQ(old_ptr2, new_ptr2);
    ASSERT_EQ(old_ptr3, new_ptr3);
}

TEST(TestTensor, Reset) {
    Layout layout{{3, 20}, 2, LiteDataType::LITE_FLOAT};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! make sure memory is allocated
    ASSERT_NO_THROW(memcpy(old_ptr2, old_ptr3, 3 * 20 * 2));

    std::shared_ptr<float> new_ptr2(new float[3 * 20],
                                    [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> new_ptr3(new float[3 * 20],
                                    [](float* ptr) { delete[] ptr; });
    tensor1.reset(new_ptr2.get(), layout);
    tensor2.reset(new_ptr2.get(), 3 * 20 * 4);
    tensor3.reset(new_ptr3.get(), 3 * 20 * 4);
    //! after reset, the original memory is freed
    /*ASSERT_EXIT((memcpy(old_ptr2, old_ptr3, 3 * 20 * 2), exit(0)),
                ::testing::KilledBySignal(SIGSEGV), ".*");*/
    ASSERT_EQ(tensor2.get_memory_ptr(), new_ptr2.get());
    ASSERT_EQ(tensor3.get_memory_ptr(), new_ptr3.get());
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));

    Layout layout1{{6, 20}, 2, LiteDataType::LITE_FLOAT};
    std::shared_ptr<float> ptr2(new float[6 * 20],
                                [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> ptr3(new float[6 * 20],
                                [](float* ptr) { delete[] ptr; });
    tensor2.reset(ptr2.get(), layout1);
    tensor3.reset(ptr3.get(), layout1);
    //! the user-provided memory is not freed by Tensor reset
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));
    auto host_layout2 = TensorHelper::implement(&tensor2)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    auto host_layout3 = TensorHelper::implement(&tensor3)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    ASSERT_EQ(to_lite_layout(host_layout2), layout1);
    ASSERT_EQ(to_lite_layout(host_layout3), layout1);
}

TEST(TestTensor, CrossCNCopy) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! copying from an empty source tensor should throw
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, SharedTensorMemory) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    {
        Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
        tensor1.share_memory_with(tensor2);
        auto ptr1 = tensor1.get_memory_ptr();
        auto ptr2 = tensor2.get_memory_ptr();
        ASSERT_EQ(ptr1, ptr2);
    }
    // check that after tensor2 is destroyed, tensor1 can still access the memory
    auto ptr1 = static_cast<float*>(tensor1.get_memory_ptr());
    size_t length = tensor1.get_tensor_total_size_in_byte() /
                    tensor1.get_layout().get_elem_size();
    for (size_t i = 0; i < length; i++) {
        ptr1[i] = i;
    }
}

TEST(TestTensor, Reshape) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();
    //! test invalid target shapes
    ASSERT_THROW(tensor2.reshape({-1, -1, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({-1, 3, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({1, 3, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({3, 3, 3 * 224 * 224}), std::exception);
    tensor2.reshape({3 * 224 * 224});
    ASSERT_EQ(tensor2.get_layout().ndim, 1);
    ASSERT_EQ(tensor2.get_layout().data_type, LiteDataType::LITE_FLOAT);
    ASSERT_EQ(tensor2.get_layout().shapes[0], 3 * 224 * 224);
    tensor2.reshape({-1, 224, 224});
    ASSERT_EQ(tensor2.get_layout().ndim, 3);
    ASSERT_EQ(tensor2.get_layout().shapes[0], 3);
    ASSERT_EQ(tensor2.get_layout().shapes[1], 224);
    ASSERT_EQ(tensor2.get_memory_ptr(), ptr);
}

TEST(TestTensor, Slice) {
    Layout layout{{20, 20}, 2};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();
    //! test invalid slice arguments
    ASSERT_THROW(tensor2.slice({5, 10, 10}, {10, 15}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15}, {5}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15, 10}), std::exception);
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor tensor3;
        tensor3.copy_from(
                *tensor2.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);

    Tensor tensor3;
    tensor3.copy_from(*tensor2.slice({3}, {9}, {2}));
    float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
    for (size_t i = 3; i < 9; i += 2) {
        for (size_t j = 0; j < 20; j++) {
            ASSERT_EQ(float(i * 20 + j), *new_ptr);
            ++new_ptr;
        }
    }
}

TEST(TestTensor, SliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());
    Layout layout_slice{{20, 10}, 2};

    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }

    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());

    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}

TEST(TestTensor, GetPtrOffset) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());
    auto ptr_offset = tensor.get_memory_ptr({10, 10});
    ASSERT_EQ(ptr_offset, ptr + 10 * 20 + 10);
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    auto ptr_offset_slice0 = slice0->get_memory_ptr({6, 5});
    auto ptr_offset_slice1 = slice1->get_memory_ptr({2, 5});
    ASSERT_EQ(ptr_offset_slice0, ptr + 6 * 20 + 5);
    ASSERT_EQ(ptr_offset_slice1, ptr + 2 * 20 + 10 + 5);
}

TEST(TestTensor, Concat) {
    Layout layout{{5, 5, 5}, 3};
    std::vector<Tensor> tensors;
    for (int i = 0; i < 4; i++) {
        Tensor tensor(LiteDeviceType::LITE_CPU, layout);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        for (int n = 0; n < 5 * 5 * 5; n++) {
            ptr[n] = i;
        }
        tensors.push_back(tensor);
    }
    auto check = [&](int dim) {
        auto new_tensor = TensorUtils::concat(tensors, dim);
        auto ptr = static_cast<float*>(new_tensor->get_memory_ptr());
        size_t stride = std::pow(5, (3 - dim));
        for (int i = 0; i < 4; i++) {
            for (size_t j = 0; j < stride; j++) {
                ASSERT_EQ(ptr[i * stride + j], i);
            }
        }
    };
    check(0);
    check(1);
    check(2);
}

#if LITE_WITH_CUDA
TEST(TestTensor, BasicDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    //! the underlying mge tensors have been created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));
    //! check members
    ASSERT_EQ(tensor1.get_device_type(), LiteDeviceType::LITE_CUDA);
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor1.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_TRUE(TensorHelper::implement(&tensor2)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor2)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .dev_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .host_tensor());
}

TEST(TestTensor, SetLayoutReAllocDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    //! set a new layout
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor2.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .dev_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);
    auto new_ptr2 = tensor2.get_memory_ptr();
    ASSERT_EQ(old_ptr2, new_ptr2);
}

TEST(TestTensor, CrossCNCopyDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor0;
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    ASSERT_THROW(tensor3.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor3);
    tensor0.copy_from(tensor3);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, PinnedHostMem) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    bool is_pinned_host = true;
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout, is_pinned_host);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.is_pinned_host(), true);
    ASSERT_EQ(tensor3.is_pinned_host(), false);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! copying from an empty source tensor should throw
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, DeviceId) {
    if (get_device_count(LITE_CUDA) <= 1)
        return;
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(0, LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor3(1, LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    Tensor tensor1;
    tensor1.copy_from(tensor2);
    tensor1.copy_from(tensor3);
}

TEST(TestTensor, SliceDevice) {
    Layout layout{{20, 20}, 2};
    Tensor host_tensor0;
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    host_tensor0.copy_from(dev_tensor0);
    auto ptr = host_tensor0.get_memory_ptr();
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    dev_tensor0.copy_from(host_tensor0);
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor host_tensor;
        host_tensor.copy_from(
                *dev_tensor0.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(host_tensor.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);
}

TEST(TestTensor, MemSetDevice) {
    Layout layout{{20, 20}, 2, LiteDataType::LITE_INT8};
    Tensor host_tensor0(LiteDeviceType::LITE_CPU, layout);
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    auto check = [&](uint8_t val, const Tensor& tensor) {
        auto ptr = static_cast<uint8_t*>(tensor.get_memory_ptr());
        for (int i = 0; i < 20 * 20; i++) {
            ASSERT_EQ(val, *(ptr + i));
        }
    };
    host_tensor0.fill_zero();
    check(0, host_tensor0);

    Tensor host_tensor1;
    dev_tensor0.fill_zero();
    host_tensor1.copy_from(dev_tensor0);
    check(0, host_tensor1);
}

TEST(TestTensor, DeviceSliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CUDA, layout);
    //! alloc memory
    tensor.get_memory_ptr();
    Layout layout_slice{{20, 10}, 2};

    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }

    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());

    Tensor host_tensor;
    host_tensor.copy_from(tensor);
    auto ptr = static_cast<float*>(host_tensor.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}

TEST(TestTensor, ConcatDevice) {
    Layout layout{{5, 5, 5}, 3};
    std::vector<Tensor> tensors;
    for (int i = 0; i < 4; i++) {
        Tensor tensor(LiteDeviceType::LITE_CPU, layout);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        for (int n = 0; n < 5 * 5 * 5; n++) {
            ptr[n] = i;
        }
        tensors.push_back(tensor);
    }
    auto check = [&](int dim) {
        auto new_tensor =
                TensorUtils::concat(tensors, dim, LiteDeviceType::LITE_CUDA, 0);
        Tensor tensor(LiteDeviceType::LITE_CPU);
        tensor.copy_from(*new_tensor);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        size_t stride = std::pow(5, (3 - dim));
        for (int i = 0; i < 4; i++) {
            for (size_t j = 0; j < stride; j++) {
                ASSERT_EQ(ptr[i * stride + j], i);
            }
        }
        ASSERT_EQ(new_tensor->get_device_type(), LiteDeviceType::LITE_CUDA);
        ASSERT_EQ(new_tensor->get_device_id(), 0);
    };
    check(0);
    check(1);
    check(2);
}

#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine installation package bundles the CUDA environment needed to run code on the GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU and that the driver is properly installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
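For reference, the CUDA tests in this file already show the guard pattern this note implies: they are compiled only under `#if LITE_WITH_CUDA`, and the multi-device test additionally checks `get_device_count(LITE_CUDA)` at runtime before touching any GPU. Below is a minimal sketch combining both checks; the test name `SkipWithoutCuda` is illustrative only and is not part of the file.

#if LITE_WITH_CUDA
// Skip the body gracefully when no CUDA device is visible, so the same
// test binary can still run on CPU-only machines.
TEST(TestTensor, SkipWithoutCuda) {
    if (get_device_count(LITE_CUDA) < 1)
        return;  // no GPU available; nothing to check
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor dev_tensor(LiteDeviceType::LITE_CUDA, layout);
    Tensor host_tensor(LiteDeviceType::LITE_CPU);
    // copy device data back to the host and verify the layout round-trips
    host_tensor.copy_from(dev_tensor);
    ASSERT_EQ(host_tensor.get_layout().shapes[3], 224);
}
#endif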