
// test_tensor.cpp

#include "lite_build_config.h"

#if LITE_BUILD_WITH_MGE
#include "../src/mge/common.h"
#include "../src/mge/network_impl.h"
#include "../src/misc.h"
#include "lite/tensor.h"

#include <gtest/gtest.h>

#include <string.h>
#include <cmath>    // std::pow
#include <cstdint>  // uint8_t
#include <memory>
#include <vector>

using namespace lite;
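
//! Basic: CPU tensors can be created with or without a layout; the
//! underlying mge tensor exists and the layout and byte size are recorded.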
TEST(TestTensor, Basic) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    //! the underlying mge tensor has been created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));
    ASSERT_TRUE(TensorHelper::implement(&tensor3));
    //! check the members
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    ASSERT_EQ(tensor3.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    //! a CPU tensor holds a host tensor but no device tensor
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
}
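
//! SetLayoutReAlloc: setting a smaller layout updates the recorded layout
//! without reallocating the existing storage.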
TEST(TestTensor, SetLayoutReAlloc) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! set a new, smaller layout
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor1.set_layout(layout1);
    tensor2.set_layout(layout1);
    tensor3.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .host_tensor()
                           ->layout();
    auto layout3 = TensorHelper::implement(&tensor3)
                           ->cast_final_safe<TensorImplDft>()
                           .host_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);
    ASSERT_EQ(to_lite_layout(layout3), layout1);
    //! the memory is not reallocated for the smaller layout
    auto new_ptr2 = tensor2.get_memory_ptr();
    auto new_ptr3 = tensor3.get_memory_ptr();
    ASSERT_EQ(old_ptr2, new_ptr2);
    ASSERT_EQ(old_ptr3, new_ptr3);
}
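
//! Reset: rebinding a tensor to user-provided memory makes Lite read and
//! write that memory directly; reset never frees user-provided memory.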
TEST(TestTensor, Reset) {
    Layout layout{{3, 20}, 2, LiteDataType::LITE_FLOAT};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! make sure the memory is allocated
    ASSERT_NO_THROW(memcpy(old_ptr2, old_ptr3, 3 * 20 * 2));
    std::shared_ptr<float> new_ptr2(
            new float[3 * 20], [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> new_ptr3(
            new float[3 * 20], [](float* ptr) { delete[] ptr; });
    tensor1.reset(new_ptr2.get(), layout);
    tensor2.reset(new_ptr2.get(), 3 * 20 * 4);
    tensor3.reset(new_ptr3.get(), 3 * 20 * 4);
    //! after reset, the original memory is freed
    /*ASSERT_EXIT((memcpy(old_ptr2, old_ptr3, 3 * 20 * 2), exit(0)),
                ::testing::KilledBySignal(SIGSEGV), ".*");*/
    ASSERT_EQ(tensor2.get_memory_ptr(), new_ptr2.get());
    ASSERT_EQ(tensor3.get_memory_ptr(), new_ptr3.get());
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));
    Layout layout1{{6, 20}, 2, LiteDataType::LITE_FLOAT};
    std::shared_ptr<float> ptr2(new float[6 * 20], [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> ptr3(new float[6 * 20], [](float* ptr) { delete[] ptr; });
    tensor2.reset(ptr2.get(), layout1);
    tensor3.reset(ptr3.get(), layout1);
    //! the user-provided memory is not freed by reset
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));
    auto host_layout2 = TensorHelper::implement(&tensor2)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    auto host_layout3 = TensorHelper::implement(&tensor3)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    ASSERT_EQ(to_lite_layout(host_layout2), layout1);
    ASSERT_EQ(to_lite_layout(host_layout3), layout1);
}
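
//! CrossCNCopy: copies between CPU tensors of identical layout reuse the
//! destination storage; copying from an empty tensor throws.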
TEST(TestTensor, CrossCNCopy) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! copying from an empty source tensor should throw
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}
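
//! SharedTensorMemory: memory shared via share_memory_with stays valid
//! after the tensor it was shared from is destroyed.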
TEST(TestTensor, SharedTensorMemory) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    {
        Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
        tensor1.share_memory_with(tensor2);
        auto ptr1 = tensor1.get_memory_ptr();
        auto ptr2 = tensor2.get_memory_ptr();
        ASSERT_EQ(ptr1, ptr2);
    }
    //! check that after tensor2 is destroyed, tensor1 can still access the memory
    auto ptr1 = static_cast<float*>(tensor1.get_memory_ptr());
    size_t length = tensor1.get_tensor_total_size_in_byte() /
                    tensor1.get_layout().get_elem_size();
    for (size_t i = 0; i < length; i++) {
        ptr1[i] = i;
    }
}
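
//! Reshape: the element count must be preserved, at most one dimension may
//! be -1 (inferred from the rest), and the storage pointer is untouched.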
TEST(TestTensor, Reshape) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();
    //! invalid target shapes should throw
    ASSERT_THROW(tensor2.reshape({-1, -1, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({-1, 3, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({1, 3, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({3, 3, 3 * 224 * 224}), std::exception);
    tensor2.reshape({3 * 224 * 224});
    ASSERT_EQ(tensor2.get_layout().ndim, 1);
    ASSERT_EQ(tensor2.get_layout().data_type, LiteDataType::LITE_FLOAT);
    ASSERT_EQ(tensor2.get_layout().shapes[0], 3 * 224 * 224);
    //! -1 is inferred from the remaining element count
    tensor2.reshape({-1, 224, 224});
    ASSERT_EQ(tensor2.get_layout().ndim, 3);
    ASSERT_EQ(tensor2.get_layout().shapes[0], 3);
    ASSERT_EQ(tensor2.get_layout().shapes[1], 224);
    //! reshape does not reallocate the memory
    ASSERT_EQ(tensor2.get_memory_ptr(), ptr);
}
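
//! Slice: start/end/step must have the same number of dimensions; a slice
//! views the original data, so copying it out yields the expected values.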
TEST(TestTensor, Slice) {
    Layout layout{{20, 20}, 2};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();
    //! mismatched start/end/step dimensions should throw
    ASSERT_THROW(tensor2.slice({5, 10, 10}, {10, 15}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15}, {5}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15, 10}), std::exception);
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor tensor3;
        tensor3.copy_from(*tensor2.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);
    //! slicing only the first axis keeps whole rows
    Tensor tensor3;
    tensor3.copy_from(*tensor2.slice({3}, {9}, {2}));
    float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
    for (size_t i = 3; i < 9; i += 2) {
        for (size_t j = 0; j < 20; j++) {
            ASSERT_EQ(float(i * 20 + j), *new_ptr);
            ++new_ptr;
        }
    }
}
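
//! SliceCopy: copying into non-contiguous slices writes the expected
//! interleaved regions of the underlying tensor; fill_zero also works on
//! such slices.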
TEST(TestTensor, SliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());
    Layout layout_slice{{20, 10}, 2};
    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }
    //! copy into the left and right halves through slices
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    //! fill_zero works on a non-contiguous slice
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}
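
//! GetPtrOffset: get_memory_ptr({...}) resolves logical coordinates to an
//! address, for the full tensor as well as for non-contiguous slices.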
TEST(TestTensor, GetPtrOffset) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());
    auto ptr_offset = tensor.get_memory_ptr({10, 10});
    ASSERT_EQ(ptr_offset, ptr + 10 * 20 + 10);
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    //! offsets in a slice are relative to the slice but use the strides of
    //! the underlying tensor
    auto ptr_offset_slice0 = slice0->get_memory_ptr({6, 5});
    auto ptr_offset_slice1 = slice1->get_memory_ptr({2, 5});
    ASSERT_EQ(ptr_offset_slice0, ptr + 6 * 20 + 5);
    ASSERT_EQ(ptr_offset_slice1, ptr + 2 * 20 + 10 + 5);
}
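
//! Concat: TensorUtils::concat joins the tensors along the given dimension;
//! each source tensor's values land in the expected blocks of the result.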
TEST(TestTensor, Concat) {
    Layout layout{{5, 5, 5}, 3};
    std::vector<Tensor> tensors;
    for (int i = 0; i < 4; i++) {
        Tensor tensor(LiteDeviceType::LITE_CPU, layout);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        for (int n = 0; n < 5 * 5 * 5; n++) {
            ptr[n] = i;
        }
        tensors.push_back(tensor);
    }
    auto check = [&](int dim) {
        auto new_tensor = TensorUtils::concat(tensors, dim);
        auto ptr = static_cast<float*>(new_tensor->get_memory_ptr());
        size_t stride = std::pow(5, (3 - dim));
        for (int i = 0; i < 4; i++) {
            for (size_t j = 0; j < stride; j++) {
                ASSERT_EQ(ptr[i * stride + j], i);
            }
        }
    };
    check(0);
    check(1);
    check(2);
}

#if LITE_WITH_CUDA
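
//! The tests below mirror the CPU tests on CUDA devices and are only
//! compiled when Lite is built with LITE_WITH_CUDA.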
TEST(TestTensor, BasicDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    //! the underlying mge tensor has been created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));
    //! check the members
    ASSERT_EQ(tensor1.get_device_type(), LiteDeviceType::LITE_CUDA);
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor1.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    //! the CPU tensor holds only a host tensor, the CUDA tensor only a
    //! device tensor
    ASSERT_TRUE(TensorHelper::implement(&tensor2)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor2)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .dev_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .host_tensor());
}

TEST(TestTensor, SetLayoutReAllocDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    //! set a new, smaller layout; the device memory should be reused
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor2.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .dev_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);
    auto new_ptr2 = tensor2.get_memory_ptr();
    ASSERT_EQ(old_ptr2, new_ptr2);
}

TEST(TestTensor, CrossCNCopyDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor0;
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! copying from an empty source tensor should throw
    ASSERT_THROW(tensor3.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor3);
    tensor0.copy_from(tensor3);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}
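
//! PinnedHostMem: a tensor created with is_pinned_host lives in pinned
//! (page-locked) host memory and copies to and from plain device tensors.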
TEST(TestTensor, PinnedHostMem) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    bool is_pinned_host = true;
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout, is_pinned_host);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.is_pinned_host(), true);
    ASSERT_EQ(tensor3.is_pinned_host(), false);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! copying from an empty source tensor should throw
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}
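
//! DeviceId: tensors on different CUDA devices can copy to each other and
//! to host tensors; the test returns early with fewer than two devices.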
TEST(TestTensor, DeviceId) {
    if (get_device_count(LITE_CUDA) <= 1)
        return;
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(0, LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor3(1, LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    Tensor tensor1;
    tensor1.copy_from(tensor2);
    tensor1.copy_from(tensor3);
}

TEST(TestTensor, SliceDevice) {
    Layout layout{{20, 20}, 2};
    Tensor host_tensor0;
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    host_tensor0.copy_from(dev_tensor0);
    auto ptr = host_tensor0.get_memory_ptr();
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    dev_tensor0.copy_from(host_tensor0);
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor host_tensor;
        host_tensor.copy_from(
                *dev_tensor0.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(host_tensor.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);
}
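
//! MemSetDevice: fill_zero clears both host and device tensors; the device
//! result is checked through a host copy.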
TEST(TestTensor, MemSetDevice) {
    Layout layout{{20, 20}, 2, LiteDataType::LITE_INT8};
    Tensor host_tensor0(LiteDeviceType::LITE_CPU, layout);
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    auto check = [&](uint8_t val, const Tensor& tensor) {
        auto ptr = static_cast<uint8_t*>(tensor.get_memory_ptr());
        for (int i = 0; i < 20 * 20; i++) {
            ASSERT_EQ(val, *(ptr + i));
        }
    };
    host_tensor0.fill_zero();
    check(0, host_tensor0);
    Tensor host_tensor1;
    dev_tensor0.fill_zero();
    host_tensor1.copy_from(dev_tensor0);
    check(0, host_tensor1);
}

TEST(TestTensor, DeviceSliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CUDA, layout);
    //! alloc memory
    tensor.get_memory_ptr();
    Layout layout_slice{{20, 10}, 2};
    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    Tensor host_tensor;
    host_tensor.copy_from(tensor);
    auto ptr = static_cast<float*>(host_tensor.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}

TEST(TestTensor, ConcatDevice) {
    Layout layout{{5, 5, 5}, 3};
    std::vector<Tensor> tensors;
    for (int i = 0; i < 4; i++) {
        Tensor tensor(LiteDeviceType::LITE_CPU, layout);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        for (int n = 0; n < 5 * 5 * 5; n++) {
            ptr[n] = i;
        }
        tensors.push_back(tensor);
    }
    auto check = [&](int dim) {
        auto new_tensor =
                TensorUtils::concat(tensors, dim, LiteDeviceType::LITE_CUDA, 0);
        Tensor tensor(LiteDeviceType::LITE_CPU);
        tensor.copy_from(*new_tensor);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        size_t stride = std::pow(5, (3 - dim));
        for (int i = 0; i < 4; i++) {
            for (size_t j = 0; j < stride; j++) {
                ASSERT_EQ(ptr[i * stride + j], i);
            }
        }
        ASSERT_EQ(new_tensor->get_device_type(), LiteDeviceType::LITE_CUDA);
        ASSERT_EQ(new_tensor->get_device_id(), 0);
    };
    check(0);
    check(1);
    check(2);
}
#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}