/**
 * \file test/test_tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "lite_build_config.h"

#if LITE_BUILD_WITH_MGE
#include "../src/mge/common.h"
#include "../src/mge/network_impl.h"
#include "../src/misc.h"
#include "lite/tensor.h"

#include <gtest/gtest.h>

#include <string.h>
#include <cmath>   // std::pow, used in the Concat tests
#include <memory>
#include <vector>  // std::vector<Tensor>, used in the Concat tests

using namespace lite;

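//! NOTE: a Layout is constructed as {shapes, ndim[, data_type]}; when the data
//! type is omitted it defaults to float (4 bytes per element), which is why the
//! byte-size checks below multiply the element count by 4.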
TEST(TestTensor, Basic) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    //! the mge tensors have been created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));
    ASSERT_TRUE(TensorHelper::implement(&tensor3));
    //! check members
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    ASSERT_EQ(tensor3.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
}

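//! setting a smaller layout on an already-allocated tensor is expected to
//! reuse the existing buffer; the pointer comparisons at the end verify this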
TEST(TestTensor, SetLayoutReAlloc) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! setting a new layout should go through
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor1.set_layout(layout1);
    tensor2.set_layout(layout1);
    tensor3.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .host_tensor()
                           ->layout();
    auto layout3 = TensorHelper::implement(&tensor3)
                           ->cast_final_safe<TensorImplDft>()
                           .host_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);
    ASSERT_EQ(to_lite_layout(layout3), layout1);
    auto new_ptr2 = tensor2.get_memory_ptr();
    auto new_ptr3 = tensor3.get_memory_ptr();
    ASSERT_EQ(old_ptr2, new_ptr2);
    ASSERT_EQ(old_ptr3, new_ptr3);
}

TEST(TestTensor, Reset) {
    Layout layout{{3, 20}, 2, LiteDataType::LITE_FLOAT};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! make sure memory is allocated
    ASSERT_NO_THROW(memcpy(old_ptr2, old_ptr3, 3 * 20 * 2));
    std::shared_ptr<float> new_ptr2(
            new float[3 * 20], [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> new_ptr3(
            new float[3 * 20], [](float* ptr) { delete[] ptr; });
    tensor1.reset(new_ptr2.get(), layout);
    tensor2.reset(new_ptr2.get(), 3 * 20 * 4);
    tensor3.reset(new_ptr3.get(), 3 * 20 * 4);
    //! after reset, the original memory is freed
    /*ASSERT_EXIT((memcpy(old_ptr2, old_ptr3, 3 * 20 * 2), exit(0)),
                ::testing::KilledBySignal(SIGSEGV), ".*");*/
    ASSERT_EQ(tensor2.get_memory_ptr(), new_ptr2.get());
    ASSERT_EQ(tensor3.get_memory_ptr(), new_ptr3.get());
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));

    Layout layout1{{6, 20}, 2, LiteDataType::LITE_FLOAT};
    std::shared_ptr<float> ptr2(new float[6 * 20], [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> ptr3(new float[6 * 20], [](float* ptr) { delete[] ptr; });
    tensor2.reset(ptr2.get(), layout1);
    tensor3.reset(ptr3.get(), layout1);
    //! user-provided memory is not freed by Tensor reset
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));
    auto host_layout2 = TensorHelper::implement(&tensor2)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    auto host_layout3 = TensorHelper::implement(&tensor3)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    ASSERT_EQ(to_lite_layout(host_layout2), layout1);
    ASSERT_EQ(to_lite_layout(host_layout3), layout1);
}

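//! copying between tensors that already own memory should not reallocate the
//! destination buffer; the pointer checks at the end of the test verify this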
TEST(TestTensor, CrossCNCopy) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! the source tensor is empty, so copy_from should throw
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, SharedTensorMemory) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    {
        Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
        tensor1.share_memory_with(tensor2);
        auto ptr1 = tensor1.get_memory_ptr();
        auto ptr2 = tensor2.get_memory_ptr();
        ASSERT_EQ(ptr1, ptr2);
    }
    // check that after tensor2 is destroyed, tensor1 can still access the memory
    auto ptr1 = static_cast<float*>(tensor1.get_memory_ptr());
    size_t length = tensor1.get_tensor_total_size_in_byte() /
                    tensor1.get_layout().get_elem_size();
    for (size_t i = 0; i < length; i++) {
        ptr1[i] = i;
    }
}

TEST(TestTensor, Reshape) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();
    //! test wrong cases
    ASSERT_THROW(tensor2.reshape({-1, -1, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({-1, 3, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({1, 3, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({3, 3, 3 * 224 * 224}), std::exception);
    tensor2.reshape({3 * 224 * 224});
    ASSERT_EQ(tensor2.get_layout().ndim, 1);
    ASSERT_EQ(tensor2.get_layout().data_type, LiteDataType::LITE_FLOAT);
    ASSERT_EQ(tensor2.get_layout().shapes[0], 3 * 224 * 224);
    tensor2.reshape({-1, 224, 224});
    ASSERT_EQ(tensor2.get_layout().ndim, 3);
    ASSERT_EQ(tensor2.get_layout().shapes[0], 3);
    ASSERT_EQ(tensor2.get_layout().shapes[1], 224);
    ASSERT_EQ(tensor2.get_memory_ptr(), ptr);
}

TEST(TestTensor, Slice) {
    Layout layout{{20, 20}, 2};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();
    //! test invalid slice arguments
    ASSERT_THROW(tensor2.slice({5, 10, 10}, {10, 15}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15}, {5}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15, 10}), std::exception);
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor tensor3;
        tensor3.copy_from(*tensor2.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);

    Tensor tensor3;
    tensor3.copy_from(*tensor2.slice({3}, {9}, {2}));
    float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
    for (size_t i = 3; i < 9; i += 2) {
        for (size_t j = 0; j < 20; j++) {
            ASSERT_EQ(float(i * 20 + j), *new_ptr);
            ++new_ptr;
        }
    }
}

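//! a slice is a view into its parent tensor: copying into the non-contiguous
//! slices below writes directly into the parent's buffer, interleaving the two
//! source tensors row by row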
TEST(TestTensor, SliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());
    Layout layout_slice{{20, 10}, 2};
    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}

TEST(TestTensor, GetPtrOffset) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());
    auto ptr_offset = tensor.get_memory_ptr({10, 10});
    ASSERT_EQ(ptr_offset, ptr + 10 * 20 + 10);
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    auto ptr_offset_slice0 = slice0->get_memory_ptr({6, 5});
    auto ptr_offset_slice1 = slice1->get_memory_ptr({2, 5});
    ASSERT_EQ(ptr_offset_slice0, ptr + 6 * 20 + 5);
    ASSERT_EQ(ptr_offset_slice1, ptr + 2 * 20 + 10 + 5);
}

TEST(TestTensor, Concat) {
    Layout layout{{5, 5, 5}, 3};
    std::vector<Tensor> tensors;
    for (int i = 0; i < 4; i++) {
        Tensor tensor(LiteDeviceType::LITE_CPU, layout);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        for (int n = 0; n < 5 * 5 * 5; n++) {
            ptr[n] = i;
        }
        tensors.push_back(tensor);
    }
    auto check = [&](int dim) {
        auto new_tensor = TensorUtils::concat(tensors, dim);
        auto ptr = static_cast<float*>(new_tensor->get_memory_ptr());
        size_t stride = std::pow(5, (3 - dim));
        for (int i = 0; i < 4; i++) {
            for (size_t j = 0; j < stride; j++) {
                ASSERT_EQ(ptr[i * stride + j], i);
            }
        }
    };
    check(0);
    check(1);
    check(2);
}

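//! the tests below exercise device (CUDA) tensors and pinned host memory; they
//! are only compiled when the build enables LITE_WITH_CUDA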
#if LITE_WITH_CUDA
TEST(TestTensor, BasicDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    //! the mge tensors have been created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));
    //! check members
    ASSERT_EQ(tensor1.get_device_type(), LiteDeviceType::LITE_CUDA);
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor1.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_TRUE(TensorHelper::implement(&tensor2)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor2)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .dev_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .host_tensor());
}

TEST(TestTensor, SetLayoutReAllocDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    //! setting a new layout should go through
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor2.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .dev_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);
    auto new_ptr2 = tensor2.get_memory_ptr();
    ASSERT_EQ(old_ptr2, new_ptr2);
}

TEST(TestTensor, CrossCNCopyDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor0;
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    ASSERT_THROW(tensor3.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor3);
    tensor0.copy_from(tensor3);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, PinnedHostMem) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    bool is_pinned_host = true;
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout, is_pinned_host);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.is_pinned_host(), true);
    ASSERT_EQ(tensor3.is_pinned_host(), false);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! the source tensor is empty, so copy_from should throw
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, DeviceId) {
    if (get_device_count(LITE_CUDA) <= 1)
        return;
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(0, LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor3(1, LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    Tensor tensor1;
    tensor1.copy_from(tensor2);
    tensor1.copy_from(tensor3);
}

TEST(TestTensor, SliceDevice) {
    Layout layout{{20, 20}, 2};
    Tensor host_tensor0;
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    host_tensor0.copy_from(dev_tensor0);
    auto ptr = host_tensor0.get_memory_ptr();
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    dev_tensor0.copy_from(host_tensor0);
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor host_tensor;
        host_tensor.copy_from(
                *dev_tensor0.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(host_tensor.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);
}

TEST(TestTensor, MemSetDevice) {
    Layout layout{{20, 20}, 2, LiteDataType::LITE_INT8};
    Tensor host_tensor0(LiteDeviceType::LITE_CPU, layout);
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    auto check = [&](uint8_t val, const Tensor& tensor) {
        auto ptr = static_cast<uint8_t*>(tensor.get_memory_ptr());
        for (int i = 0; i < 20 * 20; i++) {
            ASSERT_EQ(val, *(ptr + i));
        }
    };
    host_tensor0.fill_zero();
    check(0, host_tensor0);
    Tensor host_tensor1;
    dev_tensor0.fill_zero();
    host_tensor1.copy_from(dev_tensor0);
    check(0, host_tensor1);
}

TEST(TestTensor, DeviceSliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CUDA, layout);
    //! alloc memory
    tensor.get_memory_ptr();
    Layout layout_slice{{20, 10}, 2};
    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    Tensor host_tensor;
    host_tensor.copy_from(tensor);
    auto ptr = static_cast<float*>(host_tensor.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}

TEST(TestTensor, ConcatDevice) {
    Layout layout{{5, 5, 5}, 3};
    std::vector<Tensor> tensors;
    for (int i = 0; i < 4; i++) {
        Tensor tensor(LiteDeviceType::LITE_CPU, layout);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        for (int n = 0; n < 5 * 5 * 5; n++) {
            ptr[n] = i;
        }
        tensors.push_back(tensor);
    }
    auto check = [&](int dim) {
        auto new_tensor =
                TensorUtils::concat(tensors, dim, LiteDeviceType::LITE_CUDA, 0);
        Tensor tensor(LiteDeviceType::LITE_CPU);
        tensor.copy_from(*new_tensor);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        size_t stride = std::pow(5, (3 - dim));
        for (int i = 0; i < 4; i++) {
            for (size_t j = 0; j < stride; j++) {
                ASSERT_EQ(ptr[i * stride + j], i);
            }
        }
        ASSERT_EQ(new_tensor->get_device_type(), LiteDeviceType::LITE_CUDA);
        ASSERT_EQ(new_tensor->get_device_id(), 0);
    };
    check(0);
    check(1);
    check(2);
}

#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}