
test_tensor.cpp

/**
 * \file test/test_tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
  11. #include "lite_build_config.h"
  12. #if LITE_BUILD_WITH_MGE
  13. #include "../src/misc.h"
  14. #include "../src/mge/common.h"
  15. #include "../src/mge/network_impl.h"
  16. #include "lite/tensor.h"
  17. #include <gtest/gtest.h>
  18. #include <string.h>
  19. #include <memory>
  20. using namespace lite;
TEST(TestTensor, Basic) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    //! the mge tensors have been created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));
    ASSERT_TRUE(TensorHelper::implement(&tensor3));
    //! check the members
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    ASSERT_EQ(tensor3.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
}

TEST(TestTensor, SetLayoutReAlloc) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! set a new, smaller layout
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor1.set_layout(layout1);
    tensor2.set_layout(layout1);
    tensor3.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .host_tensor()
                           ->layout();
    auto layout3 = TensorHelper::implement(&tensor3)
                           ->cast_final_safe<TensorImplDft>()
                           .host_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);
    ASSERT_EQ(to_lite_layout(layout3), layout1);
    auto new_ptr2 = tensor2.get_memory_ptr();
    auto new_ptr3 = tensor3.get_memory_ptr();
    ASSERT_EQ(old_ptr2, new_ptr2);
    ASSERT_EQ(old_ptr3, new_ptr3);
}

TEST(TestTensor, Reset) {
    Layout layout{{3, 20}, 2, LiteDataType::LITE_FLOAT};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! make sure the memory is allocated
    ASSERT_NO_THROW(memcpy(old_ptr2, old_ptr3, 3 * 20 * 2));
    std::shared_ptr<float> new_ptr2(new float[3 * 20],
                                    [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> new_ptr3(new float[3 * 20],
                                    [](float* ptr) { delete[] ptr; });
    tensor1.reset(new_ptr2.get(), layout);
    tensor2.reset(new_ptr2.get(), 3 * 20 * 4);
    tensor3.reset(new_ptr3.get(), 3 * 20 * 4);
    //! after reset, the original memory is freed
    /*ASSERT_EXIT((memcpy(old_ptr2, old_ptr3, 3 * 20 * 2), exit(0)),
                  ::testing::KilledBySignal(SIGSEGV), ".*");*/
    ASSERT_EQ(tensor2.get_memory_ptr(), new_ptr2.get());
    ASSERT_EQ(tensor3.get_memory_ptr(), new_ptr3.get());
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));
    Layout layout1{{6, 20}, 2, LiteDataType::LITE_FLOAT};
    std::shared_ptr<float> ptr2(new float[6 * 20],
                                [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> ptr3(new float[6 * 20],
                                [](float* ptr) { delete[] ptr; });
    tensor2.reset(ptr2.get(), layout1);
    tensor3.reset(ptr3.get(), layout1);
    //! user-provided memory is not freed by Tensor reset
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));
    auto host_layout2 = TensorHelper::implement(&tensor2)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    auto host_layout3 = TensorHelper::implement(&tensor3)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    ASSERT_EQ(to_lite_layout(host_layout2), layout1);
    ASSERT_EQ(to_lite_layout(host_layout3), layout1);
}

TEST(TestTensor, CrossCNCopy) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! copying from an empty source tensor should throw
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, SharedTensorMemory) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    {
        Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
        tensor1.share_memory_with(tensor2);
        auto ptr1 = tensor1.get_memory_ptr();
        auto ptr2 = tensor2.get_memory_ptr();
        ASSERT_EQ(ptr1, ptr2);
    }
    // after tensor2 is destroyed, tensor1 can still access the memory
    auto ptr1 = static_cast<float*>(tensor1.get_memory_ptr());
    size_t length = tensor1.get_tensor_total_size_in_byte() /
                    tensor1.get_layout().get_elem_size();
    for (size_t i = 0; i < length; i++) {
        ptr1[i] = i;
    }
}

TEST(TestTensor, Reshape) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();
    //! test invalid reshape cases
    ASSERT_THROW(tensor2.reshape({-1, -1, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({-1, 3, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({1, 3, 3 * 224 * 224}), std::exception);
    ASSERT_THROW(tensor2.reshape({3, 3, 3 * 224 * 224}), std::exception);
    tensor2.reshape({3 * 224 * 224});
    ASSERT_EQ(tensor2.get_layout().ndim, 1);
    ASSERT_EQ(tensor2.get_layout().data_type, LiteDataType::LITE_FLOAT);
    ASSERT_EQ(tensor2.get_layout().shapes[0], 3 * 224 * 224);
    tensor2.reshape({-1, 224, 224});
    ASSERT_EQ(tensor2.get_layout().ndim, 3);
    ASSERT_EQ(tensor2.get_layout().shapes[0], 3);
    ASSERT_EQ(tensor2.get_layout().shapes[1], 224);
    ASSERT_EQ(tensor2.get_memory_ptr(), ptr);
}

TEST(TestTensor, Slice) {
    Layout layout{{20, 20}, 2};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();
    //! test invalid slice arguments
    ASSERT_THROW(tensor2.slice({5, 10, 10}, {10, 15}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15}, {5}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15, 10}), std::exception);
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor tensor3;
        tensor3.copy_from(
                *tensor2.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);
    Tensor tensor3;
    tensor3.copy_from(*tensor2.slice({3}, {9}, {2}));
    float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
    for (size_t i = 3; i < 9; i += 2) {
        for (size_t j = 0; j < 20; j++) {
            ASSERT_EQ(float(i * 20 + j), *new_ptr);
            ++new_ptr;
        }
    }
}

TEST(TestTensor, SliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());
    Layout layout_slice{{20, 10}, 2};
    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}

TEST(TestTensor, GetPtrOffset) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());
    auto ptr_offset = tensor.get_memory_ptr({10, 10});
    ASSERT_EQ(ptr_offset, ptr + 10 * 20 + 10);
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    auto ptr_offset_slice0 = slice0->get_memory_ptr({6, 5});
    auto ptr_offset_slice1 = slice1->get_memory_ptr({2, 5});
    ASSERT_EQ(ptr_offset_slice0, ptr + 6 * 20 + 5);
    ASSERT_EQ(ptr_offset_slice1, ptr + 2 * 20 + 10 + 5);
}

TEST(TestTensor, Concat) {
    Layout layout{{5, 5, 5}, 3};
    std::vector<Tensor> tensors;
    for (int i = 0; i < 4; i++) {
        Tensor tensor(LiteDeviceType::LITE_CPU, layout);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        for (int n = 0; n < 5 * 5 * 5; n++) {
            ptr[n] = i;
        }
        tensors.push_back(tensor);
    }
    auto check = [&](int dim) {
        auto new_tensor = TensorUtils::concat(tensors, dim);
        auto ptr = static_cast<float*>(new_tensor->get_memory_ptr());
        size_t stride = std::pow(5, (3 - dim));
        for (int i = 0; i < 4; i++) {
            for (size_t j = 0; j < stride; j++) {
                ASSERT_EQ(ptr[i * stride + j], i);
            }
        }
    };
    check(0);
    check(1);
    check(2);
}

#if LITE_WITH_CUDA

TEST(TestTensor, BasicDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    //! the mge tensors have been created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));
    //! check the members
    ASSERT_EQ(tensor1.get_device_type(), LiteDeviceType::LITE_CUDA);
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor1.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_TRUE(TensorHelper::implement(&tensor2)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor2)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .dev_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .host_tensor());
}

TEST(TestTensor, SetLayoutReAllocDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();
    //! set a new, smaller layout
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor2.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .dev_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);
    auto new_ptr2 = tensor2.get_memory_ptr();
    ASSERT_EQ(old_ptr2, new_ptr2);
}

TEST(TestTensor, CrossCNCopyDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor0;
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    ASSERT_THROW(tensor3.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor3);
    tensor0.copy_from(tensor3);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, PinnedHostMem) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    bool is_pinned_host = true;
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout, is_pinned_host);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.is_pinned_host(), true);
    ASSERT_EQ(tensor3.is_pinned_host(), false);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! copying from an empty source tensor should throw
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}

TEST(TestTensor, DeviceId) {
    if (get_device_count(LITE_CUDA) <= 1)
        return;
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(0, LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor3(1, LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    Tensor tensor1;
    tensor1.copy_from(tensor2);
    tensor1.copy_from(tensor3);
}

TEST(TestTensor, SliceDevice) {
    Layout layout{{20, 20}, 2};
    Tensor host_tensor0;
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    host_tensor0.copy_from(dev_tensor0);
    auto ptr = host_tensor0.get_memory_ptr();
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    dev_tensor0.copy_from(host_tensor0);
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor host_tensor;
        host_tensor.copy_from(
                *dev_tensor0.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(host_tensor.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);
}

TEST(TestTensor, MemSetDevice) {
    Layout layout{{20, 20}, 2, LiteDataType::LITE_INT8};
    Tensor host_tensor0(LiteDeviceType::LITE_CPU, layout);
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    auto check = [&](uint8_t val, const Tensor& tensor) {
        auto ptr = static_cast<uint8_t*>(tensor.get_memory_ptr());
        for (int i = 0; i < 20 * 20; i++) {
            ASSERT_EQ(val, *(ptr + i));
        }
    };
    host_tensor0.fill_zero();
    check(0, host_tensor0);
    Tensor host_tensor1;
    dev_tensor0.fill_zero();
    host_tensor1.copy_from(dev_tensor0);
    check(0, host_tensor1);
}

TEST(TestTensor, DeviceSliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CUDA, layout);
    //! alloc memory
    tensor.get_memory_ptr();
    Layout layout_slice{{20, 10}, 2};
    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});
    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());
    Tensor host_tensor;
    host_tensor.copy_from(tensor);
    auto ptr = static_cast<float*>(host_tensor.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}

TEST(TestTensor, ConcatDevice) {
    Layout layout{{5, 5, 5}, 3};
    std::vector<Tensor> tensors;
    for (int i = 0; i < 4; i++) {
        Tensor tensor(LiteDeviceType::LITE_CPU, layout);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        for (int n = 0; n < 5 * 5 * 5; n++) {
            ptr[n] = i;
        }
        tensors.push_back(tensor);
    }
    auto check = [&](int dim) {
        auto new_tensor =
                TensorUtils::concat(tensors, dim, LiteDeviceType::LITE_CUDA, 0);
        Tensor tensor(LiteDeviceType::LITE_CPU);
        tensor.copy_from(*new_tensor);
        auto ptr = static_cast<float*>(tensor.get_memory_ptr());
        size_t stride = std::pow(5, (3 - dim));
        for (int i = 0; i < 4; i++) {
            for (size_t j = 0; j < stride; j++) {
                ASSERT_EQ(ptr[i * stride + j], i);
            }
        }
        ASSERT_EQ(new_tensor->get_device_type(), LiteDeviceType::LITE_CUDA);
        ASSERT_EQ(new_tensor->get_device_id(), 0);
    };
    check(0);
    check(1);
    check(2);
}

#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine package ships with the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run a GPU program, make sure the machine actually has a GPU and that the driver is installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit MegStudio.
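For reference, the snippet below is a minimal sketch of how a caller might pick a device at run time before allocating a tensor, reusing only the lite APIs exercised by the tests above (Layout, Tensor, get_device_count). The lite/global.h include is an assumption about where get_device_count is declared; adjust it to your installation.

#include "lite/global.h"  // assumed to declare lite::get_device_count
#include "lite/tensor.h"

using namespace lite;

int main() {
    // Prefer CUDA when at least one device is visible, otherwise fall back
    // to the CPU, mirroring the guard used in TestTensor.DeviceId above.
    LiteDeviceType device = get_device_count(LITE_CUDA) > 0
                                    ? LiteDeviceType::LITE_CUDA
                                    : LiteDeviceType::LITE_CPU;
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor(device, layout);
    // get_memory_ptr() triggers the actual allocation on the chosen device.
    tensor.get_memory_ptr();
    return 0;
}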