|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577 |
#include "lite_build_config.h"

#if LITE_BUILD_WITH_MGE
#include "../src/mge/common.h"
#include "../src/mge/network_impl.h"
#include "../src/misc.h"
#include "lite/tensor.h"

#include <gtest/gtest.h>

#include <string.h>

#include <cmath>
#include <memory>
#include <vector>
-
- using namespace lite;
-
- TEST(TestTensor, Basic) {
- Layout layout{{1, 3, 224, 224}, 4};
- Tensor tensor1(LiteDeviceType::LITE_CPU);
- Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
- Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
- //! mge tensor has created
- ASSERT_TRUE(TensorHelper::implement(&tensor1));
- ASSERT_TRUE(TensorHelper::implement(&tensor2));
- ASSERT_TRUE(TensorHelper::implement(&tensor3));
- //! check member
- ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
- ASSERT_EQ(tensor2.get_layout(), layout);
- ASSERT_EQ(tensor3.get_layout(), layout);
- //! check the real tensor
- ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
- ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
-
- ASSERT_TRUE(TensorHelper::implement(&tensor1)
- ->cast_final_safe<TensorImplDft>()
- .host_tensor());
-
- ASSERT_FALSE(TensorHelper::implement(&tensor1)
- ->cast_final_safe<TensorImplDft>()
- .dev_tensor());
- ASSERT_FALSE(TensorHelper::implement(&tensor1)
- ->cast_final_safe<TensorImplDft>()
- .dev_tensor());
- ASSERT_TRUE(TensorHelper::implement(&tensor1)
- ->cast_final_safe<TensorImplDft>()
- .host_tensor());
- }
-
- TEST(TestTensor, SetLayoutReAlloc) {
- Layout layout{{1, 3, 224, 224}, 4};
- Tensor tensor1;
- Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
- Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
- auto old_ptr2 = tensor2.get_memory_ptr();
- auto old_ptr3 = tensor3.get_memory_ptr();
-
- //! layout set through
- Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
- tensor1.set_layout(layout1);
- tensor2.set_layout(layout1);
- tensor3.set_layout(layout1);
- ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
- ASSERT_EQ(tensor3.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
- auto layout2 = TensorHelper::implement(&tensor2)
- ->cast_final_safe<TensorImplDft>()
- .host_tensor()
- ->layout();
- auto layout3 = TensorHelper::implement(&tensor3)
- ->cast_final_safe<TensorImplDft>()
- .host_tensor()
- ->layout();
- ASSERT_EQ(to_lite_layout(layout2), layout1);
- ASSERT_EQ(to_lite_layout(layout3), layout1);
-
- auto new_ptr2 = tensor2.get_memory_ptr();
- auto new_ptr3 = tensor3.get_memory_ptr();
-
- ASSERT_EQ(old_ptr2, new_ptr2);
- ASSERT_EQ(old_ptr3, new_ptr3);
- }
-
//! reset() lets the user hand an externally owned buffer to a Tensor, either
//! keeping the current layout (pointer + byte size) or installing a new one
//! (pointer + layout). The Tensor must use the buffer as-is and must not take
//! ownership of it.
TEST(TestTensor, Reset) {
    Layout layout{{3, 20}, 2, LiteDataType::LITE_FLOAT};
    Tensor tensor1;
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);

    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! make sure memory is allocated
    ASSERT_NO_THROW(memcpy(old_ptr2, old_ptr3, 3 * 20 * 2));

    //! user-owned buffers; the shared_ptr deleters keep them alive for the
    //! whole test independent of the tensors
    std::shared_ptr<float> new_ptr2(
            new float[3 * 20], [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> new_ptr3(
            new float[3 * 20], [](float* ptr) { delete[] ptr; });
    //! tensor1 has no layout yet, so it gets pointer + layout; tensor2/3 keep
    //! their layout and get pointer + byte count (3*20 floats * 4 bytes)
    tensor1.reset(new_ptr2.get(), layout);
    tensor2.reset(new_ptr2.get(), 3 * 20 * 4);
    tensor3.reset(new_ptr3.get(), 3 * 20 * 4);
    //! After reset the original mem is freed
    /*ASSERT_EXIT((memcpy(old_ptr2, old_ptr3, 3 * 20 * 2), exit(0)),
                ::testing::KilledBySignal(SIGSEGV), ".*");*/

    //! the tensors must now expose exactly the user buffers
    ASSERT_EQ(tensor2.get_memory_ptr(), new_ptr2.get());
    ASSERT_EQ(tensor3.get_memory_ptr(), new_ptr3.get());

    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));

    //! reset again with a larger layout and fresh buffers
    Layout layout1{{6, 20}, 2, LiteDataType::LITE_FLOAT};
    std::shared_ptr<float> ptr2(new float[6 * 20], [](float* ptr) { delete[] ptr; });
    std::shared_ptr<float> ptr3(new float[6 * 20], [](float* ptr) { delete[] ptr; });
    tensor2.reset(ptr2.get(), layout1);
    tensor3.reset(ptr3.get(), layout1);

    //! memory is not freed by Tensor reset: the previous user buffers are
    //! still valid because the Tensor never owned them
    ASSERT_NO_THROW(memcpy(new_ptr2.get(), new_ptr3.get(), 3 * 20 * 2));
    auto host_layout2 = TensorHelper::implement(&tensor2)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();
    auto host_layout3 = TensorHelper::implement(&tensor3)
                                ->cast_final_safe<TensorImplDft>()
                                .host_tensor()
                                ->layout();

    //! the backing host tensor must have picked up the new layout
    ASSERT_EQ(to_lite_layout(host_layout2), layout1);
    ASSERT_EQ(to_lite_layout(host_layout3), layout1);
}
-
//! copy_from between CPU tensors: copying from an empty (layout-less) source
//! throws, copying into an already-allocated tensor of matching layout reuses
//! the destination buffer.
TEST(TestTensor, CrossCNCopy) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CPU, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);
    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();

    //! test source tensor is empty: tensor1 has no layout/memory yet
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    //! copying into the empty tensor1 is fine — it adopts tensor2's layout
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);

    //! same-layout copies must not re-allocate the destination
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}
-
- TEST(TestTensor, SharedTensorMemory) {
- Layout layout{{1, 3, 224, 224}, 4};
- Tensor tensor1(LiteDeviceType::LITE_CPU);
- {
- Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
- tensor1.share_memory_with(tensor2);
- auto ptr1 = tensor1.get_memory_ptr();
- auto ptr2 = tensor2.get_memory_ptr();
- ASSERT_EQ(ptr1, ptr2);
- }
- // check after tensor2 destroy, tensor1 can also visit
- auto ptr1 = static_cast<float*>(tensor1.get_memory_ptr());
- size_t length = tensor1.get_tensor_total_size_in_byte() /
- tensor1.get_layout().get_elem_size();
- for (size_t i = 0; i < length; i++) {
- ptr1[i] = i;
- }
- }
-
- TEST(TestTensor, Reshape) {
- Layout layout{{1, 3, 224, 224}, 4};
- Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
- auto ptr = tensor2.get_memory_ptr();
-
- //! test wrong case
- ASSERT_THROW(tensor2.reshape({-1, -1, 3 * 224 * 224}), std::exception);
- ASSERT_THROW(tensor2.reshape({-1, 3, 3 * 224 * 224}), std::exception);
- ASSERT_THROW(tensor2.reshape({1, 3, 3 * 224 * 224}), std::exception);
- ASSERT_THROW(tensor2.reshape({3, 3, 3 * 224 * 224}), std::exception);
-
- tensor2.reshape({3 * 224 * 224});
- ASSERT_EQ(tensor2.get_layout().ndim, 1);
- ASSERT_EQ(tensor2.get_layout().data_type, LiteDataType::LITE_FLOAT);
- ASSERT_EQ(tensor2.get_layout().shapes[0], 3 * 224 * 224);
- tensor2.reshape({-1, 224, 224});
- ASSERT_EQ(tensor2.get_layout().ndim, 3);
- ASSERT_EQ(tensor2.get_layout().shapes[0], 3);
- ASSERT_EQ(tensor2.get_layout().shapes[1], 224);
-
- ASSERT_EQ(tensor2.get_memory_ptr(), ptr);
- }
-
//! slice() with start/end/step on a 2-D tensor: malformed argument vectors
//! must throw, and copying a slice out must yield exactly the strided
//! sub-matrix values.
TEST(TestTensor, Slice) {
    Layout layout{{20, 20}, 2};
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    auto ptr = tensor2.get_memory_ptr();

    //! invalid slice arguments must throw: start/end/step vectors whose
    //! lengths disagree with each other or exceed the tensor's ndim
    ASSERT_THROW(tensor2.slice({5, 10, 10}, {10, 15}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15}, {5}), std::exception);
    ASSERT_THROW(tensor2.slice({5, 10}, {10, 15, 10}), std::exception);
    //! fill with row-major linear values so element (i, j) == i * 20 + j
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    //! copy the square slice [start:end:step, start:end:step] into a fresh
    //! contiguous tensor and verify every element
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor tensor3;
        tensor3.copy_from(*tensor2.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);

    //! slicing only the leading axis keeps whole rows: rows 3, 5, 7
    Tensor tensor3;
    tensor3.copy_from(*tensor2.slice({3}, {9}, {2}));
    float* new_ptr = static_cast<float*>(tensor3.get_memory_ptr());
    for (size_t i = 3; i < 9; i += 2) {
        for (size_t j = 0; j < 20; j++) {
            ASSERT_EQ(float(i * 20 + j), *new_ptr);
            ++new_ptr;
        }
    }
}
-
//! Copying INTO non-contiguous slices: splitting a 20x20 tensor into left and
//! right 20x10 halves, filling each half from a contiguous source, then
//! verifying the interleaved result row by row. Also checks fill_zero() on a
//! non-contiguous slice.
TEST(TestTensor, SliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());

    //! two contiguous 20x10 sources: values [0, 200) and [200, 400)
    Layout layout_slice{{20, 10}, 2};
    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }

    //! left half (columns 0-9) and right half (columns 10-19)
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});

    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);

    //! column slices of a row-major tensor cannot be contiguous
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());

    //! row i of the full tensor is now: tensor0 row i, then tensor1 row i
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    //! fill_zero on the non-contiguous left half, then read it back through a
    //! contiguous copy
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}
-
//! get_memory_ptr(indices) must return the address of the indexed element,
//! both on the full tensor and on non-contiguous slices (where the slice's
//! own indices are mapped back into the parent buffer via its strides and
//! start offset).
TEST(TestTensor, GetPtrOffset) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CPU, layout);
    //! alloc memory
    auto ptr = static_cast<float*>(tensor.get_memory_ptr());

    //! element (10, 10) of a row-major 20x20 float tensor
    auto ptr_offset = tensor.get_memory_ptr({10, 10});
    ASSERT_EQ(ptr_offset, ptr + 10 * 20 + 10);

    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});

    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());

    auto ptr_offset_slice0 = slice0->get_memory_ptr({6, 5});
    auto ptr_offset_slice1 = slice1->get_memory_ptr({2, 5});

    //! slice element (i, j) lives at parent row i, parent column j + col_start
    ASSERT_EQ(ptr_offset_slice0, ptr + 6 * 20 + 5);
    ASSERT_EQ(ptr_offset_slice1, ptr + 2 * 20 + 10 + 5);
}
-
- TEST(TestTensor, Concat) {
- Layout layout{{5, 5, 5}, 3};
- std::vector<Tensor> tensors;
- for (int i = 0; i < 4; i++) {
- Tensor tensor(LiteDeviceType::LITE_CPU, layout);
- auto ptr = static_cast<float*>(tensor.get_memory_ptr());
- for (int n = 0; n < 5 * 5 * 5; n++) {
- ptr[n] = i;
- }
- tensors.push_back(tensor);
- }
- auto check = [&](int dim) {
- auto new_tensor = TensorUtils::concat(tensors, dim);
- auto ptr = static_cast<float*>(new_tensor->get_memory_ptr());
- size_t stride = std::pow(5, (3 - dim));
- for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < stride; j++) {
- ASSERT_EQ(ptr[i * stride + j], i);
- }
- }
- };
- check(0);
- check(1);
- check(2);
- }
-
- #if LITE_WITH_CUDA
//! CUDA counterpart of Basic: a CUDA tensor is backed by a device tensor only
//! and a CPU tensor by a host tensor only.
TEST(TestTensor, BasicDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    //! mge tensor has created
    ASSERT_TRUE(TensorHelper::implement(&tensor1));
    ASSERT_TRUE(TensorHelper::implement(&tensor2));

    //! check member
    ASSERT_EQ(tensor1.get_device_type(), LiteDeviceType::LITE_CUDA);
    ASSERT_EQ(tensor2.get_device_type(), LiteDeviceType::LITE_CPU);
    ASSERT_EQ(tensor2.get_layout(), layout);
    //! check the real tensor
    ASSERT_EQ(tensor1.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 224 * 224 * 4);

    //! CPU tensor: host tensor only
    ASSERT_TRUE(TensorHelper::implement(&tensor2)
                        ->cast_final_safe<TensorImplDft>()
                        .host_tensor());

    ASSERT_FALSE(TensorHelper::implement(&tensor2)
                         ->cast_final_safe<TensorImplDft>()
                         .dev_tensor());
    //! CUDA tensor: device tensor only
    ASSERT_TRUE(TensorHelper::implement(&tensor1)
                        ->cast_final_safe<TensorImplDft>()
                        .dev_tensor());
    ASSERT_FALSE(TensorHelper::implement(&tensor1)
                         ->cast_final_safe<TensorImplDft>()
                         .host_tensor());
}
-
//! CUDA counterpart of SetLayoutReAlloc: shrinking a device tensor's layout
//! must update the reported layout/size without re-allocating device memory.
TEST(TestTensor, SetLayoutReAllocDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout);
    auto old_ptr2 = tensor2.get_memory_ptr();

    //! layout set through: shrink to a smaller int8 layout
    Layout layout1{{1, 3, 100, 100}, 4, LiteDataType::LITE_INT8};
    tensor2.set_layout(layout1);
    ASSERT_EQ(tensor2.get_tensor_total_size_in_byte(), 1 * 3 * 100 * 100);
    //! the backing device tensor must observe the new layout
    auto layout2 = TensorHelper::implement(&tensor2)
                           ->cast_final_safe<TensorImplDft>()
                           .dev_tensor()
                           ->layout();
    ASSERT_EQ(to_lite_layout(layout2), layout1);

    auto new_ptr2 = tensor2.get_memory_ptr();

    //! shrinking must not move the device buffer
    ASSERT_EQ(old_ptr2, new_ptr2);
}
-
//! copy_from across compute nodes (CPU <-> CUDA): empty sources throw, empty
//! destinations adopt the source layout, and same-layout copies reuse the
//! destination buffer on either side.
TEST(TestTensor, CrossCNCopyDevice) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor0;
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    Tensor tensor2(LiteDeviceType::LITE_CPU, layout);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);

    //! device -> host and host -> device round trip
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);

    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();
    //! copying from the still-empty tensor1 must throw
    ASSERT_THROW(tensor3.copy_from(tensor1), std::exception);

    //! copying into empty tensors from a device tensor is allowed
    tensor1.copy_from(tensor3);
    tensor0.copy_from(tensor3);

    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);

    //! same-layout copies must not re-allocate either buffer
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}
-
//! Pinned (page-locked) host memory: a CUDA tensor constructed with
//! is_pinned_host=true must report is_pinned_host() and interoperate with
//! ordinary device tensors via copy_from without re-allocating.
TEST(TestTensor, PinnedHostMem) {
    Layout layout{{1, 3, 224, 224}, 4};
    Tensor tensor1(LiteDeviceType::LITE_CPU);
    bool is_pinned_host = true;
    Tensor tensor2(LiteDeviceType::LITE_CUDA, layout, is_pinned_host);
    Tensor tensor3(LiteDeviceType::LITE_CUDA, layout);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);

    ASSERT_EQ(tensor2.is_pinned_host(), true);
    ASSERT_EQ(tensor3.is_pinned_host(), false);

    auto old_ptr2 = tensor2.get_memory_ptr();
    auto old_ptr3 = tensor3.get_memory_ptr();

    //! test source tensor is empty: tensor1 has no layout/memory yet
    ASSERT_THROW(tensor2.copy_from(tensor1), std::exception);
    tensor1.copy_from(tensor2);
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);

    //! same-layout copies must not re-allocate either buffer
    ASSERT_EQ(tensor2.get_memory_ptr(), old_ptr2);
    ASSERT_EQ(tensor3.get_memory_ptr(), old_ptr3);
}
-
//! Copies between tensors on two different CUDA devices, and from device
//! tensors into a fresh host tensor. Skipped when fewer than two CUDA
//! devices are available.
TEST(TestTensor, DeviceId) {
    if (get_device_count(LITE_CUDA) <= 1)
        return;
    Layout layout{{1, 3, 224, 224}, 4};
    //! same layout, device id 0 vs device id 1
    Tensor tensor2(0, LiteDeviceType::LITE_CUDA, layout);
    Tensor tensor3(1, LiteDeviceType::LITE_CUDA, layout);

    //! device-to-device copy in both directions must not throw
    tensor2.copy_from(tensor3);
    tensor3.copy_from(tensor2);

    //! an empty tensor can be filled from either device
    Tensor tensor1;
    tensor1.copy_from(tensor2);
    tensor1.copy_from(tensor3);
}
-
//! slice() on a CUDA tensor: fill via a host staging tensor, slice on the
//! device, copy the slice back to host, and verify the strided sub-matrix.
TEST(TestTensor, SliceDevice) {
    Layout layout{{20, 20}, 2};
    Tensor host_tensor0;
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    //! gives host_tensor0 the same layout and allocates it
    host_tensor0.copy_from(dev_tensor0);
    auto ptr = host_tensor0.get_memory_ptr();

    //! fill with row-major linear values so element (i, j) == i * 20 + j
    for (int i = 0; i < 20 * 20; i++) {
        *(static_cast<float*>(ptr) + i) = i;
    }
    dev_tensor0.copy_from(host_tensor0);

    //! slice on the device, then read back through a contiguous host copy
    auto check = [&](size_t start, size_t end, size_t step) {
        Tensor host_tensor;
        host_tensor.copy_from(
                *dev_tensor0.slice({start, start}, {end, end}, {step, step}));
        float* new_ptr = static_cast<float*>(host_tensor.get_memory_ptr());
        for (size_t i = start; i < end; i += step) {
            for (size_t j = start; j < end; j += step) {
                ASSERT_EQ(float(i * 20 + j), *new_ptr);
                ++new_ptr;
            }
        }
    };
    check(5, 10, 1);
    check(5, 11, 2);
    check(2, 18, 4);
}
-
//! fill_zero() must zero every byte both on a host tensor (checked directly)
//! and on a device tensor (checked through a host copy).
TEST(TestTensor, MemSetDevice) {
    Layout layout{{20, 20}, 2, LiteDataType::LITE_INT8};
    Tensor host_tensor0(LiteDeviceType::LITE_CPU, layout);
    Tensor dev_tensor0(LiteDeviceType::LITE_CUDA, layout);
    //! assert every int8 element of `tensor` equals `val`
    auto check = [&](uint8_t val, const Tensor& tensor) {
        auto ptr = static_cast<uint8_t*>(tensor.get_memory_ptr());
        for (int i = 0; i < 20 * 20; i++) {
            ASSERT_EQ(val, *(ptr + i));
        }
    };
    host_tensor0.fill_zero();
    check(0, host_tensor0);

    //! device-side fill, verified via a host copy
    Tensor host_tensor1;
    dev_tensor0.fill_zero();
    host_tensor1.copy_from(dev_tensor0);
    check(0, host_tensor1);
}
-
//! CUDA counterpart of SliceCopy: fill the left/right 20x10 halves of a
//! device tensor from contiguous host sources, verify the interleaved result
//! through a host copy, then fill_zero() a non-contiguous device slice.
TEST(TestTensor, DeviceSliceCopy) {
    Layout layout{{20, 20}, 2};
    Tensor tensor(LiteDeviceType::LITE_CUDA, layout);
    //! alloc memory
    tensor.get_memory_ptr();

    //! two contiguous 20x10 host sources: values [0, 200) and [200, 400)
    Layout layout_slice{{20, 10}, 2};
    Tensor tensor0(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr0 = tensor0.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr0) + i) = i;
    }
    Tensor tensor1(LiteDeviceType::LITE_CPU, layout_slice);
    auto ptr1 = tensor1.get_memory_ptr();
    for (int i = 0; i < 10 * 20; i++) {
        *(static_cast<float*>(ptr1) + i) = i + 200;
    }

    //! left half (columns 0-9) and right half (columns 10-19) on the device
    auto slice0 = tensor.slice({0, 0}, {20, 10});
    auto slice1 = tensor.slice({0, 10}, {20, 20});

    slice0->copy_from(tensor0);
    slice1->copy_from(tensor1);

    //! column slices of a row-major tensor cannot be contiguous
    ASSERT_FALSE(slice0->is_continue_memory());
    ASSERT_FALSE(slice1->is_continue_memory());

    //! read the whole device tensor back to host for verification
    Tensor host_tensor;
    host_tensor.copy_from(tensor);
    auto ptr = static_cast<float*>(host_tensor.get_memory_ptr());

    //! row i is: tensor0 row i, then tensor1 row i
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j), *ptr);
            ++ptr;
        }
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(i * 10 + j + 200), *ptr);
            ++ptr;
        }
    }
    //! fill_zero on the non-contiguous device slice, verified via host copy
    slice0->fill_zero();
    Tensor tmp;
    tmp.copy_from(*slice0);
    float* tmp_ptr = static_cast<float*>(tmp.get_memory_ptr());
    for (size_t i = 0; i < 20; i++) {
        for (size_t j = 0; j < 10; j++) {
            ASSERT_EQ(float(0), *tmp_ptr);
            ++tmp_ptr;
        }
    }
}
-
- TEST(TestTensor, ConcatDevice) {
- Layout layout{{5, 5, 5}, 3};
- std::vector<Tensor> tensors;
- for (int i = 0; i < 4; i++) {
- Tensor tensor(LiteDeviceType::LITE_CPU, layout);
- auto ptr = static_cast<float*>(tensor.get_memory_ptr());
- for (int n = 0; n < 5 * 5 * 5; n++) {
- ptr[n] = i;
- }
- tensors.push_back(tensor);
- }
- auto check = [&](int dim) {
- auto new_tensor =
- TensorUtils::concat(tensors, dim, LiteDeviceType::LITE_CUDA, 0);
-
- Tensor tensor(LiteDeviceType::LITE_CPU);
- tensor.copy_from(*new_tensor);
- auto ptr = static_cast<float*>(tensor.get_memory_ptr());
- size_t stride = std::pow(5, (3 - dim));
- for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < stride; j++) {
- ASSERT_EQ(ptr[i * stride + j], i);
- }
- }
- ASSERT_EQ(new_tensor->get_device_type(), LiteDeviceType::LITE_CUDA);
- ASSERT_EQ(new_tensor->get_device_id(), 0);
- };
- check(0);
- check(1);
- check(2);
- }
- #endif
- #endif
-
- // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
|