- #include "test/fallback/fixture.h"
-
- #include "test/common/checker.h"
- #include "test/common/relayout.h"
- #include "test/common/tensor.h"
-
- #include <ctime>
- #include "megdnn/basic_types.h"
- #include "test/common/task_record_check.h"
-
- using namespace megdnn;
- using namespace test;
-
- namespace {
- template <typename tag>
- class FALLBACK_RELAYOUT : public FALLBACK {};
- TYPED_TEST_CASE(FALLBACK_RELAYOUT, relayout::test_types);
- TYPED_TEST(FALLBACK_RELAYOUT, run) {
- relayout::run_test<TypeParam>(this->handle());
- }
- } // namespace
-
- TEST_F(FALLBACK, RELAYOUT_CONTINUE) {
- Checker<Relayout> checker(handle());
- checker.set_dtype(0, dtype::Int32());
- checker.set_dtype(1, dtype::Int32());
- checker.exec({{2, 2, 2}, {2, 2, 2}});
- }
-
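
// TaskRecordChecker runs the same case through the task-record (kernel
// recording) execution path and checks that replaying the recorded tasks
// still produces correct results.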
TEST_F(FALLBACK, RELAYOUT_RECORD) {
    TaskRecordChecker<Relayout> checker(1);
    checker.set_dtype(0, dtype::Int32());
    checker.set_dtype(1, dtype::Int32());
    checker.exec({{2, 2, 2}, {2, 2, 2}});
}
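
// QuantizedS4 packs two 4-bit elements into one byte, so relayout has to
// handle layouts whose innermost dimension addresses sub-byte data. The
// execl cases below pair a compact layout with transposed or padded
// (strided) counterparts in both directions.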
TEST_F(FALLBACK, RELAYOUT_Q4) {
    Checker<Relayout> checker(handle());
    UniformIntRNG rng_int4{-7, 7};
    checker.set_rng(0, &rng_int4)
            .set_rng(1, &rng_int4)
            .set_dtype(0, dtype::QuantizedS4(1.f))
            .set_dtype(1, dtype::QuantizedS4(1.f))
            .execs({{2, 2, 1, 1}, {1, 1, 2, 2}})
            .execs({{1, 64, 15, 15}, {1, 15, 15, 64}})
            .execs({{1, 5, 9, 32}, {1, 5, 32, 9}})
            .execl(TensorLayoutArray{
                    {{6400}, {1}, dtype::QuantizedS4{1.f}},
                    {{20, 320}, {1024, 1}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{156}, {1}, dtype::QuantizedS4{1.f}},
                    {{13, 3, 4}, {16, 1, 4}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{48}, {1}, dtype::QuantizedS4{1.f}},
                    {{3, 4, 4}, {16, 1, 4}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{84}, {1}, dtype::QuantizedS4{1.f}},
                    {{3, 4, 7}, {28, 1, 4}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{336}, {1}, dtype::QuantizedS4{1.f}},
                    {{3, 4, 7, 4}, {112, 4, 16, 1}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{54}, {1}, dtype::QuantizedS4{1.f}},
                    {{6, 3, 3}, {16, 4, 1}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{1200, 3}, {4, 1}, dtype::QuantizedS4{1.f}},
                    {{20, 60, 3}, {256, 4, 1}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{20, 20, 3, 3}, {256, 12, 4, 1}, dtype::QuantizedS4{1.f}},
                    {{1200, 3}, {4, 1}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{5, 16, 7, 7, 4}, {3136, 196, 28, 4, 1}, dtype::QuantizedS4{1.f}},
                    {{5, 16, 7, 7, 4}, {3136, 4, 448, 64, 1}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{5, 7, 7, 16, 4}, {3136, 448, 64, 4, 1}, dtype::QuantizedS4{1.f}},
                    {{5, 7, 7, 16, 4}, {3136, 28, 4, 196, 1}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{5, 2, 7, 7, 32}, {3136, 1568, 224, 32, 1}, dtype::QuantizedS4{1.f}},
                    {{5, 2, 7, 7, 32}, {3136, 32, 448, 64, 1}, dtype::QuantizedS4{1.f}}})
            .execl(TensorLayoutArray{
                    {{5, 7, 7, 2, 32}, {3136, 448, 64, 32, 1}, dtype::QuantizedS4{1.f}},
                    {{5, 7, 7, 2, 32}, {3136, 224, 32, 1568, 1}, dtype::QuantizedS4{1.f}}});
}
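
// Illustrative sketch (not part of the original suite): execl pairs an
// explicit source layout with an explicit destination layout. The test name,
// dtype and shapes below are our own, chosen only to show the same pattern on
// a byte-sized type: dst strides {1, 4} describe a transposed view of a
// contiguous 3x4 buffer, so the copy is a 4x3 transpose.
TEST_F(FALLBACK, RELAYOUT_TRANSPOSE_SKETCH) {
    Checker<Relayout> checker(handle());
    checker.execl(TensorLayoutArray{
            {{4, 3}, {3, 1}, dtype::Int8()},
            {{4, 3}, {1, 4}, dtype::Int8()}});
}
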
#if MEGDNN_WITH_BENCHMARK
TEST_F(FALLBACK, BENCHMARK_RELAYOUT_CV) {
    relayout::run_cv_benchmark(handle());
}
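
// Compares the fallback Relayout against a naive reference handle
// (create_cpu_handle(2)) on small correctness presets and large benchmark
// shapes, timing both implementations and asserting bit-identical results.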
TEST_F(FALLBACK, BENCHMARK_RELAYOUT) {
    auto naive_handle = create_cpu_handle(2);
    bool verbose = false;
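    // run() copies between a contiguous and a non-contiguous Int32 layout in
    // the direction selected by out_cont, using both the fallback and the
    // naive operator, and asserts that the two destinations agree.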
    auto run = [&](bool out_cont, const TensorLayout& cont_layout,
                   const TensorLayout& noncont_layout) {
        megdnn_assert(
                cont_layout.dtype == dtype::Int32() &&
                noncont_layout.dtype == dtype::Int32() &&
                noncont_layout.span().low_byte == 0);
        auto noncont_storage_size = noncont_layout.span().high_elem;
        Tensor<dt_int32> noncont_storage0(
                handle(), {{noncont_storage_size}, dtype::Int32()}),
                noncont_storage1(handle(), {{noncont_storage_size}, dtype::Int32()}),
                cont_storage0(handle(), cont_layout),
                cont_storage1(handle(), cont_layout);

        auto noncont0 = noncont_storage0.tensornd(),
             noncont1 = noncont_storage1.tensornd();
        noncont0.layout = noncont_layout;
        noncont1.layout = noncont_layout;

        TensorND src, dst0, dst1;
        if (out_cont) {
            src = noncont0;
            dst0 = cont_storage0.tensornd();
            dst1 = cont_storage1.tensornd();
            auto ptr = src.ptr<int>();
            for (size_t i = 0; i < noncont_storage_size; ++i) {
                ptr[i] = i;
            }
        } else {
            // poison both destination buffers so stale contents cannot mask
            // a missed write
            memset(noncont_storage0.ptr(), -1,
                   noncont_storage0.layout().span().dist_byte());
            memset(noncont_storage1.ptr(), -1,
                   noncont_storage1.layout().span().dist_byte());
            src = cont_storage0.tensornd();
            dst0 = noncont0;
            dst1 = noncont1;
            auto ptr = src.ptr<int>();
            for (size_t i = 0, it = src.layout.total_nr_elems(); i < it; ++i) {
                ptr[i] = i;
            }
        }
        auto opr_cur = handle()->create_operator<Relayout>();
        auto opr_naive = naive_handle->create_operator<Relayout>();
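        // one untimed warm-up exec, then a single timed exec; clock()
        // measures CPU time, converted here to milliseconds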
        auto timeit = [&src](Relayout* opr, TensorND out) {
            opr->exec(src, out);
            auto start = clock();
            opr->exec(src, out);
            auto stop = clock();
            return (stop - start) * 1e3 / CLOCKS_PER_SEC;
        };
        auto t1 = timeit(opr_naive.get(), dst1), t0 = timeit(opr_cur.get(), dst0);
        // payload size in GiB, pre-multiplied by 1e3 so that dividing by a
        // time in milliseconds yields GiB/s
        double tot_size_gb_ms = cont_layout.total_nr_elems() * sizeof(int) / 1024.0 /
                                1024.0 / 1024.0 * 1e3;
        if (verbose) {
            printf("noncont-%zu dir=%d: fallback=%7.3fms,%5.2fGiB/s "
                   "naive=%7.3fms,%5.2fGiB/s\n",
                   noncont_layout.collapse_contiguous().ndim, out_cont, t0,
                   tot_size_gb_ms / t0, t1, tot_size_gb_ms / t1);
        }

        ASSERT_EQ(
                0, memcmp(dst0.ptr<int>(), dst1.ptr<int>(),
                          dst0.layout.span().dist_byte()));
    };
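    // run_preset() derives a non-contiguous layout from a contiguous shape,
    // either by swapping two adjacent dimensions (a transposed view, when
    // swap > 0) or by carving a sub-tensor out of a buffer with one extra
    // element per row (when sub is true), then feeds both layouts to run().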
    auto run_preset = [&](const TensorShape& noncont_shp, int swap, bool sub,
                          bool out_cont) {
        TensorLayout noncont_layout(noncont_shp, dtype::Int32());
        if (swap) {
            auto a = swap - 1, b = swap;
            std::swap(noncont_layout.shape[a], noncont_layout.shape[b]);
            std::swap(noncont_layout.stride[a], noncont_layout.stride[b]);
        }

        TensorLayout cont_layout = noncont_layout;
        cont_layout.init_contiguous_stride();

        TensorShape noncont_storage_shp(cont_layout);
        if (sub) {
            ++noncont_storage_shp[noncont_layout.ndim - 1];
            noncont_layout.init_contiguous_stride(noncont_storage_shp);
            --noncont_layout.shape[noncont_layout.ndim - 1];
        }

        run(out_cont, cont_layout, noncont_layout);
    };
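    // small shapes run first as quiet correctness checks; the large shapes
    // that follow are the actual benchmark cases and print their timings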
    for (bool out_cont : {false, true}) {
        verbose = false;
        run_preset({2, 3}, 1, false, out_cont);
        run_preset({2, 2, 2}, 0, true, out_cont);
        {
            // padding-like
            TensorLayout cont{{2, 3, 3}, dtype::Int32()}, noncont = cont;
            noncont.stride[1] = 5;
            noncont.stride[0] = 25;
            run(out_cont, cont, noncont);
        }

        verbose = true;
        run_preset({1234, 5678}, 0, false, out_cont);
        run_preset({256, 256, 256}, 0, true, out_cont);
        run_preset({2, 3, 1024, 1024}, 1, false, out_cont);
        run_preset({1025, 2049}, 1, false, out_cont);
        run_preset({2049, 1025}, 1, false, out_cont);
        run_preset({10, 1024, 1024}, 2, false, out_cont);

        {
            // padding-like
            TensorLayout cont{{60, 60, 60}, dtype::Int32()}, noncont = cont;
            noncont.stride[1] = 63;
            noncont.stride[0] = 63 * 63;
            run(out_cont, cont, noncont);
        }
    }
}
#endif

// vim: syntax=cpp.doxygen