|
- /**
- * \file dnn/test/naive/matrix_mul.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
- #include "test/naive/fixture.h"
-
- #include "megdnn/oprs/linalg.h"
- #include "test/common/checker.h"
- #include "test/common/matrix_mul.h"
- #include "test/common/random_state.h"
- #include "test/common/extra_impl_helper.h"
-
- using namespace megdnn;
- using namespace test;
-
- namespace {
-
- /*!
-  * \brief run MatrixMul in a packed MK format through a Checker, using a
-  *      DEFAULT-format MatrixMul as the extra implementation
-  *
-  * Every argument set returned by get_matmul_args() whose m and k are both
-  * multiples of the pack size of \p format is executed. Argument sets that do
-  * not satisfy this are skipped.
-  *
-  * \param handle handle used to create the operators
-  * \param format packed format to run; the extra implementation asserts that
-  *      it is one of MK4, MK4_DOT or MK8
-  * \param Atype dtype set on tensor 0 (operand A)
-  * \param Btype dtype set on tensor 1 (operand B)
-  * \param Ctype dtype set on tensor 2 (result C)
-  */
- void run_matmul_mk_format(Handle* handle, param::MatrixMul::Format format,
-                           DType Atype, DType Btype, DType Ctype) {
-     using namespace matrix_mul;
-     std::vector<TestArg> args = get_matmul_args();
-     Checker<MatrixMul> checker(handle);
-
-     // Extra implementation: view the packed tensors as dimshuffled plain
-     // matrices, relayout A and B into dense scratch buffers, run a
-     // DEFAULT-format MatrixMul on them and relayout the result back into the
-     // packed C tensor.
-     auto extra_impl = [](const TensorNDArray& tensors, param::MatrixMul param,
-                          Handle* handle, size_t pack_size) {
-         megdnn_assert((param.format == param::MatrixMul::Format::MK4 ||
-                        param.format == param::MatrixMul::Format::MK4_DOT ||
-                        param.format == param::MatrixMul::Format::MK8) &&
-                       tensors.size() == 3);
-         param::MatrixMul new_param = param;
-         new_param.format = param::MatrixMul::Format::DEFAULT;
-
-         // Plain matrix extents recovered from the packed shapes.
-         const size_t M = tensors[2].layout[0] * pack_size;
-         const size_t N = tensors[2].layout[1];
-         const size_t K = tensors[0].layout[1 - param.transposeA] * pack_size;
-         const bool is_dot =
-                 param.format == param::MatrixMul::Format::MK4_DOT;
-
-         // Index 0 / 1 / 2 of both arrays is A / B / C.
-         TensorLayoutArray default_layouts, packed_layouts;
-
-         // Operand A: the two inner-block axis orders are swapped between
-         // MK4_DOT and the other formats, and again by transposeA.
-         if (param.transposeA) {
-             default_layouts.emplace_back(tensors[0].layout.reshape({K, M}));
-             auto blocked = default_layouts.back().reshape(
-                     {K / pack_size, M / pack_size, pack_size, pack_size});
-             packed_layouts.emplace_back(
-                     is_dot ? blocked.dimshuffle({0, 3, 1, 2})
-                            : blocked.dimshuffle({0, 2, 1, 3}));
-         } else {
-             default_layouts.emplace_back(tensors[0].layout.reshape({M, K}));
-             auto blocked = default_layouts.back().reshape(
-                     {M / pack_size, K / pack_size, pack_size, pack_size});
-             packed_layouts.emplace_back(
-                     is_dot ? blocked.dimshuffle({0, 2, 1, 3})
-                            : blocked.dimshuffle({0, 3, 1, 2}));
-         }
-
-         // Operand B.
-         if (param.transposeB) {
-             default_layouts.emplace_back(tensors[1].layout.reshape({N, K}));
-             packed_layouts.emplace_back(
-                     default_layouts.back()
-                             .reshape({N, K / pack_size, pack_size})
-                             .dimshuffle({0, 1, 2}));
-         } else {
-             default_layouts.emplace_back(tensors[1].layout.reshape({K, N}));
-             packed_layouts.emplace_back(
-                     default_layouts.back()
-                             .reshape({K / pack_size, N, pack_size})
-                             .dimshuffle({0, 2, 1}));
-         }
-
-         // Result C.
-         default_layouts.emplace_back(tensors[2].layout.reshape({M, N}));
-         packed_layouts.emplace_back(
-                 default_layouts.back()
-                         .reshape({M / pack_size, N, pack_size})
-                         .dimshuffle({0, 2, 1}));
-
-         auto matmul_opr = handle->create_operator<MatrixMul>();
-         matmul_opr->param() = new_param;
-         const size_t matmul_workspace = matmul_opr->get_workspace_in_bytes(
-                 default_layouts[0], default_layouts[1], default_layouts[2]);
-         auto relayout_opr = handle->create_operator<Relayout>();
-
-         // Slots 0..2 hold the dense A / B / C matrices, slot 3 is the
-         // operator workspace.
-         WorkspaceBundle wb(nullptr, {default_layouts[0].span().dist_byte(),
-                                      default_layouts[1].span().dist_byte(),
-                                      default_layouts[2].span().dist_byte(),
-                                      matmul_workspace});
-         // The scratch memory is owned by a container so that it is released
-         // on every exit path, including an exception escaping one of the
-         // exec() calls below.
-         std::vector<uint8_t> storage(wb.total_size_in_bytes());
-         wb.set(storage.data());
-
-         TensorNDArray default_tensors, packed_tensors;
-         for (size_t i = 0; i < 3; i++) {
-             default_tensors.emplace_back(wb.get(i), default_layouts[i]);
-             packed_tensors.emplace_back(tensors[i].raw_ptr,
-                                         packed_layouts[i]);
-         }
-         relayout_opr->exec(packed_tensors[0], default_tensors[0]);
-         relayout_opr->exec(packed_tensors[1], default_tensors[1]);
-         matmul_opr->exec(default_tensors[0], default_tensors[1],
-                          default_tensors[2], wb.get_workspace(3));
-         relayout_opr->exec(default_tensors[2], packed_tensors[2]);
-     };
-
-     const size_t pack_size = MatrixMulForward::pack_size(format);
-     for (auto&& arg : args) {
-         // Skip cases whose m or k is not a multiple of the pack size.
-         if (arg.m % pack_size != 0 || arg.k % pack_size != 0)
-             continue;
-         param::MatrixMul param;
-         param.transposeA = arg.mask & 0x1;
-         param.transposeB = arg.mask & 0x2;
-         param.format = format;
-         const size_t m = arg.m, n = arg.n, k = arg.k;
-         TensorShape A, B;
-         if (param.transposeA) {
-             A = TensorShape{k / pack_size, m / pack_size, pack_size, pack_size};
-         } else {
-             A = TensorShape{m / pack_size, k / pack_size, pack_size, pack_size};
-         }
-         if (param.transposeB) {
-             B = TensorShape{n, k / pack_size, pack_size};
-         } else {
-             B = TensorShape{k / pack_size, n, pack_size};
-         }
-
-         // param changes per iteration, so the extra implementation is
-         // re-registered with a by-value copy of the current param.
-         checker.set_extra_opr_impl(
-                 [extra_impl, param, handle,
-                  pack_size](const TensorNDArray& tensors) {
-                     extra_impl(tensors, param, handle, pack_size);
-                 });
-         checker.set_dtype(0, Atype)
-                 .set_dtype(1, Btype)
-                 .set_dtype(2, Ctype)
-                 .set_epsilon(1e-3)
-                 .set_param(param)
-                 .execs({A, B, {}});
-     }
- }
-
- } // namespace
-
- // 8x8 by 8x8 MatrixMul on Quantized4Asymm inputs with a QuantizedS32 output,
- // checked against hard-coded expected values.
- TEST_F(NAIVE, MATRIX_MUL_QUANTIZED4x4x32) {
-     Checker<MatrixMul> checker(handle(), /* check_dispatch */ false);
-
-     // Builds a malloc-backed tensor from one int per element, storing two
-     // consecutive elements per byte (even index in the low nibble, odd index
-     // in the high nibble).
-     auto make_quint4_tensor = [](const TensorShape& shape,
-                                  dtype::Quantized4Asymm dtype,
-                                  const std::vector<int>& values) {
-         TensorND tensor;
-         tensor.layout = {shape, dtype};
-         tensor.raw_ptr =
-                 static_cast<dt_byte*>(malloc(tensor.layout.span().dist_byte()));
-         uint8_t* packed = static_cast<uint8_t*>(tensor.raw_ptr);
-         megdnn_assert(values.size() == tensor.layout.span().dist_elem());
-         const size_t nr_elems = tensor.layout.span().dist_elem();
-         for (size_t byte = 0; 2 * byte < nr_elems; ++byte) {
-             const int low = values[2 * byte];
-             const int high = values[2 * byte + 1];
-             packed[byte] = low | (high << 4);
-         }
-         return tensor;
-     };
-
-     const dtype::Quantized4Asymm src_dtype(0.3f, static_cast<uint8_t>(8));
-     const dtype::QuantizedS32 dst_dtype(0.3f * 0.3f);
-
-     MatrixMul::Param param;
-     checker.set_param(param).set_dtype(2, dst_dtype);
-     checker.exect(
-             Testcase{make_quint4_tensor(
-                              {8, 8}, src_dtype,
-                              {13, 2,  4,  13, 9,  3,  14, 14,
-                               14, 5,  3,  3,  15, 11, 8,  8,
-                               5,  7,  14, 15, 8,  2,  11, 1,
-                               15, 9,  13, 14, 2,  3,  11, 11,
-                               15, 10, 11, 0,  13, 12, 3,  11,
-                               9,  9,  10, 5,  2,  5,  8,  4,
-                               6,  9,  0,  0,  3,  9,  9,  8,
-                               8,  15, 7,  5,  0,  3,  9,  10}),
-                      make_quint4_tensor(
-                              {8, 8}, src_dtype,
-                              {5,  14, 13, 11, 4,  7,  12, 12,
-                               11, 7,  13, 10, 5,  6,  4,  2,
-                               3,  12, 2,  2,  13, 3,  14, 0,
-                               15, 15, 0,  2,  2,  13, 3,  14,
-                               10, 8,  9,  11, 0,  14, 15, 4,
-                               14, 7,  1,  6,  13, 2,  12, 5,
-                               2,  15, 7,  11, 13, 9,  8,  10,
-                               0,  11, 6,  10, 12, 2,  2,  12}),
-                      {}},
-             Testcase{{},
-                      {},
-                      TensorValue(
-                              {8, 8}, dst_dtype,
-                              {-90, 120, -3,  40,  -31, 58,  -54,  165,
-                               -5,  -19, 71,  87,  -51, 24,  92,   15,
-                               27,  62,  -59, -82, -40, 91,  11,   -16,
-                               -85, 138, -18, -36, 8,   -25, -56,  75,
-                               -46, -34, 67,  53,  -4,  -83, 111,  -86,
-                               -29, -17, 45,  -9,  38,  -22, -3,   -19,
-                               -17, -95, 94,  78,  63,  -35, -51,  21,
-                               -63, -14, 87,  31,  44,  -53, -107, 5})});
- }
-
- // 8x8 by 8x8 MatrixMul on QuantizedS4 inputs with a QuantizedS16 output,
- // checked against hard-coded expected values.
- TEST_F(NAIVE, MATRIX_MUL_QUANTIZEDS4_4x4x16) {
-     Checker<MatrixMul> checker(handle(), /* check_dispatch */ false);
-
-     // Builds a malloc-backed tensor from one int per element, storing two
-     // consecutive elements per byte (even index in the low nibble, odd index
-     // in the high nibble). Each value contributes only its low 4 bits.
-     auto GenTensorValueQint4 = [](const TensorShape& shape,
-                                   dtype::QuantizedS4 dtype,
-                                   const std::vector<int>& values) {
-         TensorND tensor;
-         tensor.layout = {shape, dtype};
-         tensor.raw_ptr =
-                 static_cast<dt_byte*>(malloc(tensor.layout.span().dist_byte()));
-         uint8_t* ptr = static_cast<uint8_t*>(tensor.raw_ptr);
-         megdnn_assert(values.size() == tensor.layout.span().dist_elem());
-         // The loop below consumes the values in pairs.
-         megdnn_assert(values.size() % 2 == 0);
-         for (size_t i = 0; i < values.size(); i += 2) {
-             const int val0 = values[i], val1 = values[i + 1];
-             // Mask before shifting: the inputs may be negative, and
-             // left-shifting a negative int is undefined behaviour before
-             // C++20.
-             ptr[i / 2] = static_cast<uint8_t>((val0 & 0xF) |
-                                               ((val1 & 0xF) << 4));
-         }
-         return tensor;
-     };
-     using Param = MatrixMul::Param;
-     Param param;
-     checker.set_param(param);
-     checker.set_dtype(2, dtype::QuantizedS16(0.3f * 0.3f));
-     checker.exect(
-             Testcase{
-                     GenTensorValueQint4(
-                             {8, 8}, dtype::QuantizedS4(0.3f),
-                             {-8, 7,  2,  1,  2,  3, 2,  7,
-                              2,  5,  3,  3,  7,  4, -7, 1,
-                              -5, 7,  -4, -1, -1, 2, 4,  1,
-                              7,  2,  -6, -2, -6, 3, 4,  4,
-                              -2, 2,  3,  0,  6,  5, 3,  4,
-                              -1, -1, -5, 5,  2,  5, 1,  4,
-                              6,  2,  0,  0,  3,  2, 2,  1,
-                              -4, -3, 7,  5,  0,  3, 2,  3}),
-                     GenTensorValueQint4(
-                             {8, 8}, dtype::QuantizedS4(0.3f),
-                             {5,  -8, -7, -6, 4,  7,  -5, -5,
-                              -4, 7,  -3, -2, 5,  6,  4,  2,
-                              3,  -1, 2,  2,  7,  3,  6,  0,
-                              5,  4,  0,  2,  2,  3,  3,  2,
-                              1,  -8, -7, -6, 0,  -5, -4, 4,
-                              -3, 7,  1,  6,  -2, 2,  -1, 5,
-                              2,  0,  7,  6,  5,  4,  3,  2,
-                              0,  0,  1,  0,  5,  2,  2,  6}),
-                     {}},
-             Testcase{
-                     {},
-                     {},
-                     TensorValue(
-                             {8, 8}, dtype::QuantizedS16(0.3f * 0.3f),
-                             {-60, 120, 49,   58,  58,  13, 92,  125,
-                              -5,  0,   -116, -70, 22,  9,  -14, 46,
-                              -69, 111, 44,   48,  6,   19, 42,  57,
-                              -8,  25,  10,   16,  26,  97, -28, -12,
-                              -12, 14,  2,    26,  48,  7,  24,  93,
-                              -2,  45,  2,    32,  -19, -1, -16, 72,
-                              23,  -44, -52,  -34, 45,  53, -28, 6,
-                              33,  45,  71,   84,  47,  10, 74,  61})});
- }
-
- // MatrixMul on Quantized8Asymm inputs with a QuantizedS32 output, checked
- // against hard-coded expected values: a 4x7 by 7x5 case without transposes,
- // then a {2, 1} by {2, 1} case with transposeA set.
- TEST_F(NAIVE, MATRIX_MUL_QUANTIZED8x8x32) {
-     using Param = MatrixMul::Param;
-     Checker<MatrixMul> checker(handle(), /* check_dispatch */ false);
-
-     Param param;
-     param.transposeA = false;
-     param.transposeB = false;
-     checker.set_param(param);
-     checker.exect(
-             Testcase{TensorValue({4, 7},
-                                  dtype::Quantized8Asymm(
-                                          0.1f, static_cast<uint8_t>(128)),
-                                  {6,   97,  210, 47,  213, 246, 92,
-                                   121, 132, 133, 37,  31,  87,  71,
-                                   0,   5,   198, 11,  97,  141, 222,
-                                   166, 76,  212, 190, 108, 245, 143}),
-                      TensorValue({7, 5},
-                                  dtype::Quantized8Asymm(
-                                          0.2f, static_cast<uint8_t>(233)),
-                                  {89,  207, 79,  135, 43,
-                                   29,  235, 171, 40,  78,
-                                   119, 145, 254, 162, 184,
-                                   139, 248, 214, 201, 183,
-                                   127, 75,  48,  200, 96,
-                                   109, 63,  60,  100, 120,
-                                   111, 182, 150, 227, 92}),
-                      {}},
-             Testcase{{},
-                      {},
-                      TensorValue({4, 5}, dtype::QuantizedS32(0.1f * 0.2f),
-                                  {2908,   -36975, -9180,  -3574,  8114,
-                                   30496,  23588,  32433,  11467,  30974,
-                                   36748,  -6939,  26715,  33787,  35329,
-                                   -24486, -25049, -19828, -16627, -18972})});
-
-     // Second case: same checker, A transposed.
-     param.transposeA = true;
-     checker.set_param(param);
-     checker.exect(
-             Testcase{TensorValue({2, 1},
-                                  dtype::Quantized8Asymm(
-                                          0.7f, static_cast<uint8_t>(128)),
-                                  {129, 129}),
-                      TensorValue({2, 1},
-                                  dtype::Quantized8Asymm(
-                                          0.4f, static_cast<uint8_t>(128)),
-                                  {129, 129}),
-                      {}},
-             Testcase{{},
-                      {},
-                      TensorValue({1, 1}, dtype::QuantizedS32(0.7f * 0.4f),
-                                  {2})});
- }
-
- // MK4 format with Float32 for A, B and C.
- TEST_F(NAIVE, MATRIX_MUL_MK4) {
-     const auto f32 = dtype::Float32();
-     run_matmul_mk_format(handle(), param::MatrixMul::Format::MK4, f32, f32,
-                          f32);
- }
-
- // MK8 format with Int16 for A and B and Int32 for C.
- TEST_F(NAIVE, MATRIX_MUL_MK8) {
-     const auto src_type = dtype::Int16();
-     const auto dst_type = dtype::Int32();
-     run_matmul_mk_format(handle(), param::MatrixMul::Format::MK8, src_type,
-                          src_type, dst_type);
- }
-
- // MK4_DOT format with Int8 for A and B and Int32 for C.
- TEST_F(NAIVE, MATRIX_MUL_MK4_DOT) {
-     const auto src_type = dtype::Int8();
-     const auto dst_type = dtype::Int32();
-     run_matmul_mk_format(handle(), param::MatrixMul::Format::MK4_DOT,
-                          src_type, src_type, dst_type);
- }
-
- // 8x8 by 8x8 BFloat16 MatrixMul, executed once with compute_mode FLOAT32 and
- // once with compute_mode DEFAULT. The extra implementation comes from
- // extra_impl_helper() and is built from a default-constructed param.
- TEST_F(NAIVE, MATRIX_MUL_BFLOAT16) {
-     using ComputeMode = param::MatrixMul::ComputeMode;
-     Checker<MatrixMul> checker(handle(), /* check_dispatch */ false);
-
-     MatrixMul::Param param;
-     // Snapshot taken before compute_mode is touched.
-     MatrixMul::Param fp32_param = param;
-
-     param.compute_mode = ComputeMode::FLOAT32;
-     checker.set_param(param)
-             .set_dtype(0, dtype::BFloat16())
-             .set_dtype(1, dtype::BFloat16())
-             .set_dtype(2, dtype::BFloat16());
-     checker.set_extra_opr_impl(
-             extra_impl_helper<MatrixMul>(handle(), fp32_param));
-     checker.set_epsilon(1.5e-2);
-
-     // One generator feeds both inputs; it stays alive until the end of the
-     // test because the checker is handed its address.
-     UniformFloatRNG frng{1e-2, 5.f};
-     checker.set_rng(0, &frng);
-     checker.set_rng(1, &frng);
-
-     checker.execs({{8, 8}, {8, 8}, {}});
-
-     param.compute_mode = ComputeMode::DEFAULT;
-     checker.set_param(param);
-     checker.execs({{8, 8}, {8, 8}, {}});
- }
-
- // vim: syntax=cpp.doxygen
|