- /**
- * \file src/opr/test/dnn/convolution.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
- #include "megbrain/comp_node_env.h"
- #include "./legacy_checker.h"
-
- #include "megbrain/opr/dnn/convolution.h"
- #include "megbrain/test/autocheck.h"
- #include "megbrain/test/helper.h"
- #include "megbrain/test/megdnn_helper.h"
- #include "megbrain/serialization/serializer.h"
- #include "megbrain/opr/basic_arith.h"
- #include "megbrain/gopt/inference.h"
- #include "megbrain/opr/tensor_manip.h"
- #include "megdnn/dtype.h"
- #include "megdnn/oprs/base.h"
-
- #include <gmock/gmock.h>
-
- #include <cmath>
- #include <memory>
- #include <random>
-
- using namespace mgb;
- namespace {
-
- using Param = opr::Convolution::Param;
- using Param3D = opr::Convolution3D::Param;
- using Mode = Param::Mode;
-
- Mode modes_to_check[] = {Mode::CONVOLUTION, Mode::CROSS_CORRELATION};
-
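- // Brute-force reference for ConvolutionBackwardData (deconvolution) in NCHW:
- // each input element scatters filter-weighted contributions into the output,
- // whose spatial size is (i - 1) * stride + (f - 1) * dilate + 1 - 2 * pad.
- // Handles both DENSE and GROUP sparse modes; used by the deconv tests below.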
- void conv_bwd_data_brute(const std::vector<std::shared_ptr<HostTensorND>>& inps,
- std::shared_ptr<HostTensorND>& dest,
- const opr::ConvolutionBackwardData::Param& param) {
- mgb_assert(param.format == Param::Format::NCHW);
- auto &&data = *inps[0], &&filter = *inps[1];
- size_t N = data.shape(0), IH = data.shape(2), IW = data.shape(3);
- size_t GROUP, ICPG, OCPG, FH, FW;
-
- if (param.sparse == Param::Sparse::DENSE) {
- GROUP = 1, ICPG = filter.shape(0), OCPG = filter.shape(1),
- FH = filter.shape(2), FW = filter.shape(3);
- } else {
- mgb_assert(param.sparse == Param::Sparse::GROUP);
- GROUP = filter.shape(0), ICPG = filter.shape(1), OCPG = filter.shape(2),
- FH = filter.shape(3), FW = filter.shape(4);
- }
- auto get_shp = [](size_t inp, size_t filter, size_t stride, size_t pad,
- size_t dilate) {
- return (inp - 1) * stride + (filter - 1) * dilate + 1 - pad * 2;
- };
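- // e.g. IH = 6, FH = 3, stride = 2, pad = 1, dilate = 2
- // gives OH = (6 - 1) * 2 + (3 - 1) * 2 + 1 - 2 * 1 = 13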
- size_t OH = get_shp(IH, FH, param.stride_h, param.pad_h, param.dilate_h),
- OW = get_shp(IW, FW, param.stride_w, param.pad_w, param.dilate_w);
- dest = std::make_shared<HostTensorND>(CompNode::load("xpu0"),
- TensorShape{N, OCPG * GROUP, OH, OW});
- auto&& out = *dest;
- auto fptr = filter.ptr<float>(), dptr = data.ptr<float>(),
- optr = out.ptr<float>();
- memset(optr, 0, sizeof(float) * out.shape().total_nr_elems());
- auto ol = out.layout(), fl = filter.layout();
-
- #define FOR2(a, A, b, B) \
- for (size_t a = 0; a < A; ++a) \
- for (size_t b = 0; b < B; ++b)
- #define FOR3(a, A, b, B, c, C) \
- FOR2(a, A, b, B) \
- for (size_t c = 0; c < C; ++c)
-
- FOR3(n, N, group, GROUP, icg, ICPG)
- FOR2(ih, IH, iw, IW) {
- float scale = *(dptr++);
-
- FOR3(ocg, OCPG, fh, FH, fw, FW) {
- auto oc_tot = group * OCPG + ocg;
- int oh = int(ih * param.stride_h + fh * param.dilate_h) -
- int(param.pad_h),
- ow = int(iw * param.stride_w + fw * param.dilate_w) -
- int(param.pad_w);
- if (oh >= 0 && ow >= 0 && oh < static_cast<int>(OH) &&
- ow < static_cast<int>(OW)) {
- auto out_off = n * ol.stride[0] + oc_tot * ol.stride[1] +
- oh * ol.stride[2] + ow;
- size_t flt_off = 0;
- if (param.sparse == Param::Convolution::Sparse::DENSE) {
- flt_off = icg * fl.stride[0] +
- ocg * fl.stride[1] + fh * fl.stride[2] + fw;
- } else {
- flt_off = group * fl.stride[0] + icg * fl.stride[1] +
- ocg * fl.stride[2] + fh * fl.stride[3] + fw;
- }
- optr[out_off] += scale * fptr[flt_off];
- }
- }
- }
- #undef FOR3
- #undef FOR2
- }
-
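- // Brute-force reference for ConvolutionBackwardFilter (dense NCHW only):
- // grad(oc, ic, fh, fw) = sum over n, oh, ow of diff(n, oc, oh, ow) * src(n, ic, ih, iw)
- // with ih = oh * stride_h + fh - pad_h (likewise for iw); padded positions
- // contribute zero via the unsigned-wraparound `valid` check.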
- void conv_bwd_flt_brute(const std::vector<std::shared_ptr<HostTensorND>>& inps,
- std::shared_ptr<HostTensorND>& out,
- const opr::ConvolutionBackwardFilter::Param& param) {
- auto &&src = *inps[0], &&diff = *inps[1], &&filter = *inps[2];
- size_t N = src.shape(0), IH = src.shape(2), IW = src.shape(3),
- OC = filter.shape(0), IC = filter.shape(1), FH = filter.shape(2),
- FW = filter.shape(3), OH = diff.shape(2), OW = diff.shape(3);
- out = std::make_shared<HostTensorND>(CompNode::load("xpu0"),
- TensorShape{OC, IC, FH, FW});
- auto&& grad = *out;
- auto sptr = src.ptr<float>(), dptr = diff.ptr<float>(),
- gptr = grad.ptr<float>();
- memset(gptr, 0, sizeof(float) * grad.shape().total_nr_elems());
- auto valid = [&](size_t ih, size_t iw) { return ih < IH && iw < IW; };
- for (size_t n = 0; n < N; ++n)
- for (size_t oc = 0; oc < OC; ++oc)
- for (size_t ic = 0; ic < IC; ++ic) {
- for (size_t oh = 0; oh < OH; ++oh)
- for (size_t ow = 0; ow < OW; ++ow) {
- for (size_t fh = 0; fh < FH; ++fh)
- for (size_t fw = 0; fw < FW; ++fw) {
- size_t ih = oh * param.stride_h + fh -
- param.pad_h,
- iw = ow * param.stride_w + fw -
- param.pad_w;
- auto src_data =
- valid(ih, iw)
- ? sptr[(n * IC + ic) * IH * IW +
- ih * IW + iw]
- : 0;
- gptr[(oc * IC + ic) * FH * FW + fh * FW + fw] +=
- dptr[(n * OC + oc) * OH * OW + oh * OW +
- ow] *
- src_data;
- }
- }
- }
- }
-
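- // Brute-force reference for LocalShare: the output plane is split into a
- // spatial_groups_h x spatial_groups_w grid and each tile uses its own filter
- // slice; filter layout is {sgh, sgw, ic, fh, fw, oc}. Only CROSS_CORRELATION
- // with dilation 1 is supported.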
- void local_share_brute(const std::vector<std::shared_ptr<HostTensorND>>& inps,
- std::shared_ptr<HostTensorND>& out,
- const opr::LocalShare::Param& param) {
- auto in = inps[0], filter = inps[1];
- mgb_assert(in->shape().ndim == 4);
- mgb_assert(filter->shape().ndim == 6);
- int batch_size = in->shape()[0], ci = in->shape()[1], hi = in->shape()[2],
- wi = in->shape()[3];
- int fh = filter->shape()[3], fw = filter->shape()[4];
- int ph = param.pad_h, pw = param.pad_w;
- int sh = param.stride_h, sw = param.stride_w;
- int dh = param.dilate_h, dw = param.dilate_w;
- int sgh = filter->shape()[0], sgw = filter->shape()[1];
- mgb_assert(dh == 1 && dw == 1);
- mgb_assert(static_cast<uint32_t>(sgh) == param.spatial_groups_h &&
- static_cast<uint32_t>(sgw) == param.spatial_groups_w);
-
- int ho = (hi + 2 * ph - fh) / sh + 1;
- int wo = (wi + 2 * pw - fw) / sw + 1;
- mgb_assert(ho % sgh == 0 && wo % sgw == 0);
- int grp_ho = ho / sgh, grp_wo = wo / sgw;
- int co = filter->shape()[5];
-
- size_t u_batch = batch_size, u_co = co, u_ho = ho, u_wo = wo;
- out = std::make_shared<HostTensorND>(
- CompNode::load("xpu0"), TensorShape{u_batch, u_co, u_ho, u_wo});
- mgb_assert(param.mode == Param::Mode::CROSS_CORRELATION);
- for (int n = 0; n < batch_size; ++n) {
- for (int oc = 0; oc < co; ++oc) {
- for (int oh = 0; oh < ho; ++oh) {
- for (int ow = 0; ow < wo; ++ow) {
- size_t u_n = n, u_oc = oc, u_oh = oh, u_ow = ow;
- float& dval = out->ptr<float>({u_n, u_oc, u_oh, u_ow})[0];
- dval = 0;
- int grp_oh_idx = oh / grp_ho;
- int grp_ow_idx = ow / grp_wo;
- for (int ic = 0; ic < ci; ++ic) {
- for (int kh = 0; kh < fh; ++kh) {
- for (int kw = 0; kw < fw; ++kw) {
- int ih = oh * sh - ph + kh;
- int iw = ow * sw - pw + kw;
- float sval = 0.f;
- float fval = 0.f;
- if (ih >= 0 && ih < hi && iw >= 0 && iw < wi) {
- sval = in->ptr<float>(
- {static_cast<size_t>(n),
- static_cast<size_t>(ic),
- static_cast<size_t>(ih),
- static_cast<size_t>(iw)})[0];
- }
- fval = filter->ptr<float>(
- {static_cast<size_t>(grp_oh_idx),
- static_cast<size_t>(grp_ow_idx),
- static_cast<size_t>(ic),
- static_cast<size_t>(kh),
- static_cast<size_t>(kw),
- static_cast<size_t>(oc)})[0];
- dval += fval * sval;
- }
- }
- }
- }
- }
- }
- }
- }
-
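- // Brute-force reference for the dense NCHW forward convolution; handles both
- // CONVOLUTION (flipped kernel) and CROSS_CORRELATION modes as well as dilation.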
- void convolution_brute(const std::vector<std::shared_ptr<HostTensorND>> &in_tensor,
- std::shared_ptr<HostTensorND> &out_tensor,
- const opr::Convolution::Param &param)
- {
- mgb_assert(in_tensor.size() == 2);
- auto in = in_tensor[0], filter = in_tensor[1];
- mgb_assert(in->shape().ndim == 4);
- mgb_assert(filter->shape().ndim == 4);
-
- int batch_size = in->shape().shape[0];
- int ic = in->shape().shape[1];
- int ih = in->shape().shape[2];
- int iw = in->shape().shape[3];
-
- int fh = filter->shape().shape[2];
- int fw = filter->shape().shape[3];
-
- int ph = param.pad_h;
- int pw = param.pad_w;
-
- int sh = param.stride_h;
- int sw = param.stride_w;
-
- int dh = param.dilate_h;
- int dw = param.dilate_w;
-
- mgb_assert(ih + 2*ph >= (fh - 1) * dh + 1);
- mgb_assert(iw + 2*pw >= (fw - 1) * dw + 1);
- int oh = (ih + 2*ph - ((fh - 1) * dh + 1)) / sh + 1;
- int ow = (iw + 2*pw - ((fw - 1) * dw + 1)) / sw + 1;
- mgb_assert(static_cast<size_t>(ic) == filter->shape().shape[1]);
- int oc = filter->shape().shape[0];
-
- out_tensor = std::make_shared<HostTensorND>(CompNode::load("xpu0"),
- TensorShape{
- static_cast<size_t>(batch_size),
- static_cast<size_t>(oc),
- static_cast<size_t>(oh),
- static_cast<size_t>(ow)});
-
- int pn, poc, poh, pow, pih, piw, pic, pfh, pfw;
- for (pn = 0; pn < batch_size; ++pn)
- for (poc = 0; poc < oc; ++poc)
- for (poh = 0, pih = -ph; poh < oh; ++poh, pih += sh)
- for (pow = 0, piw = -pw; pow < ow; ++pow, piw += sw)
- {
- float &target = out_tensor->ptr<float>({
- static_cast<size_t>(pn),
- static_cast<size_t>(poc),
- static_cast<size_t>(poh),
- static_cast<size_t>(pow)})[0];
- target = 0;
- for (pic = 0; pic < ic; ++pic)
- for (pfh = 0; pfh < fh; ++pfh)
- for (pfw = 0; pfw < fw; ++pfw)
- {
- int prih, priw;
- float img_data, filter_data;
- if (param.mode == Param::Mode::CONVOLUTION) {
- prih = pih + (fh - pfh - 1) * dh;
- priw = piw + (fw - pfw - 1) * dw;
- } else {
- mgb_assert(param.mode == Param::Mode::CROSS_CORRELATION);
- prih = pih + pfh * dh;
- priw = piw + pfw * dw;
- }
- if (prih >= 0 && prih < ih &&
- priw >= 0 && priw < iw) {
- img_data = in_tensor[0]->ptr<float>({
- static_cast<size_t>(pn),
- static_cast<size_t>(pic),
- static_cast<size_t>(prih),
- static_cast<size_t>(priw)})[0];
- } else {
- img_data = 0;
- }
- filter_data = filter->ptr<float>({
- static_cast<size_t>(poc),
- static_cast<size_t>(pic),
- static_cast<size_t>(pfh),
- static_cast<size_t>(pfw)})[0];
- target += img_data * filter_data;
- }
- }
- }
-
-
-
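- // Copy the fields shared with Convolution::Param out of a (Batch)ConvBias param
- // so the plain convolution_brute reference can be reused by the conv-bias tests.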
- opr::Convolution::Param convert_to_conv_param(
- const opr::ConvBiasForward::Param& param) {
- return opr::Convolution::Param{
- param.mode, param.pad_h, param.pad_w,
- param.stride_h, param.stride_w, param.dilate_h,
- param.dilate_w, param.sparse, param.format};
- };
- #if MGB_CUDA
- opr::Convolution::Param convert_to_conv_param(
- const opr::BatchConvBiasForward::Param& param) {
- return opr::Convolution::Param{
- param.mode, param.pad_h, param.pad_w,
- param.stride_h, param.stride_w, param.dilate_h,
- param.dilate_w, param.sparse, param.format};
- };
- #endif
-
- TEST(TestOprDNN, ConvolutionForward) {
- uint32_t ih = 10, ic = 16, oc = 32, ph = 0, sh = 1, fh = 2;
- for (auto mode: modes_to_check) {
- uint32_t iw = ih + 1, fw = fh + 1, pw = ph + 1, sw = sh + 1;
- Param param{mode, ph, pw, sh, sw};
- size_t batch_size = 32;
- // !!! DEPRECATED. use AutoOprChecker instead.
- opr::test::ForwardChecker<opr::Convolution, 2> forward_checker({
- {batch_size, ic, ih, iw},
- {oc, ic, fh, fw}},
- convolution_brute, param);
- forward_checker.run();
- }
- }
-
- TEST(TestOprDNN, ConvolutionBackward) {
- uint32_t ih = 10, ic = 16, oc = 32, ph = 0, sh = 1, fh = 2;
- for (auto mode: modes_to_check) {
- uint32_t iw = 11, fw = 4, pw = 1, sw = 3;
- Param param{mode, ph, pw, sh, sw};
- size_t batch_size = 32;
- // !!! DEPRECATED. use AutoOprChecker instead.
- opr::test::BackwardChecker<opr::Convolution, 2> backward_checker({
- {batch_size, ic, ih, iw},
- {oc, ic, fh, fw}}, param, 1e-2, 1);
- backward_checker.run();
- }
- }
-
- TEST(TestOprDNN, ConvBiasExePolicy) {
- using Param = opr::ConvBias::Param;
- Param param;
- using Policy = opr::ConvBias::ExecutionPolicy;
- using S = Policy::Strategy;
-
- auto cn = CompNode::load("cpux");
-
- auto orig_impl = PersistentCache::set_impl(
- std::make_shared<InMemoryPersistentCache>());
-
- auto run = [&](S strategy) {
- auto graph = ComputingGraph::make();
- HostTensorGenerator<> gen;
-
- auto mkvar = [&](const char* name, const TensorShape& shp,
- const DType& dtype) {
- return opr::TypeCvt::make(
- opr::Host2DeviceCopy::make(*graph, gen(shp), cn)
- .rename(name),
- dtype);
- };
-
- auto x = mkvar("x", {20, 50, 50, 16}, dtype::QuantizedS8(2.5f));
- auto w = mkvar("w", {24, 3, 3, 16}, dtype::QuantizedS8(2.5f));
- auto bias = mkvar("bias", {1, 1, 1, 24}, dtype::QuantizedS32(6.25f));
- param.nonlineMode = Param::NonlineMode::RELU;
- param.format = Param::Format::NHWC;
- Policy policy;
- policy.strategy = strategy;
-
- auto conv_bias = opr::ConvBias::make(
- x, w, bias, param, policy,
- OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
- HostTensorND host_y;
- auto func = graph->compile({make_callback_copy(conv_bias, host_y)});
- func->execute();
- //! set a new cache
- PersistentCache::set_impl(std::make_shared<InMemoryPersistentCache>());
- };
-
- #if MGB_ENABLE_FASTRUN
- for (auto strategy :
- SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
- S::PROFILE | S::HEURISTIC}) {
- #else
- for (auto strategy :
- SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
- #endif
- run(strategy);
- }
- ASSERT_THROW(run(S::OPTIMIZED | S::PROFILE), MegBrainError);
- PersistentCache::set_impl(orig_impl);
- }
-
- TEST(TestOprDNN, ConvBiasExePolicy_Quantized8Asym) {
- using Param = opr::ConvBias::Param;
- Param param;
- using Policy = opr::ConvBias::ExecutionPolicy;
- using S = Policy::Strategy;
-
- auto cn = CompNode::load("cpux");
-
- for (auto strategy :
- SmallVector<S>{S::PROFILE, S::PROFILE | S::REPRODUCIBLE}) {
- auto graph = ComputingGraph::make();
- HostTensorGenerator<> gen;
-
- auto mkvar = [&](const char* name, const TensorShape& shp,
- const DType& dtype) {
- return opr::TypeCvt::make(
- opr::Host2DeviceCopy::make(*graph, gen(shp), cn)
- .rename(name),
- dtype);
- };
-
- auto x = mkvar("x", {20, 50, 50, 16},
- dtype::Quantized8Asymm(2.5f, static_cast<uint8_t>(0)));
- auto w = mkvar("w", {24, 3, 3, 16},
- dtype::Quantized8Asymm(2.5f, static_cast<uint8_t>(0)));
- auto bias = mkvar("bias", {1, 1, 1, 24}, dtype::QuantizedS32(6.25f));
-
- param.nonlineMode = Param::NonlineMode::RELU;
- param.format = Param::Format::NHWC;
-
- Policy policy;
- policy.strategy = strategy;
-
- auto conv_bias = opr::ConvBias::make(
- x, w, bias, param, policy,
- OperatorNodeConfig{dtype::Quantized8Asymm(2.5f, static_cast<uint8_t>(0))});
- HostTensorND host_y;
- auto func = graph->compile({make_callback_copy(conv_bias, host_y)});
- func->execute();
- }
- }
-
- TEST(TestOprDNN, ConvolutionExePolicy) {
- Param param{Mode::CONVOLUTION};
- using Policy = opr::Convolution::ExecutionPolicy;
- using S = Policy::Strategy;
-
- int nr_get = 0;
- auto on_get = [&nr_get](const std::string&, const void*, size_t,
- const void*, size_t) { ++nr_get; };
- PersistentCacheHook cache_hook{on_get};
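- // nr_get counts persistent-cache lookups: the heuristic-only strategy is
- // expected to never query the cache, while profiling strategies must.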
-
- #if MGB_ENABLE_FASTRUN
- for (auto strategy :
- SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
- S::PROFILE | S::HEURISTIC}) {
- #else
- for (auto strategy :
- SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
- #endif
- using Checker = AutoOprChecker<2, 1>;
-
- auto make_graph = [&](const Checker::SymInpArray& inputs)
- -> Checker::SymOutArray {
- Policy policy;
- policy.strategy = strategy;
- auto out =
- opr::Convolution::make(inputs[0], inputs[1], param, policy);
- return {out};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- std::shared_ptr<HostTensorND> sh_out;
- convolution_brute({inp.begin(), inp.end()}, sh_out, param);
- dest[0] = *sh_out;
- };
-
- Checker::RunOptions opt;
- opt.numdiff_eps = 1;
- nr_get = 0;
- Checker(make_graph, fwd)
- .run({TensorShape{3, 2, 10, 6}, {4, 2, 3, 2}}, opt)
- .run({TensorShape{6, 3, 8, 13}, {2, 3, 2, 13}}, opt)
- .run({TensorShape{1, 1, 10, 10}, {2, 1, 3, 3}}, opt);
- if (strategy == S::HEURISTIC) {
- ASSERT_EQ(0, nr_get);
- } else {
- ASSERT_LT(0, nr_get);
- }
- }
- }
-
- TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) {
- REQUIRE_GPU(1);
- Param param{Mode::CROSS_CORRELATION, 1, 1, 1, 1};
- param.compute_mode = Param::ComputeMode::FLOAT32;
- using Policy = opr::Convolution::ExecutionPolicy;
- using S = Policy::Strategy;
-
- auto gen_bfp16 = [](HostTensorND& dest) {
- RNGxorshf rng{next_rand_seed()};
- auto rand_real = [&rng]() {
- std::uniform_real_distribution<float> dist(-1, 1);
- return dist(rng);
- };
- auto ptr = dest.ptr<dt_bfloat16>();
- size_t elems = dest.shape().total_nr_elems();
- for (size_t i = 0; i < elems; i++) {
- ptr[i] = dt_bfloat16(rand_real());
- }
- };
-
- auto f32_to_bf16 = [](const std::shared_ptr<HostTensorND>& src)
- -> std::shared_ptr<HostTensorND> {
- auto ret = std::make_shared<HostTensorND>(
- src->comp_node(), src->shape(), dtype::BFloat16{});
- for (size_t i = 0; i < src->layout().total_nr_elems(); i++) {
- ret->ptr<dt_bfloat16>()[i] = src->ptr<dt_float32>()[i];
- }
- return ret;
- };
-
- auto bf16_to_f32 = [](const std::shared_ptr<HostTensorND>& src)
- -> std::shared_ptr<HostTensorND> {
- auto ret = std::make_shared<HostTensorND>(
- src->comp_node(), src->shape(), dtype::Float32{});
- for (size_t i = 0; i < src->layout().total_nr_elems(); i++) {
- ret->ptr<dt_float32>()[i] = src->ptr<dt_bfloat16>()[i];
- }
- return ret;
- };
-
- int nr_get = 0;
- auto on_get = [&nr_get](const std::string&, const void*, size_t,
- const void*, size_t) { ++nr_get; };
- PersistentCacheHook cache_hook{on_get};
-
- #if MGB_ENABLE_FASTRUN
- for (auto strategy :
- {S::PROFILE, S::HEURISTIC, S(S::PROFILE | S::REPRODUCIBLE),
- S(S::PROFILE | S::HEURISTIC)}) {
- #else
- for (auto strategy : {S::HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) {
- #endif
- using Checker = AutoOprChecker<2, 1>;
-
- auto make_graph = [&](const Checker::SymInpArray& inputs)
- -> Checker::SymOutArray {
- Policy policy;
- policy.strategy = strategy;
- return {opr::ConvolutionBackwardData::make_deconv(
- inputs[0], inputs[1], param, policy)};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- std::shared_ptr<HostTensorND> out;
- conv_bwd_data_brute(
- {bf16_to_f32(inp[0]), bf16_to_f32(inp[1])}, out,
- param);
- dest[0] = *f32_to_bf16(out);
- };
-
- Checker::RunOptions opt;
- opt.outputs_max_err = 1e-3;
- nr_get = 0;
- Checker(make_graph, fwd)
- .disable_grad_check()
- .set_input_dtype(0, dtype::BFloat16{})
- .set_input_dtype(1, dtype::BFloat16{})
- .set_input_generator(0, gen_bfp16)
- .set_input_generator(1, gen_bfp16)
- .run({TensorShape{3, 4, 10, 6}, {4, 2, 3, 3}}, opt)
- .run({TensorShape{2, 2, 4, 3}, {2, 2, 3, 3}}, opt)
- .run({TensorShape{1, 3, 10, 6}, {3, 2, 3, 3}}, opt);
- if (strategy == S::HEURISTIC) {
- ASSERT_EQ(0, nr_get);
- } else {
- ASSERT_LT(0, nr_get);
- }
- }
- }
-
- #if MGB_ENABLE_FASTRUN
- TEST(TestOprDNN, ConvolutionBackwardDataFloat16ExePolicy) {
- REQUIRE_GPU(1);
- Param param{Mode::CROSS_CORRELATION, 1, 1, 1, 1};
- param.compute_mode = Param::ComputeMode::FLOAT32;
- using Policy = opr::Convolution::ExecutionPolicy;
- using S = Policy::Strategy;
-
- auto gen_fp16 = [](HostTensorND& dest) {
- RNGxorshf rng{next_rand_seed()};
- auto rand_real = [&rng]() {
- std::uniform_real_distribution<float> dist(-1, 1);
- return dist(rng);
- };
- auto ptr = dest.ptr<dt_float16>();
- size_t elems = dest.shape().total_nr_elems();
- for (size_t i = 0; i < elems; i++) {
- ptr[i] = dt_float16(rand_real());
- }
- };
-
- auto f32_to_f16 = [](const std::shared_ptr<HostTensorND>& src)
- -> std::shared_ptr<HostTensorND> {
- auto ret = std::make_shared<HostTensorND>(
- src->comp_node(), src->shape(), dtype::Float16{});
- for (size_t i = 0; i < src->layout().total_nr_elems(); i++) {
- ret->ptr<dt_float16>()[i] = src->ptr<dt_float32>()[i];
- }
- return ret;
- };
-
- auto f16_to_f32 = [](const std::shared_ptr<HostTensorND>& src)
- -> std::shared_ptr<HostTensorND> {
- auto ret = std::make_shared<HostTensorND>(
- src->comp_node(), src->shape(), dtype::Float32{});
- for (size_t i = 0; i < src->layout().total_nr_elems(); i++) {
- ret->ptr<dt_float32>()[i] = src->ptr<dt_float16>()[i];
- }
- return ret;
- };
-
- int nr_get = 0;
- auto on_get = [&nr_get](const std::string&, const void*, size_t,
- const void*, size_t) { ++nr_get; };
- PersistentCacheHook cache_hook{on_get};
-
- auto strategy = S(S::PROFILE | S::REPRODUCIBLE);
- using Checker = AutoOprChecker<2, 1>;
-
- auto make_graph =
- [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
- Policy policy;
- policy.strategy = strategy;
- return {opr::ConvolutionBackwardData::make_deconv(inputs[0], inputs[1],
- param, policy)};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- std::shared_ptr<HostTensorND> out;
- conv_bwd_data_brute({f16_to_f32(inp[0]), f16_to_f32(inp[1])}, out,
- param);
- dest[0] = *f32_to_f16(out);
- };
-
- Checker::RunOptions opt;
- opt.outputs_max_err = 1e-2;
- nr_get = 0;
- Checker(make_graph, fwd)
- .disable_grad_check()
- .set_input_dtype(0, dtype::Float16{})
- .set_input_dtype(1, dtype::Float16{})
- .set_input_generator(0, gen_fp16)
- .set_input_generator(1, gen_fp16)
- .run({TensorShape{3, 4, 10, 6}, {4, 2, 3, 3}}, opt)
- .run({TensorShape{2, 2, 4, 3}, {2, 2, 3, 3}}, opt)
- .run({TensorShape{1, 3, 10, 6}, {3, 2, 3, 3}}, opt);
- if (strategy == S::HEURISTIC) {
- ASSERT_EQ(0, nr_get);
- } else {
- ASSERT_LT(0, nr_get);
- }
- }
- #endif
-
- TEST(TestOprDNN, Deconvolution) {
- // dilated grouped deconv
- using Checker = AutoOprChecker<2, 1>;
-
- Param param{Mode::CROSS_CORRELATION, 0, 1, 1, 2};
- param.dilate_h = 2;
- param.sparse = Param::Sparse::GROUP;
- auto make_graph = [&](
- const Checker::SymInpArray &inputs) -> Checker::SymOutArray {
- return {opr::ConvolutionBackwardData::make_deconv(
- inputs[0], inputs[1], param)};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- std::shared_ptr<HostTensorND> out;
- conv_bwd_data_brute({inp[0], inp[1]}, out, param);
- dest[0] = *out;
- };
-
- Checker::RunOptions opt;
- opt.numdiff_eps = 1;
- Checker(make_graph, fwd).
- run({TensorShape{2, 4, 6, 8}, {1, 4, 5, 3, 2}}, opt).
- run({TensorShape{3, 2, 1, 1}, {2, 1, 1, 4, 3}}, opt).
- run({TensorShape{4, 6, 7, 2}, {2, 3, 4, 8, 13}}, opt);
- }
-
- TEST(TestOprDNN, DeconvolutionExePolicy_QuantizedS8) {
- REQUIRE_GPU(1);
- auto cn = CompNode::load("gpu0");
- cn.activate();
- REQUIRE_CUDA_COMPUTE_CAPABILITY(6, 1);
-
- Param param;
- using Policy = opr::ConvolutionBackwardData::ExecutionPolicy;
- using S = Policy::Strategy;
-
- #if MGB_ENABLE_FASTRUN
- for (auto strategy :
- {S::PROFILE, S::HEURISTIC, S(S::PROFILE | S::REPRODUCIBLE),
- S(S::PROFILE | S::HEURISTIC)}) {
- #else
- for (auto strategy : {S::HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) {
- #endif
- auto graph = ComputingGraph::make();
- HostTensorGenerator<> gen;
-
- auto mkvar = [&](const char* name, const TensorShape& shp,
- const DType& dtype) {
- return opr::TypeCvt::make(
- opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name),
- dtype);
- };
-
- auto x = mkvar("x", {16, 4, 50, 50, 4}, dtype::QuantizedS8(1.2f));
- auto w = mkvar("w", {16, 4, 4, 4, 4}, dtype::QuantizedS8(1.3f));
-
- param.format = Param::Format::NCHW4;
- param.pad_h = param.pad_w = 2;
- param.stride_h = param.stride_w = 2;
-
- Policy policy;
- policy.strategy = strategy;
-
- auto deconv = opr::ConvolutionBackwardData::make_deconv(
- x, w, param, policy,
- OperatorNodeConfig{dtype::QuantizedS8(1.2f)});
- HostTensorND host_y;
- auto func = graph->compile({make_callback_copy(deconv, host_y)});
- func->execute();
- }
- }
-
- TEST(TestOprDNN, ConvolutionBackwardFilter) {
- using Checker = AutoOprChecker<3, 1>;
-
- constexpr size_t PH = 0, PW = 1, SH = 1, SW = 2;
-
- auto make_graph = [&](
- const Checker::SymInpArray &inputs) -> Checker::SymOutArray {
- Param param{Mode::CROSS_CORRELATION, PH, PW, SH, SW};
- return {opr::ConvolutionBackwardFilter::make(
- inputs[0], inputs[1], inputs[2], param)};
- };
-
- auto fwd = [&](Checker::NumOutArray &dest, Checker::NumInpArray inp) {
- std::shared_ptr<HostTensorND> out;
- conv_bwd_flt_brute({inp[0], inp[1], inp[2]}, out,
- Param{Mode::CROSS_CORRELATION, PH, PW, SH, SW});
- dest[0] = *out;
- };
-
- #define get_shp(N, P, S, F) ((N + 2 * P - F) / S + 1)
- #define inp_tensor(N, IC, OC, IH, IW, FH, FW) \
- { TensorShape{N, IC, IH, IW}, \
- {N, OC, get_shp(IH, PH, SH, FH), get_shp(IW, PW, SW, FW)}, \
- {OC, IC, FH, FW} }
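- // e.g. inp_tensor(2, 3, 4, 9, 8, 4, 3) yields src {2, 3, 9, 8},
- // diff {2, 4, 6, 4} and filter {4, 3, 4, 3} with PH=0, PW=1, SH=1, SW=2.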
- Checker::RunOptions opt;
- opt.numdiff_eps = 1;
- Checker(make_graph, fwd).
- run(inp_tensor(2, 3, 4, 9, 8, 4, 3), opt).
- run(inp_tensor(1, 5, 3, 7, 9, 3, 4), opt).
- run(inp_tensor(3, 4, 4, 9, 9, 3, 3), opt);
- #undef inp_tensor
- #undef get_shp
- }
-
- TEST(TestOprDNN, DilatedConvolution) {
- using Checker = AutoOprChecker<2, 1>;
-
- opr::ConvolutionForward::Param param;
- param.pad_h = 5;
- param.pad_w = 2;
- param.stride_w = 2;
- param.dilate_h = 2;
-
- auto make_graph = [&](const Checker::SymInpArray &inputs) ->
- Checker::SymOutArray {
- return {opr::Convolution::make(inputs[0], inputs[1], param)};
- };
-
- auto fwd = [&](Checker::NumOutArray &dest, Checker::NumInpArray inp) {
- auto opr = megdnn_naive_handle()->create_operator<
- megdnn::Convolution>();
- opr->param() = param;
- TensorLayout dest_layout;
- opr->deduce_layout(inp[0]->layout(), inp[1]->layout(), dest_layout);
- std::vector<dt_byte> workspace(opr->get_workspace_in_bytes(
- inp[0]->layout(), inp[1]->layout(), dest_layout, nullptr));
- dest[0].dtype(dtype::Float32()).
- comp_node(inp[0]->comp_node()).resize(dest_layout);
- opr->exec(inp[0]->as_megdnn(), inp[1]->as_megdnn(), dest[0].as_megdnn(),
- nullptr, {workspace.data(), workspace.size()});
- };
- Checker::RunOptions option;
- option.numdiff_eps = 0.1;
-
- Checker(make_graph, fwd).
- run({TensorShape{2, 3, 8, 7}, TensorShape{4, 3, 2, 2}}, option).
- run({TensorShape{2, 3, 8, 7}, TensorShape{4, 3, 3, 2}}, option).
- run({TensorShape{2, 3, 8, 9}, TensorShape{4, 3, 3, 2}}, option);
- }
-
- TEST(TestOprDNN, GroupConv) {
- using Checker = AutoOprChecker<2, 1>;
- opr::Convolution::Param param;
- param.pad_h = 1;
- param.pad_w = 2;
- param.stride_h = 2;
- auto make_graph = [&](
- const Checker::SymInpArray &inputs) -> Checker::SymOutArray {
- auto p1 = param;
- p1.sparse = opr::Convolution::Param::Sparse::GROUP;
- return {opr::Convolution::make(inputs[0], inputs[1], p1)};
- };
-
- auto cn = CompNode::load("xpux");
- auto inp0 = std::make_shared<HostTensorND>(cn, dtype::Float32()),
- inp1 = std::make_shared<HostTensorND>(cn, dtype::Float32());
- HostTensorND out_raw;
- auto graph_raw = ComputingGraph::make();
- auto func_raw = graph_raw->compile({
- make_callback_copy(
- opr::Convolution::make(
- opr::Host2DeviceCopy::make(*graph_raw, inp0),
- opr::Host2DeviceCopy::make(*graph_raw, inp1),
- param),
- out_raw)});
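- // Reference for grouped convolution: run the dense convolution graph once per
- // group on channel slices of the input and filter, then write each result into
- // the corresponding channel slice of the output.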
- auto fwd = [&](Checker::NumOutArray &dest, Checker::NumInpArray inp) {
- auto &&out = dest[0];
- auto sl = inp[0]->layout(),
- fl = inp[1]->layout().remove_axis(0);
- TensorLayout ol;
- auto group = inp[1]->layout()[0];
- sl.shape[1] /= group;
- for (size_t i = 0; i < group; ++ i) {
- inp0->copy_from(inp[0]->sub(SubTensorSpec::make_from_offset_elem(
- sl, i * sl[1] * sl[2] * sl[3])));
- inp1->copy_from(inp[1]->sub(SubTensorSpec::make_from_offset_elem(
- fl, i * fl.total_nr_elems())));
- func_raw->execute();
- if (!i) {
- auto oshp = out_raw.shape();
- oshp[1] *= group;
- out.resize(oshp);
- ol = out.layout();
- ol[1] /= group;
- }
- out.sub(SubTensorSpec::make_from_offset_elem(
- ol, i * ol[1] * ol[2] * ol[3])).copy_from_fixlayout(
- out_raw);
- }
- };
-
- Checker::RunOptions opt;
- opt.numdiff_eps = 1;
- opt.outputs_max_err = 5e-5;
- Checker checker{make_graph, fwd};
- auto run = [&](const TensorShape &ishp,
- size_t fh, size_t fw, size_t oc, size_t group) {
- size_t ic = ishp[1];
- TensorShape flt{group, oc/group, ic/group, fh, fw};
- checker.run({ishp, flt}, opt);
- };
- run({1, 2, 1, 1}, 1, 1, 2, 2);
- run({3, 9, 5, 4}, 1, 2, 6, 3);
- run({3, 6, 8, 9}, 3, 1, 4, 2);
- run({2, 5, 3, 6}, 2, 3, 5, 1);
- run({2, 6, 3, 6}, 2, 3, 6, 6);
- }
-
- TEST(TestOprDNN, MaskConvolution) {
- using Checker = AutoOprChecker<3, 1>;
- opr::Convolution::Param param;
- auto make_graph =
- [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
- return {opr::MaskConvolution::make(inputs[0], inputs[1], inputs[2],
- param)};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- std::shared_ptr<HostTensorND> sh_out;
- convolution_brute({inp[0], inp[1]}, sh_out, param);
- dest[0] = *sh_out;
- size_t N = dest[0].shape()[0];
- size_t OC = dest[0].shape()[1];
- size_t OH = dest[0].shape()[2];
- size_t OW = dest[0].shape()[3];
- auto mask_ptr = inp[2]->ptr<int8_t>();
- auto dest_ptr = dest[0].ptr<float>();
- for (size_t i = 0; i < N * OC; ++i) {
- for (size_t mask_idx = 0; mask_idx < OH * OW; ++mask_idx) {
- if (mask_ptr[mask_idx] == 0) {
- dest_ptr[i * OH * OW + mask_idx] = 0;
- }
- }
- }
- };
-
- auto gen_mask = [](HostTensorND& dest) {
- HostTensorGenerator<dtype::Int8, RandomDistribution::UNIFORM>
- mask_generator{0, 1};
- dest = *mask_generator(dest.shape(), dest.comp_node());
- };
-
- auto run_with_param = [&](size_t SH = 1, size_t SW = 1, size_t PH = 0,
- size_t PW = 0) {
- param.pad_h = PH;
- param.pad_w = PW;
- param.stride_h = SH;
- param.stride_w = SW;
- Checker checker{make_graph, fwd};
- Checker::RunOptions opt;
- checker.set_output_allow_grad(0, false);
- checker.set_input_dtype(2, dtype::Int8());
- checker.set_input_generator(2, gen_mask);
- auto run = [&](size_t N, size_t IC, size_t OC, size_t IH, size_t IW,
- size_t FH, size_t FW) {
- size_t OH = (IH + 2 * PH - FH) / SH + 1;
- size_t OW = (IW + 2 * PW - FW) / SW + 1;
- checker.run(
- {TensorShape{N, IC, IH, IW}, {OC, IC, FH, FW}, {OH, OW}},
- opt);
- };
- run(1, 1, 1, 5, 5, 3, 3);
- run(2, 3, 4, 5, 5, 3, 3);
- run(3, 3, 4, 224, 223, 3, 3);
- run(3, 3, 4, 224, 223, 2, 2);
- };
-
- run_with_param();
- run_with_param(2, 2, 3, 3);
- run_with_param(3, 2, 1, 2);
- run_with_param(2, 3, 2, 2);
- }
-
- TEST(TestOprDNN, MaskPropagate) {
- using Checker = AutoOprChecker<3, 1>;
- opr::MaskPropagate::Param mask_param;
- opr::Convolution::Param conv_param;
-
- auto make_graph =
- [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
- auto inp_mask = inputs[2];
- auto out_mask = opr::MaskPropagate::make(inp_mask, mask_param);
- return {opr::MaskConvolution::make(inputs[0], inputs[1], out_mask,
- conv_param)};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- auto& src = *inp[0];
- auto& mask = *inp[2];
- auto src_ptr = inp[0]->ptr<float>();
- auto mask_ptr = inp[2]->ptr<int>();
- mgb_assert(src.shape()[2] == mask.shape()[0] &&
- src.shape()[3] == mask.shape()[1]);
- for (size_t i = 0; i < src.shape()[0] * src.shape()[1]; ++i) {
- for (size_t mask_idx = 0;
- mask_idx < src.shape()[2] * src.shape()[3]; ++mask_idx) {
- if (mask_ptr[mask_idx] == 0) {
- src_ptr[i * src.layout().stride[1] + mask_idx] = 0;
- }
- }
- }
- std::shared_ptr<HostTensorND> sh_out;
- convolution_brute({inp[0], inp[1]}, sh_out, conv_param);
- dest[0] = *sh_out;
- };
-
- auto gen_mask = [](HostTensorND& dest) {
- HostTensorGenerator<dtype::Int32, RandomDistribution::UNIFORM>
- mask_generator{0, 1};
- dest = *mask_generator(dest.shape(), dest.comp_node());
- };
-
- auto run_with_param = [&](size_t FH, size_t FW, size_t SH = 1,
- size_t SW = 1, size_t PH = 0, size_t PW = 0,
- size_t DH = 1, size_t DW = 1) {
- conv_param.pad_h = PH;
- conv_param.pad_w = PW;
- conv_param.stride_h = SH;
- conv_param.stride_w = SW;
- conv_param.dilate_h = DH;
- conv_param.dilate_w = DW;
-
- mask_param.pad_h = PH;
- mask_param.pad_w = PW;
- mask_param.stride_h = SH;
- mask_param.stride_w = SW;
- mask_param.kernel_h = FH;
- mask_param.kernel_w = FW;
- mask_param.dilate_h = DH;
- mask_param.dilate_w = DW;
-
- Checker checker{make_graph, fwd};
- Checker::RunOptions opt;
- checker.set_output_allow_grad(0, false);
- checker.set_input_dtype(2, dtype::Int32());
- checker.set_input_generator(2, gen_mask);
- auto run = [&](size_t N, size_t IC, size_t OC, size_t IH, size_t IW) {
- checker.run(
- {TensorShape{N, IC, IH, IW}, {OC, IC, FH, FW}, {IH, IW}},
- opt);
- };
- run(1, 1, 1, 5, 5);
- run(2, 3, 4, 5, 5);
- run(3, 3, 4, 224, 223);
- run(3, 3, 4, 224, 223);
- };
-
- run_with_param(3, 3, 1, 1, 0, 0, 2, 2);
- run_with_param(3, 3, 2, 2, 3, 3);
- run_with_param(4, 2, 3, 2, 1, 2);
- run_with_param(2, 4, 2, 3, 2, 2);
- run_with_param(4, 2, 3, 2, 1, 2, 2, 2);
- run_with_param(2, 4, 2, 3, 2, 2, 2, 1);
- }
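-
- // 3D analogue of convolution_brute: dense NCDHW forward convolution supporting
- // both modes and dilation; used as the reference for the Convolution3D tests.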
- void convolution3d_brute(const std::vector<std::shared_ptr<HostTensorND>> &in_tensor,
- std::shared_ptr<HostTensorND> &out_tensor,
- const opr::Convolution3D::Param &param)
- {
- mgb_assert(in_tensor.size() == 2);
- auto in = in_tensor[0], filter = in_tensor[1];
- mgb_assert(in->shape().ndim == 5);
- mgb_assert(filter->shape().ndim == 5);
-
- int batch_size = in->shape().shape[0];
- int ic = in->shape().shape[1];
- int id = in->shape().shape[2];
- int ih = in->shape().shape[3];
- int iw = in->shape().shape[4];
-
- int fd = filter->shape().shape[2];
- int fh = filter->shape().shape[3];
- int fw = filter->shape().shape[4];
-
- int pd = param.pad_d;
- int ph = param.pad_h;
- int pw = param.pad_w;
-
- int sd = param.stride_d;
- int sh = param.stride_h;
- int sw = param.stride_w;
-
- int dd = param.dilate_d;
- int dh = param.dilate_h;
- int dw = param.dilate_w;
-
- mgb_assert(id + 2*pd >= (fd - 1) * dd + 1);
- mgb_assert(ih + 2*ph >= (fh - 1) * dh + 1);
- mgb_assert(iw + 2*pw >= (fw - 1) * dw + 1);
- int od = (id + 2*pd - ((fd - 1) * dd + 1)) / sd + 1;
- int oh = (ih + 2*ph - ((fh - 1) * dh + 1)) / sh + 1;
- int ow = (iw + 2*pw - ((fw - 1) * dw + 1)) / sw + 1;
- mgb_assert(static_cast<size_t>(ic) == filter->shape().shape[1]);
- int oc = filter->shape().shape[0];
-
- out_tensor = std::make_shared<HostTensorND>(CompNode::load("xpu0"),
- TensorShape{
- static_cast<size_t>(batch_size),
- static_cast<size_t>(oc),
- static_cast<size_t>(od),
- static_cast<size_t>(oh),
- static_cast<size_t>(ow)});
-
- int pn, poc, pod, poh, pow,
- pic, pid, pih, piw,
- pfd, pfh, pfw;
- for (pn = 0; pn < batch_size; ++pn)
- for (poc = 0; poc < oc; ++poc)
- for (pod = 0, pid = -pd; pod < od; ++pod, pid += sd)
- for (poh = 0, pih = -ph; poh < oh; ++poh, pih += sh)
- for (pow = 0, piw = -pw; pow < ow; ++pow, piw += sw)
- {
- float &target = out_tensor->ptr<float>({
- static_cast<size_t>(pn),
- static_cast<size_t>(poc),
- static_cast<size_t>(pod),
- static_cast<size_t>(poh),
- static_cast<size_t>(pow)})[0];
- target = 0;
- for (pic = 0; pic < ic; ++pic)
- for (pfd = 0; pfd < fd; ++pfd)
- for (pfh = 0; pfh < fh; ++pfh)
- for (pfw = 0; pfw < fw; ++pfw)
- {
- int prid, prih, priw;
- float img_data, filter_data;
- if (param.mode == opr::Convolution3D::Param::Mode::CONVOLUTION) {
- prid = pid + (fd - pfd - 1) * dd;
- prih = pih + (fh - pfh - 1) * dh;
- priw = piw + (fw - pfw - 1) * dw;
- } else {
- mgb_assert(param.mode == opr::Convolution3D::Param::Mode::CROSS_CORRELATION);
- prid = pid + pfd * dd;
- prih = pih + pfh * dh;
- priw = piw + pfw * dw;
- }
- if (prid >= 0 && prid < id &&
- prih >= 0 && prih < ih &&
- priw >= 0 && priw < iw) {
- img_data = in_tensor[0]->ptr<float>({
- static_cast<size_t>(pn),
- static_cast<size_t>(pic),
- static_cast<size_t>(prid),
- static_cast<size_t>(prih),
- static_cast<size_t>(priw)})[0];
- } else {
- img_data = 0;
- }
- filter_data = filter->ptr<float>({
- static_cast<size_t>(poc),
- static_cast<size_t>(pic),
- static_cast<size_t>(pfd),
- static_cast<size_t>(pfh),
- static_cast<size_t>(pfw)})[0];
- target += img_data * filter_data;
- }
- }
- }
- TEST(TestOprDNN, Convolution3DForward) {
- for (uint32_t batch_size : {8})
- for (uint32_t id : {12})
- for (uint32_t fd : {1, 3})
- for (uint32_t ic : {4})
- for (uint32_t oc : {ic})
- for (uint32_t pd : {0, 2})
- for (uint32_t sd : {1, 3})
- for (uint32_t dd : {1, 3})
- for (bool xcorr : {0, 1}) {
- uint32_t ih = id + 1, fh = fd, ph = pd + 1, sh = sd + 1;
- uint32_t iw = ih + 1, fw = fh, pw = ph + 1, sw = sh + 1;
- Param3D param{xcorr ? Param3D::Mode::CROSS_CORRELATION :
- Param3D::Mode::CONVOLUTION , pd, ph, pw,
- sd, sh, sw, dd, dd, dd};
- // !!! DEPRECATED. use AutoOprChecker instead.
- opr::test::ForwardChecker<opr::Convolution3D, 2> forward_checker({
- {batch_size, ic, id, ih, iw},
- {oc, ic, fd, fh, fw}},
- convolution3d_brute, param);
- forward_checker.run();
- }
- }
-
- TEST(TestOprDNN, Convolution3DBackward) {
- for (uint32_t batch_size : {8})
- for (uint32_t id : {12})
- for (uint32_t fd : {1, 3})
- for (uint32_t ic : {4})
- for (uint32_t oc : {ic})
- for (uint32_t pd : {0, 2})
- for (uint32_t sd : {1, 3})
- for (uint32_t dd : {1, 3})
- for (bool xcorr : {0, 1}) {
- uint32_t ih = id + 1, fh = fd, ph = pd + 1, sh = sd + 1;
- uint32_t iw = ih + 1, fw = fh, pw = ph + 1, sw = sh + 1;
- Param3D param{xcorr ? Param3D::Mode::CROSS_CORRELATION :
- Param3D::Mode::CONVOLUTION,
- pd, ph, pw, sd, sh, sw, dd, dd, dd};
- // !!! DEPRECATED. use AutoOprChecker instead.
- opr::test::BackwardChecker<opr::Convolution3D, 2> backward_checker(
- {{batch_size, ic, id, ih, iw},
- {oc, ic, fd, fh, fw}}, param, 1e-2, 1);
- backward_checker.run();
- }
- }
-
- TEST(TestOprDNN, GroupConv3D) {
- using Checker = AutoOprChecker<2, 1>;
- opr::Convolution3D::Param param;
- param.pad_d = 0;
- param.pad_h = 1;
- param.pad_w = 0;
- param.stride_d = 1;
- param.stride_h = 2;
- auto make_graph = [&](
- const Checker::SymInpArray &inputs) -> Checker::SymOutArray {
- auto p1 = param;
- p1.sparse = opr::Convolution3D::Param::Sparse::GROUP;
- return {opr::Convolution3D::make(inputs[0], inputs[1], p1)};
- };
-
- auto cn = CompNode::load("xpux");
- auto inp0 = std::make_shared<HostTensorND>(cn, dtype::Float32()),
- inp1 = std::make_shared<HostTensorND>(cn, dtype::Float32());
- HostTensorND out_raw;
- auto graph_raw = ComputingGraph::make();
- auto func_raw = graph_raw->compile({
- make_callback_copy(
- opr::Convolution3D::make(
- opr::Host2DeviceCopy::make(*graph_raw, inp0),
- opr::Host2DeviceCopy::make(*graph_raw, inp1),
- param),
- out_raw)});
- auto fwd = [&](Checker::NumOutArray &dest, Checker::NumInpArray inp) {
- auto &&out = dest[0];
- auto sl = inp[0]->layout(),
- fl = inp[1]->layout().remove_axis(0);
- TensorLayout ol;
- auto group = inp[1]->layout()[0];
- sl.shape[1] /= group;
- for (size_t i = 0; i < group; ++ i) {
- inp0->copy_from(inp[0]->sub(SubTensorSpec::make_from_offset_elem(
- sl, i * sl[1] * sl[2] * sl[3] * sl[4])));
- inp1->copy_from(inp[1]->sub(SubTensorSpec::make_from_offset_elem(
- fl, i * fl.total_nr_elems())));
- func_raw->execute();
- if (!i) {
- auto oshp = out_raw.shape();
- oshp[1] *= group;
- out.resize(oshp);
- ol = out.layout();
- ol[1] /= group;
- }
- out.sub(SubTensorSpec::make_from_offset_elem(
- ol, i * ol[1] * ol[2] * ol[3] * ol[4])).
- copy_from_fixlayout(out_raw);
- }
- };
-
- Checker::RunOptions opt;
- opt.numdiff_eps = 1;
- opt.outputs_max_err = 5e-5;
- Checker checker{make_graph, fwd};
- auto run = [&](const TensorShape &ishp,
- size_t fd, size_t fh, size_t fw, size_t oc, size_t group) {
- size_t ic = ishp[1];
- TensorShape flt{group, oc/group, ic/group, fd, fh, fw};
- checker.
- run({ishp, flt}, opt);
- };
- run({1, 2, 1, 1, 1}, 1, 1, 1, 2, 2);
- run({3, 9, 5, 4, 3}, 1, 2, 3, 6, 3);
- run({2, 1, 3, 6, 9}, 2, 3, 3, 5, 1);
- run({2, 1, 3, 6, 9}, 2, 3, 3, 5, 1);
- }
-
- TEST(TestOprDNN, Deconvolution3D) {
- using Checker = AutoOprChecker<2, 1>;
- Param3D param{Param3D::Mode::CROSS_CORRELATION, 0, 1, 1, 1, 2, 2};
- param.sparse = Param3D::Sparse::GROUP;
- auto make_graph = [&](
- const Checker::SymInpArray &inputs) -> Checker::SymOutArray {
- return {opr::Convolution3DBackwardData::make_deconv(
- inputs[0], inputs[1], param)};
- };
-
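- // Inline 3D version of conv_bwd_data_brute: scatter each input element into the
- // grouped deconvolution output (same output-size formula, plus a depth dim).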
- auto fwd = [&](Checker::NumOutArray &dest, Checker::NumInpArray inp) {
- auto &&data = *inp[0], &&filter = *inp[1];
- size_t N = data.shape(0),
- ID = data.shape(2), IH = data.shape(3), IW = data.shape(4),
- GROUP = filter.shape(0),
- ICPG = filter.shape(1), OCPG = filter.shape(2),
- FD = filter.shape(3), FH = filter.shape(4), FW = filter.shape(5);
- auto &&out = dest[0];
- auto get_shp = [](
- size_t inp, size_t filter, size_t stride, size_t pad,
- size_t dilate) {
- return (inp - 1) * stride + (filter - 1) * dilate + 1 - pad * 2;
- };
- size_t OD = get_shp(ID, FD,
- param.stride_d, param.pad_d, param.dilate_d),
- OH = get_shp(IH, FH,
- param.stride_h, param.pad_h, param.dilate_h),
- OW = get_shp(IW, FW,
- param.stride_w, param.pad_w, param.dilate_w);
- out.resize({N, OCPG * GROUP, OD, OH, OW});
- auto fptr = filter.ptr<float>(),
- dptr = data.ptr<float>(),
- optr = out.ptr<float>();
- memset(optr, 0, sizeof(float) * out.shape().total_nr_elems());
- auto ol = out.layout(), fl = filter.layout();
- #define FOR2(a, A, b, B) \
- for (size_t a = 0; a < A; ++ a) \
- for (size_t b = 0; b < B; ++ b)
- #define FOR3(a, A, b, B, c, C) \
- FOR2(a, A, b, B) \
- for (size_t c = 0; c < C; ++ c)
- #define FOR4(a, A, b, B, c, C, d, D) \
- FOR3(a, A, b, B, c, C) \
- for (size_t d = 0; d < D; ++ d)
- FOR3(n, N, group, GROUP, icg, ICPG)
- FOR3(id, ID, ih, IH, iw, IW) {
- float scale = *(dptr ++);
- FOR4(ocg, OCPG, fd, FD, fh, FH, fw, FW) {
- auto oc_tot = group * OCPG + ocg;
- int od = int(id * param.stride_d +
- fd * param.dilate_d) - int(param.pad_d),
- oh = int(ih * param.stride_h +
- fh * param.dilate_h) - int(param.pad_h),
- ow = int(iw * param.stride_w +
- fw * param.dilate_w) - int(param.pad_w);
- if (od >= 0 && oh >= 0 && ow >= 0 &&
- od < static_cast<int>(OD) &&
- oh < static_cast<int>(OH) &&
- ow < static_cast<int>(OW)) {
- auto out_off = n * ol.stride[0] + oc_tot * ol.stride[1] +
- od * ol.stride[2] + oh * ol.stride[3] + ow,
- flt_off = group * fl.stride[0] + icg * fl.stride[1] +
- ocg * fl.stride[2] + fd * fl.stride[3] +
- fh * fl.stride[4] + fw;
- optr[out_off] += scale * fptr[flt_off];
- }
- }
- }
- #undef FOR4
- #undef FOR3
- #undef FOR2
- };
-
- Checker::RunOptions opt;
- opt.numdiff_eps = 1;
- Checker(make_graph, fwd).
- run({TensorShape{2, 4, 3, 3, 2}, {1, 4, 5, 3, 2, 2}}, opt).
- run({TensorShape{3, 2, 1, 1, 1}, {2, 1, 1, 4, 3, 3}}, opt).
- run({TensorShape{4, 6, 2, 2, 2}, {2, 3, 4, 6, 5, 4}}, opt);
- }
-
- TEST(TestOprDNN, Convolution3DExePolicy) {
- Param3D param{Param3D::Mode::CONVOLUTION};
- using Policy = opr::Convolution3D::ExecutionPolicy;
- using S = Policy::Strategy;
-
- #if MGB_ENABLE_FASTRUN
- for (auto strategy :
- SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
- S::PROFILE | S::HEURISTIC}) {
- #else
- for (auto strategy :
- SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
- #endif
-
- using Checker = AutoOprChecker<2, 1>;
-
- auto make_graph = [&](const Checker::SymInpArray &inputs) ->
- Checker::SymOutArray {
- Policy policy;
- policy.strategy = strategy;
- auto out = opr::Convolution3D::make(
- inputs[0], inputs[1], param, policy);
- return {out};
- };
-
- auto fwd = [&](Checker::NumOutArray &dest, Checker::NumInpArray inp) {
- std::shared_ptr<HostTensorND> sh_out;
- convolution3d_brute({inp.begin(), inp.end()}, sh_out, param);
- dest[0] = *sh_out;
- };
-
- Checker::RunOptions opt;
- opt.numdiff_eps = 1;
- Checker(make_graph, fwd).
- run({TensorShape{3, 2, 3, 4, 1}, {4, 2, 2, 2, 1}}, opt).
- run({TensorShape{3, 3, 2, 6, 2}, {2, 3, 1, 4, 1}}, opt).
- run({TensorShape{1, 1, 4, 4, 4}, {2, 1, 3, 3, 3}}, opt);
- }
- }
-
- TEST(TestOprDNN, ConvBiasForward) {
- using Checker2 = AutoOprChecker<2, 1>;
- using Checker3 = AutoOprChecker<3, 1>;
- opr::ConvBiasForward::Param param;
- auto make_graph2 =
- [&](const Checker2::SymInpArray& inputs) -> Checker2::SymOutArray {
- return {opr::ConvBiasForward::make(inputs[0], inputs[1], param)};
- };
-
- auto make_graph3 =
- [&](const Checker3::SymInpArray& inputs) -> Checker3::SymOutArray {
- return {opr::ConvBiasForward::make(inputs[0], inputs[1], inputs[2],
- param)};
- };
-
- auto fwd2 = [&](Checker2::NumOutArray& dest, Checker2::NumInpArray inp) {
- std::shared_ptr<HostTensorND> sh_out;
- convolution_brute({inp[0], inp[1]}, sh_out,
- convert_to_conv_param(param));
- dest[0] = *sh_out;
- };
-
-
- auto fwd3 = [&](Checker3::NumOutArray& dest, Checker3::NumInpArray inp) {
- std::shared_ptr<HostTensorND> sh_out;
- convolution_brute({inp[0], inp[1]}, sh_out,
- convert_to_conv_param(param));
- dest[0] = *sh_out;
- size_t N = dest[0].shape()[0];
- size_t OC = dest[0].shape()[1];
- size_t OH = dest[0].shape()[2];
- size_t OW = dest[0].shape()[3];
- auto dest_ptr = dest[0].ptr<float>();
- for (size_t i = 0; i < N; i++) {
- auto bias_ptr = inp[2]->ptr<float>();
- for (size_t c = 0; c < OC; c++) {
- for (size_t hw = 0; hw < OH * OW; hw++) {
- *(dest_ptr++) += *(bias_ptr);
- }
- bias_ptr++;
- }
- }
- };
-
- auto run_with_param = [&](size_t SH = 1, size_t SW = 1, size_t PH = 0,
- size_t PW = 0) {
- param.pad_h = PH;
- param.pad_w = PW;
- param.stride_h = SH;
- param.stride_w = SW;
- Checker2 checker2{make_graph2, fwd2};
- Checker2::RunOptions opt2;
- checker2.set_output_allow_grad(0, false);
- Checker3 checker3{make_graph3, fwd3};
- Checker3::RunOptions opt3;
- checker3.set_output_allow_grad(0, false);
-
- auto run = [&](size_t N, size_t IC, size_t OC, size_t IH, size_t IW,
- size_t FH, size_t FW) {
- auto opr = megdnn_naive_handle()
- ->create_operator<megdnn::ConvolutionForward>();
- opr->param() = convert_to_conv_param(param);
- TensorLayout dest_layout;
- opr->deduce_layout({{N, IC, IH, IW}, dtype::Float32()},
- {{OC, IC, FH, FW}, dtype::Float32()},
- dest_layout);
- checker2.run({TensorShape{N, IC, IH, IW}, {OC, IC, FH, FW}}, opt2);
-
- checker3.run({TensorShape{N, IC, IH, IW},
- {OC, IC, FH, FW},
- {1, OC, 1, 1}},
- opt3);
- };
- run(1, 1, 1, 5, 5, 1, 1);
- run(1, 1, 1, 5, 5, 3, 3);
- run(2, 3, 4, 5, 5, 3, 3);
- run(3, 3, 4, 224, 223, 3, 3);
- run(3, 3, 4, 224, 223, 2, 2);
- };
- run_with_param();
- run_with_param(2, 2, 3, 3);
- run_with_param(3, 2, 1, 2);
- run_with_param(2, 3, 2, 2);
- }
-
- TEST(TestOprDNN, ConvBiasForwardWithZ) {
- REQUIRE_GPU(1);
- using Checker4 = AutoOprChecker<4, 1>;
- opr::ConvBiasForward::Param param;
-
- auto make_graph4 =
- [&](const Checker4::SymInpArray& inputs) -> Checker4::SymOutArray {
- return {opr::ConvBiasForward::make(inputs[0], inputs[1], inputs[2],
- inputs[3], param)};
- };
-
- auto fwd4 = [&](Checker4::NumOutArray& dest, Checker4::NumInpArray inp) {
- std::shared_ptr<HostTensorND> sh_out;
- convolution_brute({inp[0], inp[1]}, sh_out,
- convert_to_conv_param(param));
- dest[0] = *sh_out;
- size_t N = dest[0].shape()[0];
- size_t OC = dest[0].shape()[1];
- size_t OH = dest[0].shape()[2];
- size_t OW = dest[0].shape()[3];
- auto dest_ptr = dest[0].ptr<float>();
- float* z_ptr = inp[3]->ptr<float>();
-
- for (size_t i = 0; i < N; i++) {
- auto bias_ptr = inp[2]->ptr<float>();
- for (size_t c = 0; c < OC; c++) {
- for (size_t hw = 0; hw < OH * OW; hw++) {
- *(dest_ptr++) += *(bias_ptr) + *(z_ptr++);
- }
- bias_ptr++;
- }
- }
- };
-
- auto run_with_param = [&](size_t SH = 1, size_t SW = 1, size_t PH = 0,
- size_t PW = 0) {
- param.pad_h = PH;
- param.pad_w = PW;
- param.stride_h = SH;
- param.stride_w = SW;
- Checker4 checker4{make_graph4, fwd4};
- Checker4::RunOptions opt4;
- checker4.set_output_allow_grad(0, false);
-
- auto run = [&](size_t N, size_t IC, size_t OC, size_t IH, size_t IW,
- size_t FH, size_t FW) {
- auto opr = megdnn_naive_handle()
- ->create_operator<megdnn::ConvolutionForward>();
- opr->param() = convert_to_conv_param(param);
- TensorLayout dest_layout;
- opr->deduce_layout({{N, IC, IH, IW}, dtype::Float32()},
- {{OC, IC, FH, FW}, dtype::Float32()},
- dest_layout);
- checker4.run({TensorShape{N, IC, IH, IW},
- {OC, IC, FH, FW},
- {1, OC, 1, 1},
- {N, OC, dest_layout[2], dest_layout[3]}},
- opt4);
- };
- run(1, 1, 1, 5, 5, 3, 3);
- run(2, 3, 4, 5, 5, 3, 3);
- run(3, 3, 4, 224, 223, 3, 3);
- run(3, 3, 4, 224, 223, 2, 2);
- };
- run_with_param();
- run_with_param(2, 2, 3, 3);
- run_with_param(3, 2, 1, 2);
- run_with_param(2, 3, 2, 2);
- }
-
- TEST(TestOprDNN, ConvBiasINT8x8xX_NCHW4) {
- using Checker = AutoOprChecker<3, 1>;
- using Param = opr::ConvBias::Param;
- opr::ConvBiasForward::Param param;
-
- auto make_quantized = [&](SymbolVar x, const DType& dtype) {
- return opr::TypeCvt::make(x, dtype);
- };
- auto make_graph =
- [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
- auto conv_param = convert_to_conv_param(param);
- auto y = opr::Convolution::make(
- make_quantized(inputs[0], dtype::QuantizedS8(0.3f)),
- make_quantized(inputs[1], dtype::QuantizedS8(0.1f)), conv_param);
- y = y + make_quantized(inputs[2], dtype::QuantizedS32(0.03f));
- if (param.nonlineMode == Param::NonlineMode::RELU)
- y = opr::Elemwise::make(
- {y}, {opr::Elemwise::Mode::RELU});
- y = opr::TypeCvt::make(y, dtype::QuantizedS8(0.5f));
- return {opr::TypeCvt::make(y, dtype::Float32())};
- };
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- auto graph = ComputingGraph::make();
- Checker::SymInpArray inputs;
- for (size_t i = 0; i < inp.size(); ++i) {
- inputs[i] = opr::Host2DeviceCopy::make(
- *graph, inp[i]);
- }
-
- auto options = gopt::OptimizeForInferenceOptions{};
- options.enable_fuse_conv_bias_nonlinearity();
- auto y = gopt::optimize_for_inference({make_graph(inputs)[0]},
- options)[0];
- auto func = graph->compile({make_callback_copy(y, dest[0])});
- func->execute();
- func->wait();
- };
-
- auto run_with_param = [&](size_t SH = 1, size_t SW = 1, size_t PH = 0,
- size_t PW = 0, size_t group = 1) {
- param.pad_h = PH;
- param.pad_w = PW;
- param.stride_h = SH;
- param.stride_w = SW;
- param.format = Param::Format::NCHW4;
- if (group != 1)
- param.sparse = Param::Sparse::GROUP;
- Checker checker{make_graph, fwd, CompNode::load("cpu0")};
- Checker::RunOptions opt;
- checker.set_output_allow_grad(0, false);
- auto run = [&](size_t N, size_t IC, size_t OC, size_t IH, size_t IW,
- size_t FH, size_t FW) {
-
- mgb_assert(IC % 4 == 0 && OC % 4 == 0);
- checker.run({TensorShape{N, group * IC / 4, IH, IW, 4},
- {group, OC, IC / 4, FH, FW, 4},
- {1, group * OC / 4, 1, 1, 4}},
- opt);
- };
- run(1, 8, 8, 56, 56, 3, 3);
- run(1, 8, 8, 56, 56, 3, 3);
- run(1, 8, 8, 56, 56, 3, 3);
- };
- run_with_param(1, 1, 1, 1, 8);
- run_with_param();
- run_with_param(2, 2, 3, 3);
- run_with_param(3, 2, 1, 2);
- run_with_param(2, 3, 2, 2);
- }
-
-
- TEST(TestOprDNN, ConvolutionDTypeInference) {
- Param param;
- param.mode = Mode::CONVOLUTION;
-
- auto cn = CompNode::load("cpu0");
- auto graph = ComputingGraph::make();
- HostTensorND inp_host{
- cn, {1, 3, 7, 7}, dtype::Quantized8Asymm(0.233f, (uint8_t)123)};
- HostTensorND filt_host{
- cn, {8, 3, 1, 1}, dtype::Quantized8Asymm(0.874f, (uint8_t)234)};
- auto inp = opr::ImmutableTensor::make(*graph, inp_host);
- auto filt = opr::ImmutableTensor::make(*graph, filt_host);
- auto opr = opr::Convolution::make(inp, filt, param);
- ASSERT_EQ(opr.dtype().enumv(), DTypeEnum::QuantizedS32);
- // This has to be EQ instead of NEAR
- EXPECT_EQ(opr.dtype().param<dtype::QuantizedS32>().scale, 0.233f * 0.874f);
-
- inp_host = {cn, {1, 3, 7, 7}, dtype::QuantizedS8(0.1234f)};
- filt_host = {cn, {8, 3, 1, 1}, dtype::QuantizedS8(0.2345f)};
- inp = opr::ImmutableTensor::make(*graph, inp_host);
- filt = opr::ImmutableTensor::make(*graph, filt_host);
- opr = opr::Convolution::make(inp, filt, param);
- ASSERT_EQ(opr.dtype().enumv(), DTypeEnum::QuantizedS32);
- EXPECT_EQ(opr.dtype().param<dtype::QuantizedS32>().scale,
- 0.1234f * 0.2345f);
-
- inp_host = {cn, {1, 3, 7, 7}, dtype::Int8()};
- filt_host = {cn, {8, 3, 1, 1}, dtype::Int8()};
- inp = opr::ImmutableTensor::make(*graph, inp_host);
- filt = opr::ImmutableTensor::make(*graph, filt_host);
- opr = opr::Convolution::make(inp, filt, param);
- ASSERT_EQ(opr.dtype().enumv(), DTypeEnum::Int32);
- }
-
- TEST(TestOprDNN, ConvBiasINT8x8xXDTypeInference) {
- float inp_scale = 1.926f;
- float filt_scale = 0.817f;
- float bias_scale = inp_scale * filt_scale;
- opr::ConvBias::Param param;
- param.mode = Mode::CONVOLUTION;
-
- auto cn = CompNode::load("cpu0");
- auto graph = ComputingGraph::make();
- HostTensorND inp_host{cn, {1, 3, 7, 7}, dtype::QuantizedS8(inp_scale)};
- HostTensorND filt_host{cn, {8, 3, 1, 1}, dtype::QuantizedS8(filt_scale)};
- DType output_dtype = dtype::QuantizedS8(bias_scale);
- HostTensorND bias_host{cn, {1, 3, 7, 7}, dtype::QuantizedS32(bias_scale)};
- auto inp = opr::ImmutableTensor::make(*graph, inp_host);
- auto filt = opr::ImmutableTensor::make(*graph, filt_host);
- auto bias = opr::ImmutableTensor::make(*graph, bias_host);
- auto opr = opr::ConvBiasForward::make(inp, filt, bias, param,
- {}, OperatorNodeConfig{output_dtype});
- ASSERT_EQ(opr.dtype().enumv(), DTypeEnum::QuantizedS8);
- EXPECT_EQ(opr.dtype().param<dtype::QuantizedS8>().scale, bias_scale);
- }
-
- TEST(TestOprDNN, ConvBiasINT8x8xXSerialization) {
- using namespace serialization;
-
- float inp_scale = 1.926f;
- float filt_scale = 0.817f;
- float bias_scale = inp_scale * filt_scale;
- DType output_dtype = dtype::QuantizedS8(bias_scale);
-
- auto fname = output_file("ConvBiasINT8x8xXTest");
- auto dump = [&]() {
- opr::ConvBias::Param param;
- param.mode = Mode::CONVOLUTION;
-
- auto cn = CompNode::load("cpu0");
- auto graph = ComputingGraph::make();
- HostTensorND inp_host{cn, {1, 3, 7, 7}, dtype::QuantizedS8(inp_scale)};
- HostTensorND filt_host{
- cn, {8, 3, 1, 1}, dtype::QuantizedS8(filt_scale)};
- HostTensorND bias_host{
- cn, {1, 3, 7, 7}, dtype::QuantizedS32(bias_scale)};
- auto inp = opr::ImmutableTensor::make(*graph, inp_host);
- auto filt = opr::ImmutableTensor::make(*graph, filt_host);
- auto bias = opr::ImmutableTensor::make(*graph, bias_host);
- auto opr = opr::ConvBiasForward::make(inp, filt, bias, param,
- {},
- OperatorNodeConfig{output_dtype});
- auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
- auto rst = dumper->dump({opr});
- ASSERT_EQ(rst.outputs.size(), 1u);
- };
-
- auto load = [&]() {
- auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
- auto rst = loader->load();
- ASSERT_EQ(rst.output_var_list.size(), 1u);
- EXPECT_EQ(rst.output_var_list[0].dtype(), output_dtype);
- };
-
- dump();
- load();
- }
-
- TEST(TestOprDNN, LocalShareForward) {
- REQUIRE_GPU(1);
- using Checker = AutoOprChecker<2, 1>;
- using Param = opr::LocalShare::Param;
- Param param;
- param.mode = Param::Mode::CROSS_CORRELATION;
- param.sparse = Param::Sparse::DENSE;
- auto make_graph =
- [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
- return {opr::LocalShare::make(inputs[0], inputs[1], param)};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- mgb_assert(inp.size() == 2);
- mgb_assert(dest.size() == 1);
- std::shared_ptr<HostTensorND> out;
- local_share_brute({inp[0], inp[1]}, out, param);
- dest[0] = *out;
- };
-
- auto run_with_param = [&](size_t fh = 3, size_t fw = 3, size_t sh = 1,
- size_t sw = 1, size_t sgh = 3, size_t sgw = 3) {
- size_t ph = fh / 2, pw = fw / 2;
- param.pad_h = ph, param.pad_w = pw;
- param.stride_h = sh, param.stride_w = sw, param.spatial_groups_h = sgh,
- param.spatial_groups_w = sgw;
- Checker checker{make_graph, fwd};
- Checker::RunOptions opt;
- checker.set_output_allow_grad(0, false);
- checker.set_input_dtype(0, dtype::Float32());
- checker.set_input_dtype(1, dtype::Float32());
- auto run = [&](size_t n, size_t ci, size_t co, size_t hi, size_t wi) {
- size_t ho = (hi + 2 * ph - fh) / sh + 1;
- size_t wo = (wi + 2 * pw - fw) / sw + 1;
- if (ho % sgh != 0 || wo % sgw != 0)
- return;
- checker.run({TensorShape{n, ci, hi, wi},
- TensorShape{sgh, sgw, ci, fh, fw, co}},
- opt);
- };
- run(32, 2, 7, 24, 24);
- run(16, 2, 7, 24, 24);
- run(32, 2, 8, 12, 12);
- run(16, 2, 9, 6, 6);
- };
- run_with_param(1, 1, 1, 1, 3, 3);
- run_with_param(3, 3, 1, 1, 2, 2);
- run_with_param(5, 5, 1, 1, 2, 2);
- run_with_param(7, 7, 1, 1, 2, 2);
- run_with_param(1, 1, 2, 2, 3, 3);
- run_with_param(3, 3, 2, 2, 2, 2);
- run_with_param(5, 5, 1, 1, 2, 2);
- run_with_param(7, 7, 1, 1, 2, 2);
- }
-
- TEST(TestOprDNN, LocalShareForwardGrad) {
- REQUIRE_GPU(1);
- using Checker = AutoOprChecker<2, 1>;
- using Param = opr::LocalShare::Param;
- Param param;
- param.mode = Param::Mode::CROSS_CORRELATION;
- param.sparse = Param::Sparse::DENSE;
- auto make_graph =
- [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
- return {opr::LocalShare::make(inputs[0], inputs[1], param)};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- mgb_assert(inp.size() == 2);
- mgb_assert(dest.size() == 1);
- std::shared_ptr<HostTensorND> out;
- local_share_brute({inp[0], inp[1]}, out, param);
- dest[0] = *out;
- };
-
- auto run_with_param = [&](size_t fh = 3, size_t fw = 3, size_t sh = 1,
- size_t sw = 1, size_t sgh = 3, size_t sgw = 3) {
- size_t ph = fh / 2, pw = fw / 2;
- param.pad_h = ph, param.pad_w = pw;
- param.stride_h = sh, param.stride_w = sw, param.spatial_groups_h = sgh,
- param.spatial_groups_w = sgw;
- Checker checker{make_graph, fwd};
- Checker::RunOptions opt;
- checker.set_output_allow_grad(0, true);
- opt.numdiff_max_err = 1e-1;
- checker.set_input_dtype(0, dtype::Float32());
- checker.set_input_dtype(1, dtype::Float32());
- auto run = [&](size_t n, size_t ci, size_t co, size_t hi, size_t wi) {
- size_t ho = (hi + 2 * ph - fh) / sh + 1;
- size_t wo = (wi + 2 * pw - fw) / sw + 1;
- if (ho % sgh != 0 || wo % sgw != 0)
- return;
- checker.run({TensorShape{n, ci, hi, wi},
- TensorShape{sgh, sgw, ci, fh, fw, co}},
- opt);
- };
- run(4, 2, 8, 24, 24);
- run(8, 2, 4, 6, 6);
- run(16, 4, 8, 12, 12);
- run(4, 4, 8, 12, 12);
- };
- run_with_param(1, 1, 1, 1, 3, 3);
- run_with_param(1, 1, 2, 2, 3, 3);
- run_with_param(3, 3, 2, 2, 2, 2);
- }
-
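- //! Runs LocalShare forward under each execution strategy and counts
- //! algorithm-cache lookups via PersistentCacheHook: a pure HEURISTIC
- //! strategy must never query the cache, while profiling strategies must.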
- TEST(TestOprDNN, LocalShareForwardExecPolicy) {
- REQUIRE_GPU(1);
- using Checker = AutoOprChecker<2, 1>;
- using Policy = opr::LocalShare::ExecutionPolicy;
- using S = Policy::Strategy;
- using Param = opr::LocalShare::Param;
- Param param;
- param.mode = Param::Mode::CROSS_CORRELATION;
- param.sparse = Param::Sparse::DENSE;
-
- int nr_get = 0;
- auto on_get = [&nr_get](const std::string&, const void*, size_t,
- const void*, size_t) { ++nr_get; };
- PersistentCacheHook cache_hook{on_get};
-
- #if MGB_ENABLE_FASTRUN
- for (auto strategy :
- SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
- S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTIMIZED}) {
- #else
- for (auto strategy :
-         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
- #endif
- auto make_graph = [&](const Checker::SymInpArray& inputs)
- -> Checker::SymOutArray {
- Policy policy;
- policy.strategy = strategy;
- return {opr::LocalShare::make(inputs[0], inputs[1], param, policy)};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- mgb_assert(inp.size() == 2);
- mgb_assert(dest.size() == 1);
- std::shared_ptr<HostTensorND> out;
- local_share_brute({inp[0], inp[1]}, out, param);
- dest[0] = *out;
- };
-
- auto run_with_param = [&](size_t fh = 3, size_t fw = 3, size_t sh = 1,
- size_t sw = 1, size_t sgh = 3,
- size_t sgw = 3) {
- size_t ph = fh / 2, pw = fw / 2;
- param.pad_h = ph, param.pad_w = pw;
- param.stride_h = sh, param.stride_w = sw,
- param.spatial_groups_h = sgh, param.spatial_groups_w = sgw;
- Checker checker{make_graph, fwd};
- Checker::RunOptions opt;
- checker.set_output_allow_grad(0, false);
- checker.set_input_dtype(0, dtype::Float32());
- checker.set_input_dtype(1, dtype::Float32());
- nr_get = 0;
- opt.outputs_max_err = 1e-3;
- auto run = [&](size_t n, size_t ci, size_t co, size_t hi,
- size_t wi) {
- size_t ho = (hi + 2 * ph - fh) / sh + 1;
- size_t wo = (wi + 2 * pw - fw) / sw + 1;
- if (ho % sgh != 0 || wo % sgw != 0)
- return;
- checker.run({TensorShape{n, ci, hi, wi},
- TensorShape{sgh, sgw, ci, fh, fw, co}},
- opt);
- };
- run(32, 4, 8, 24, 24);
- run(32, 4, 8, 12, 12);
- run(16, 4, 8, 12, 12);
- run(32, 4, 8, 6, 6);
- if (strategy == S::HEURISTIC) {
- ASSERT_EQ(0, nr_get);
- } else {
- ASSERT_LT(0, nr_get);
- }
- };
- run_with_param(1, 1, 1, 1, 3, 3);
- run_with_param(3, 3, 1, 1, 2, 2);
- run_with_param(5, 5, 1, 1, 2, 2);
- run_with_param(7, 7, 1, 1, 2, 2);
- run_with_param(1, 1, 2, 2, 3, 3);
- run_with_param(3, 3, 2, 2, 2, 2);
- run_with_param(5, 5, 1, 1, 2, 2);
- run_with_param(7, 7, 1, 1, 2, 2);
- }
- }
-
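- //! Dump/load round trip for a LocalShareForward graph.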
- TEST(TestOprDNN, LocalShareSerialization) {
- using namespace serialization;
-
- auto fname = output_file("LocalShareForwardTest");
- auto dump = [&]() {
- opr::LocalShare::Param param;
- param.mode = Mode::CROSS_CORRELATION;
- param.stride_h = param.stride_w = 1;
- param.pad_h = param.pad_w = 0;
- param.spatial_groups_h = param.spatial_groups_w = 3;
-
- auto cn = CompNode::load("cpu0");
- auto graph = ComputingGraph::make();
- HostTensorND inp_host{cn, {32, 4, 24, 24}, dtype::Float32()};
-     HostTensorND filt_host{cn, {3, 3, 4, 1, 1, 8}, dtype::Float32()};
- auto inp = opr::ImmutableTensor::make(*graph, inp_host);
- auto filt = opr::ImmutableTensor::make(*graph, filt_host);
-     auto opr = opr::LocalShareForward::make(inp, filt, param, {});
- auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
- auto rst = dumper->dump({opr});
- ASSERT_EQ(rst.outputs.size(), 1u);
- };
-
- auto load = [&]() {
- auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
- auto rst = loader->load();
- ASSERT_EQ(rst.output_var_list.size(), 1u);
- };
-
- dump();
- load();
- }
-
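- //! Checks DeformableConvForward against the naive megdnn implementation for
- //! each execution strategy, including numeric gradient checks on all four
- //! inputs (src, filter, offset, mask).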
- TEST(TestOprDNN, DeformableConvForward) {
- REQUIRE_GPU(1);
- using Checker = AutoOprChecker<4, 1>;
- using Policy = opr::DeformableConvForward::ExecutionPolicy;
- using S = Policy::Strategy;
- using Param = opr::DeformableConvForward::Param;
- Param param;
-
- #if MGB_ENABLE_FASTRUN
- for (auto strategy :
- SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
- S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTIMIZED}) {
- #else
- for (auto strategy :
-         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
- #endif
- auto make_graph = [&](const Checker::SymInpArray& inputs)
- -> Checker::SymOutArray {
- Policy policy;
- policy.strategy = strategy;
- return {opr::DeformableConvForward::make(
- inputs[0], inputs[1], inputs[2], inputs[3], param, policy)};
- };
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- auto opr =
- megdnn_naive_handle()
- ->create_operator<megdnn::DeformableConvForward>();
- opr->param() = param;
- TensorLayout dest_layout;
- opr->deduce_layout(inp[0]->layout(), inp[1]->layout(),
- inp[2]->layout(), inp[3]->layout(), dest_layout);
- std::vector<dt_byte> workspace(opr->get_workspace_in_bytes(
- inp[0]->layout(), inp[1]->layout(), inp[2]->layout(),
- inp[3]->layout(), dest_layout));
- dest[0].dtype(dtype::Float32())
- .comp_node(inp[0]->comp_node())
- .resize(dest_layout);
- opr->exec(inp[0]->as_megdnn(), inp[1]->as_megdnn(),
- inp[2]->as_megdnn(), inp[3]->as_megdnn(),
- dest[0].as_megdnn(),
- {workspace.data(), workspace.size()});
- };
- auto run_with_param = [&](size_t fh, size_t fw, size_t sh, size_t sw,
- size_t dh, size_t dw, size_t group,
- size_t deformable_group) {
- Checker checker{make_graph, fwd};
- size_t ph = fh / 2, pw = fw / 2;
- param.pad_h = ph, param.pad_w = pw;
- param.stride_h = sh, param.stride_w = sw;
- param.dilate_h = dh, param.dilate_w = dw;
-
- param.format = Param::Format::NCHW;
- param.mode = Param::Mode::CROSS_CORRELATION;
- param.sparse = Param::Sparse::DENSE;
- if (group > 1)
- param.sparse = Param::Sparse::GROUP;
- Checker::RunOptions opt;
- float DELTA = 1e-3;
- opt.numdiff_eps = DELTA;
- opt.numdiff_max_err = 1e-1;
- auto gen_off = [DELTA](HostTensorND& off, float l = -2.f, float h = 2.f) {
- RNGxorshf rng{next_rand_seed()};
- auto elems = off.shape().total_nr_elems();
- auto ptr = off.ptr<float>();
- auto rand_real = [](RNGxorshf& rng, float lo, float hi) {
- std::uniform_real_distribution<float> dist(lo, hi);
- return dist(rng);
- };
- for (size_t i = 0; i < elems; ++i) {
- do {
- float val = rand_real(rng, l, h);
-                     if (std::abs(std::floor(val + 2 * DELTA) - std::floor(val)) <= 1e-6f &&
-                         std::abs(std::floor(val - 2 * DELTA) - std::floor(val)) <= 1e-6f) {
- ptr[i] = val;
- break;
- }
- } while (true);
- }
- };
-         //! Generate offsets away from integer values: the bilinear sampling
-         //! in deformable conv is not differentiable there, which would break
-         //! the numeric gradient check.
- checker.set_input_generator(2, gen_off);
- checker.set_input_dtype(0, dtype::Float32());
- checker.set_input_dtype(1, dtype::Float32());
- checker.set_input_dtype(2, dtype::Float32());
- checker.set_input_dtype(3, dtype::Float32());
- auto run = [&](size_t n, size_t ih, size_t iw, size_t icpg,
- size_t ocpg) {
- size_t oh = (ih + 2 * ph - fh) / sh + 1;
- size_t ow = (iw + 2 * pw - fw) / sw + 1;
- checker.run({TensorShape{n, group * icpg, ih, iw},
- (param.sparse == Param::Sparse::GROUP)
- ? TensorShape{group, ocpg, icpg, fh, fw}
- : TensorShape{group * ocpg, group * icpg,
- fh, fw},
- {n, 2 * deformable_group * fh * fw, oh, ow},
- {n, deformable_group * fh * fw, oh, ow}},
- opt);
- };
- run(1, 3, 3, 2, 1);
- run(2, 3, 3, 2, 2);
- run(1, 5, 5, 2, 1);
- };
- // run_with_param(1, 1, 1, 1, 1, 1, 1, 1);
- run_with_param(3, 3, 1, 1, 1, 1, 2, 2);
- // run_with_param(5, 5, 1, 1, 1, 1, 2, 2);
- }
- }
-
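- //! Dump/load round trip for a DeformableConvForward graph.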
- TEST(TestOprDNN, DeformableConvSerialization) {
- using namespace serialization;
-
- auto fname = output_file("DeformableConvTest");
- auto dump = [&]() {
- using Param = opr::DeformableConvForward::Param;
- Param param;
- size_t n = 16, ocpg = 2, icpg = 4;
- size_t ih = 24, iw = 24, fh = 3, fw = 3, ph = 2, pw = 2, sh = 1, sw = 1, dh = 1, dw = 1;
-     size_t group = 1, deformable_group = 1;
-
- size_t oh = (ih + 2 * ph - fh) / sh + 1;
- size_t ow = (iw + 2 * pw - fw) / sw + 1;
-
- param.pad_h = ph, param.pad_w = pw;
- param.stride_h = sh, param.stride_w = sw;
- param.dilate_h = dh, param.dilate_w = dw;
-
- param.format = Param::Format::NCHW;
- param.mode = Param::Mode::CROSS_CORRELATION;
- param.sparse = Param::Sparse::DENSE;
-
- auto cn = CompNode::load("cpu0");
- auto graph = ComputingGraph::make();
- HostTensorND inp_host{cn, {n, group * icpg, ih, iw}, dtype::Float32()};
- HostTensorND filt_host{
- cn, {group * ocpg, group * icpg, fh, fw}, dtype::Float32()};
- HostTensorND offset_host{
- cn, {n, 2 * deformable_group * fh * fw, oh, ow}, dtype::Float32()};
- HostTensorND mask_host{
- cn, {n, deformable_group * fh * fw, oh, ow}, dtype::Float32()};
- auto inp = opr::ImmutableTensor::make(*graph, inp_host);
- auto filt = opr::ImmutableTensor::make(*graph, filt_host);
- auto offset = opr::ImmutableTensor::make(*graph, offset_host);
- auto mask = opr::ImmutableTensor::make(*graph, mask_host);
- auto opr = opr::DeformableConvForward::make(inp, filt, offset, mask,
- param, {}, {});
- auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
- auto rst = dumper->dump({opr});
- ASSERT_EQ(rst.outputs.size(), 1u);
- };
-
- auto load = [&]() {
- auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
- auto rst = loader->load();
- ASSERT_EQ(rst.output_var_list.size(), 1u);
- };
-
- dump();
- load();
- }
-
- #if MGB_CUDA
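- //! Checks quantized BatchConvBiasForward (NCHW4) against a reference that
- //! rewrites the batched convolution as a grouped opr::Convolution plus bias
- //! add, with matching quantize/dequantize TypeCvt steps.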
- TEST(TestOprDNN, BatchConvBiasForward) {
- REQUIRE_GPU(1);
- auto cn = CompNode::load("gpu0");
- cn.activate();
- REQUIRE_CUDA_COMPUTE_CAPABILITY(6, 1);
-
- using Checker = AutoOprChecker<3, 1>;
- using Policy = opr::BatchConvBiasForward::ExecutionPolicy;
- using S = Policy::Strategy;
- using Param = opr::BatchConvBiasForward::Param;
- Param param;
- param.format = Param::Format::NCHW4;
- param.mode = Param::Mode::CROSS_CORRELATION;
- param.sparse = Param::Sparse::DENSE;
-
- #if MGB_ENABLE_FASTRUN
- for (auto strategy :
- SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
- S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTIMIZED}) {
- #else
- for (auto strategy :
-         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
- #endif
-
- auto make_quantized = [&](SymbolVar x, const DType& dtype) {
- return opr::TypeCvt::make(x, dtype);
- };
- auto make_graph = [&](const Checker::SymInpArray& inputs)
- -> Checker::SymOutArray {
- Policy policy;
- policy.strategy = strategy;
- auto conv_bias = opr::BatchConvBiasForward::make(
- make_quantized(inputs[0], dtype::QuantizedS8{1.1f}),
- make_quantized(inputs[1], dtype::QuantizedS8{1.2f}),
- make_quantized(inputs[2], dtype::QuantizedS32{1.1f * 1.2f}),
- param, policy,
- OperatorNodeConfig{dtype::QuantizedS8{1.3f}});
- return {opr::TypeCvt::make(conv_bias, dtype::Float32())};
- };
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- mgb_assert(inp.size() == 3);
- mgb_assert(dest.size() == 1);
- auto graph = ComputingGraph::make();
- Checker::SymInpArray inputs;
- for (size_t i = 0; i < inp.size(); ++i) {
- inputs[i] = opr::Host2DeviceCopy::make(*graph, inp[i]);
- }
- auto src = make_quantized(inputs[0], dtype::QuantizedS8{1.1f}),
- filter = make_quantized(inputs[1], dtype::QuantizedS8{1.2f}),
- bias = make_quantized(inputs[2],
- dtype::QuantizedS32{1.1f * 1.2f});
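-         // Reference path: fold the batch dimension into the channel
-         // dimension and run a grouped convolution (group == batch), so each
-         // sample is convolved with its own filter; the result is reshaped
-         // back afterwards.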
- {
- auto xshp = opr::GetVarShape::make(src);
-
- auto cv = [&src](int v) { return src.make_scalar(v); };
- auto sub = [&xshp, &cv](int idx) {
- return opr::IndexAt::make(xshp, {{0, cv(idx)}});
- };
- auto tshp = opr::Concat::make(
- {cv(1), sub(0) * sub(1), sub(2), sub(3), sub(4)}, 0);
- src = opr::Reshape::make(src, tshp);
- }
- auto conv_param = convert_to_conv_param(param);
- conv_param.sparse = opr::BatchConvBias::Param::Sparse::GROUP;
- auto y = opr::Convolution::make(src, filter, conv_param);
- {
- auto fshp = opr::GetVarShape::make(filter);
- auto batch =
- opr::IndexAt::make(fshp, {{0, filter.make_scalar(0)}});
-
- auto xshp = opr::GetVarShape::make(y);
-
- auto cv = [&y](int v) { return y.make_scalar(v); };
- auto sub = [&xshp, &cv](int idx) {
- return opr::IndexAt::make(xshp, {{0, cv(idx)}});
- };
- auto tshp = opr::Concat::make(
- {batch, sub(1) / batch, sub(2), sub(3), sub(4)}, 0);
- y = opr::Reshape::make(y, tshp);
- }
- y = y + bias;
- y = opr::TypeCvt::make(y, dtype::QuantizedS8{1.3f});
- y = opr::TypeCvt::make(y, dtype::Float32());
- auto func = graph->compile({make_callback_copy(y, dest[0])});
- func->execute();
- func->wait();
- };
-
- auto run_with_param = [&](size_t sh = 1, size_t sw = 1) {
- size_t fh = 1;
- size_t fw = 1;
- size_t ph = fh / 2, pw = fw / 2;
- param.pad_h = ph, param.pad_w = pw;
- param.stride_h = sh, param.stride_w = sw;
- Checker checker{make_graph, fwd, cn};
- Checker::RunOptions opt;
- checker.set_output_allow_grad(0, false);
- checker.set_input_dtype(0, dtype::Float32());
- checker.set_input_dtype(1, dtype::Float32());
- checker.set_input_dtype(2, dtype::Float32());
- auto run = [&](size_t n, size_t ci, size_t co, size_t hi,
- size_t wi) {
- checker.run({TensorShape{n, ci / 4, hi, wi, 4},
- TensorShape{n, co, ci / 4, fh, fw, 4},
-                          TensorShape{1, co / 4, 1, 1, 4}},
-                         opt);
- };
- run(32, 16, 32, 24, 24);
- run(16, 16, 32, 24, 24);
- run(32, 16, 64, 12, 12);
- run(16, 16, 64, 6, 6);
- };
- run_with_param(1, 1);
- run_with_param(2, 2);
- }
- }
- #endif
-
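- //! Dump/load round trip for a quantized BatchConvBiasForward graph.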
- TEST(TestOprDNN, BatchConvBiasSerialization) {
- using namespace serialization;
-
- auto fname = output_file("BatchConvBiasForwardTest");
- auto dump = [&]() {
- opr::BatchConvBias::Param param;
- param.mode = Mode::CROSS_CORRELATION;
- param.format = opr::BatchConvBias::Param::Format::NCHW4;
- param.stride_h = param.stride_w = 1;
- param.pad_h = param.pad_w = 0;
-
- auto cn = CompNode::load("cpu0");
- auto graph = ComputingGraph::make();
- HostTensorND inp_host{cn, {32, 1, 24, 24, 4}, dtype::QuantizedS8{1.1f}};
- HostTensorND filt_host{cn, {32, 8, 1, 1, 1, 4}, dtype::QuantizedS8{1.2f}};
- auto inp = opr::ImmutableTensor::make(*graph, inp_host);
- auto filt = opr::ImmutableTensor::make(*graph, filt_host);
- auto opr = opr::BatchConvBiasForward::make(
- inp, filt, param, {},
- OperatorNodeConfig{dtype::QuantizedS8{1.3f}});
- auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
- auto rst = dumper->dump({opr});
- ASSERT_EQ(rst.outputs.size(), 1u);
- };
-
- auto load = [&]() {
- auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
- auto rst = loader->load();
- ASSERT_EQ(rst.output_var_list.size(), 1u);
- };
-
- dump();
- load();
- }
-
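- //! Verifies that HEURISTIC algorithm selection is deterministic across two
- //! identical runs, and that HEURISTIC | REPRODUCIBLE only selects algorithms
- //! carrying the REPRODUCIBLE attribute.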
- TEST(TestOprDNN, HeuristicReproducible) {
- using Policy = opr::ConvolutionBackwardFilter::ExecutionPolicy;
- using S = Policy::Strategy;
-
- using Checker = AutoOprChecker<3, 1>;
-
- constexpr size_t PH = 1, PW = 1, SH = 1, SW = 1;
-
- for (auto strategy :
- SmallVector<S>{S::HEURISTIC, S::HEURISTIC | S::REPRODUCIBLE}) {
- VarNode* bwd_flt;
- auto make_graph = [&](const Checker::SymInpArray& inputs)
- -> Checker::SymOutArray {
- Param param{Mode::CROSS_CORRELATION, PH, PW, SH, SW};
- Policy policy;
- policy.strategy = strategy;
- auto out = opr::ConvolutionBackwardFilter::make(
- inputs[0], inputs[1], inputs[2], param, policy);
- bwd_flt = out.node();
- return {out};
- };
-
- auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
- std::shared_ptr<HostTensorND> out;
- conv_bwd_flt_brute({inp[0], inp[1], inp[2]}, out,
- Param{Mode::CROSS_CORRELATION, PH, PW, SH, SW});
- dest[0] = *out;
- };
-
- #define get_shp(N, P, S, F) ((N + 2 * P - F) / S + 1)
- #define inp_tensor(N, IC, OC, IH, IW, FH, FW) \
- { \
- TensorShape{N, IC, IH, IW}, \
- {N, OC, get_shp(IH, PH, SH, FH), get_shp(IW, PW, SW, FW)}, { \
- OC, IC, FH, FW \
- } \
- }
- Checker::RunOptions opt;
- opt.numdiff_eps = 1;
- opt.outputs_max_err = 1e-3;
- std::string algo_name0, algo_name1;
- {
- Checker checker(make_graph, fwd);
- checker.run(inp_tensor(2, 3, 4, 9, 8, 3, 3), opt)
- .run(inp_tensor(1, 5, 3, 7, 9, 3, 3), opt)
- .run(inp_tensor(3, 4, 4, 9, 9, 3, 3), opt);
-
- auto&& megdnn_opr = static_cast<megdnn::ConvolutionBackwardFilter*>(
- static_cast<opr::ConvolutionBackwardFilter*>(
- bwd_flt->owner_opr())
- ->megdnn_opr());
- auto&& algo = megdnn_opr->execution_policy().algo;
- megdnn::Algorithm* palgo =
- megdnn_opr->get_algorithm_from_desc(algo);
- mgb_assert(palgo, "Unknown algo description");
- if (strategy == S(S::HEURISTIC | S::REPRODUCIBLE)) {
- EXPECT_TRUE(palgo->contain_attribute_all(
- megdnn::AlgoAttribute::REPRODUCIBLE));
- }
- algo_name0 = palgo->name();
- }
- {
- Checker checker(make_graph, fwd);
- checker.run(inp_tensor(2, 3, 4, 9, 8, 3, 3), opt)
- .run(inp_tensor(1, 5, 3, 7, 9, 3, 3), opt)
- .run(inp_tensor(3, 4, 4, 9, 9, 3, 3), opt);
- auto&& megdnn_opr = static_cast<megdnn::ConvolutionBackwardFilter*>(
- static_cast<opr::ConvolutionBackwardFilter*>(
- bwd_flt->owner_opr())
- ->megdnn_opr());
- auto&& algo = megdnn_opr->execution_policy().algo;
- megdnn::Algorithm* palgo =
- megdnn_opr->get_algorithm_from_desc(algo);
- mgb_assert(palgo, "Unknown algo description");
-
- algo_name1 = palgo->name();
- }
-     EXPECT_EQ(algo_name0, algo_name1);
- }
- #undef inp_tensor
- #undef get_shp
- }
-
- #if MGB_CUDA
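- //! Compiles the same quantized ConvBias network on two streams of one GPU
- //! and executes both functions from concurrent threads, smoke-testing
- //! PROFILE-mode algorithm selection under multi-comp-node execution.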
- TEST(TestOprDNN, ConvolutionMultiCompNode) {
- REQUIRE_GPU(1);
- auto cn0 = CompNode::load("gpu0:0"), cn1 = CompNode::load("gpu0:1");
- cn0.activate();
- auto&& prop = CompNodeEnv::from_comp_node(cn0).cuda_env().device_prop;
- auto sm_ver = prop.major * 10 + prop.minor;
- if (sm_ver < 61) {
-         printf("This testcase is ignored due to insufficient CUDA compute "
-                "capability (got: %d, expected: %d)\n",
-                sm_ver, 61);
- return;
- }
-
- HostTensorGenerator<dtype::Int8> gen;
- auto mkvar = [&gen](const char* name, const TensorShape& shp,
- const DType& dtype,
- std::shared_ptr<ComputingGraph> graph,
- const CompNode& cn) {
- return opr::TypeCvt::make(
- opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name),
- dtype);
- };
- auto mkcvar = [&gen](const char* name, const TensorShape& shp,
- const DType& dtype,
- std::shared_ptr<ComputingGraph> graph,
- const CompNode& cn) {
- return opr::TypeCvt::make(
- opr::SharedDeviceTensor::make(*graph, *gen(shp, cn))
- .rename(name),
- dtype);
- };
-
- auto graph0 = ComputingGraph::make();
- graph0->options().graph_opt_level = 0;
- auto graph1 = ComputingGraph::make();
- graph1->options().graph_opt_level = 0;
- auto make_func = [&gen, &mkvar, &mkcvar](
- std::shared_ptr<ComputingGraph> graph,
- const CompNode& cn) {
- using Policy = opr::ConvBias::ExecutionPolicy;
- using S = Policy::Strategy;
- auto x = mkvar("x", {64, 32, 28, 28, 4}, dtype::QuantizedS8(2.5f),
- graph, cn),
- w1 = mkcvar("w1", {256, 32, 5, 5, 4}, dtype::QuantizedS8(2.5f),
- graph, cn),
- b1 = mkcvar("b1", {1, 64, 1, 1, 4}, dtype::QuantizedS32(6.25f),
- graph, cn),
- w2 = mkcvar("w2", {256, 64, 3, 3, 4}, dtype::QuantizedS8(2.5f),
- graph, cn),
- b2 = mkcvar("b2", {1, 64, 1, 1, 4}, dtype::QuantizedS32(6.25f),
- graph, cn);
- opr::ConvBias::Param param;
- param.format = opr::ConvBias::Param::Format::NCHW4;
- param.nonlineMode = opr::ConvBias::Param::NonlineMode::RELU;
- param.stride_h = param.stride_w = 2;
- param.pad_h = param.pad_w = 2;
- Policy policy;
- policy.strategy = S::PROFILE;
-
- auto y = opr::ConvBias::make(
- x, w1, b1, param, policy,
- OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
- param.stride_h = param.stride_w = 1;
- param.pad_h = param.pad_w = 1;
- y = opr::ConvBias::make(y, w2, b2, param, policy,
- OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
- return y;
- };
- auto y0 = make_func(graph0, cn0);
- auto y1 = make_func(graph1, cn1);
- HostTensorND host_y0, host_y1;
- auto func0 = graph0->compile({make_callback_copy(y0, host_y0)});
- auto func1 = graph1->compile({make_callback_copy(y1, host_y1)});
-
- auto worker = [&func0, &func1](int wid) {
- static const int iter_num = 1000;
- if (wid == 0) {
- for (int i = 0; i < iter_num; ++i)
- func0->execute();
- } else {
- for (int i = 0; i < iter_num; ++i)
- func1->execute();
- }
- };
- std::thread worker0(worker, 0);
- std::thread worker1(worker, 1);
- worker0.join();
- worker1.join();
- }
- #endif
-
- } // anonymous namespace
-
- #ifndef _WIN32
-
- namespace mgb {
- namespace opr {
- namespace testing {
-
- class ConvolutionTestingPeer {
- opr::ConvolutionForward& m_conv_opr;
- public:
- explicit ConvolutionTestingPeer(cg::OperatorNodeBase* opr)
- : m_conv_opr(opr->cast_final_safe<opr::ConvolutionForward>()) {}
- void set_megdnn_opr(
- std::unique_ptr<megdnn::ConvolutionForward> megdnn_opr) {
- m_conv_opr.set_megdnn_opr(std::move(megdnn_opr));
- }
- };
-
- } // namespace testing
- } // namespace opr
- } // namespace mgb
-
- namespace {
-
- using megdnn::TensorND;
- using megdnn::Workspace;
- using opr::testing::ConvolutionTestingPeer;
-
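- //! gmock stub of the megdnn convolution operator; lets tests script and
- //! verify algorithm queries, workspace queries and (preprocessed) execution.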
- class MockConvolutionForward : public megdnn::ConvolutionForward {
- const char* m_algorithm_set_name;
- public:
- MockConvolutionForward(megdnn::ConvolutionForward* orig,
- const char* algo_set_name)
- : megdnn::ConvolutionForward(orig->handle()),
- m_algorithm_set_name(algo_set_name) {}
-
- MOCK_METHOD5(exec, void(_megdnn_tensor_in src, _megdnn_tensor_in filter,
- _megdnn_tensor_out dst,
- const PreprocessedFilter* preprocessed_filter,
- _megdnn_workspace workspace));
- MOCK_METHOD5(exec_preprocess,
- void(const TensorLayout& src_layout, _megdnn_tensor_in filter,
- const TensorLayout& dst_layout,
- PreprocessedFilter* preprocessed_filter,
- _megdnn_workspace workspace));
- MOCK_METHOD4(get_workspace_in_bytes,
- size_t(const TensorLayout& src, const TensorLayout& filter,
- const TensorLayout& dst,
- const PreprocessedFilter* preprocessed_filter));
- MOCK_METHOD3(deduce_preprocessed_filter_layout,
- SmallVector<TensorLayout>(const TensorLayout& src,
- const TensorLayout& filter,
- const TensorLayout& dst));
- MOCK_METHOD3(get_preprocess_workspace_in_bytes,
- size_t(const TensorLayout& src, const TensorLayout& filter,
- const TensorLayout& dst));
-
- MOCK_METHOD3(get_all_algorithms_info,
- std::vector<AlgorithmInfo>(const TensorLayout& p0,
- const TensorLayout& p1,
- const TensorLayout& p2));
- MOCK_METHOD6(get_algorithm_info_heuristic,
- AlgorithmInfo(const TensorLayout& p0, const TensorLayout& p1,
- const TensorLayout& p2,
- size_t workspace_limit_in_bytes,
- const AlgoAttribute& positive_attr,
- const AlgoAttribute& negative_attr));
-
- MOCK_METHOD3(get_all_algorithms,
- std::vector<Algorithm*>(const TensorLayout& p0,
- const TensorLayout& p1,
- const TensorLayout& p2));
- MOCK_METHOD6(get_algorithm_heuristic,
- Algorithm*(const TensorLayout& p0, const TensorLayout& p1,
- const TensorLayout& p2,
- size_t workspace_limit_in_bytes,
- const AlgoAttribute& positive_attr,
- const AlgoAttribute& negative_attr));
-
- MOCK_METHOD1(get_algorithm_from_desc,
- Algorithm*(const AlgorithmDesc&));
-
- protected:
- const char* get_algorithm_set_name() const override {
- return m_algorithm_set_name;
- }
- };
-
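- //! Minimal Algorithm stub that always reports the REPRODUCIBLE attribute.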
- class MockAlgorithm : public megdnn::detail::Algorithm {
- const char* m_name;
-
- public:
- MockAlgorithm(const char* name = "NotImportant") : m_name(name) {}
- Attribute attribute() const override {
- return Attribute::REPRODUCIBLE;
- }
- const char* name() const override { return m_name; }
- uint32_t type() const override {
- return megdnn::detail::Algorithm::INVALID_ALGO_TYPE;
- }
-
- virtual ~MockAlgorithm() = default;
- };
-
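- //! Fixture that builds a small convolution graph, then swaps a
- //! MockConvolutionForward into the operator via ConvolutionTestingPeer,
- //! with weight preprocessing enabled in the graph options.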
- class TestWeightPreprocess : public ::testing::Test {
- protected:
- CompNode comp_node;
- std::shared_ptr<ComputingGraph> graph;
- std::shared_ptr<HostTensorND> x_host;
- MockConvolutionForward* mock_conv_ptr;
- SymbolVar y;
- HostTensorND y_host;
- std::unique_ptr<cg::AsyncExecutable> func;
-
- MockConvolutionForward& mock_conv() { return *mock_conv_ptr; }
-
- void SetUp() override {
- constexpr uint32_t ih = 10, ic = 16, oc = 32, ph = 0, sh = 1, fh = 2,
- iw = ih;
- comp_node = CompNode::load("cpux");
- graph = ComputingGraph::make();
- graph->options().graph_opt.weight_preprocess = is_weight_preprocess();
- TensorShape x_shape{1, ic, ih, iw}, w_shape{oc, ic, fh, fh};
- x_host = std::make_shared<HostTensorND>(comp_node, x_shape);
- auto x = opr::Host2DeviceCopy::make(*graph, x_host);
- auto w = opr::ImmutableTensor::make(*graph, {comp_node, w_shape});
- Param param;
- param.pad_h = param.pad_w = ph;
- param.stride_h = param.stride_w = sh;
- param.format = Param::Format::NCHW;
- y = opr::ConvolutionForward::make(x, w, param);
- auto& opr =
- y.node()->owner_opr()->cast_final<opr::ConvolutionForward>();
- auto mock = std::make_unique<MockConvolutionForward>(
- opr.megdnn_opr(), ::testing::UnitTest::GetInstance()
- ->current_test_info()
- ->name());
- mock_conv_ptr = mock.get();
- ConvolutionTestingPeer{&opr}.set_megdnn_opr(std::move(mock));
- func = graph->compile({make_callback_copy(y, y_host)});
- }
-
- void run() { func->execute().wait(); }
-
- virtual bool is_weight_preprocess() { return true; }
-
- void TearDown() override {
- func.reset();
- // Triggers mock check
- graph.reset();
- x_host.reset();
- }
- };
-
- TEST_F(TestWeightPreprocess, NoPreprocessNeeded) {
- using ::testing::_;
- using ::testing::Return;
- auto& mock = mock_conv();
-
- MockAlgorithm algo;
- EXPECT_CALL(mock, get_algorithm_heuristic(_, _, _, _, _, _))
- .WillRepeatedly(Return(&algo));
- EXPECT_CALL(mock, get_algorithm_from_desc(_))
- .WillRepeatedly(Return(&algo));
- EXPECT_CALL(mock, get_workspace_in_bytes(_, _, _, _))
- .WillRepeatedly(Return(0));
- EXPECT_CALL(mock, get_preprocess_workspace_in_bytes(_, _, _))
- .WillRepeatedly(Return(0));
-
- {
- ::testing::InSequence seq;
- // Return empty preprocess filters, indicating no need to preprocess
- EXPECT_CALL(mock, deduce_preprocessed_filter_layout(_, _, _))
- .WillRepeatedly(Return(SmallVector<TensorLayout>{}));
- EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _)).Times(0);
- EXPECT_CALL(mock, exec(_, _, _, nullptr, _));
- run();
- }
- }
-
- TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) {
- using ::testing::_;
- using ::testing::Return;
- using ::testing::Field;
- using ::testing::Invoke;
- using ::testing::Expectation;
- using PF = MockConvolutionForward::PreprocessedFilter;
-
- auto& mock = mock_conv();
- MockAlgorithm algo;
- SmallVector<TensorLayout> filter_layout{{{1, 2, 3, 4}, dtype::Float32()},
- {{5, 6, 7, 8}, dtype::Float32()}};
-
- EXPECT_CALL(mock, deduce_preprocessed_filter_layout(_, _, _))
- .WillRepeatedly(Return(filter_layout));
-
- EXPECT_CALL(mock, get_algorithm_from_desc(_))
- .WillRepeatedly(Return(&algo));
-
- Expectation algo_call =
- EXPECT_CALL(mock, get_algorithm_heuristic(_, _, _, _, _, _))
- .WillOnce(Return(&algo));
- Expectation ws_call = EXPECT_CALL(mock, get_workspace_in_bytes(_, _, _, _))
- .After(algo_call)
- .WillOnce(Return(0));
- Expectation pre_ws_call =
- EXPECT_CALL(mock, get_preprocess_workspace_in_bytes(_, _, _))
- .After(algo_call)
- .WillOnce(Return(233));
- {
- ::testing::InSequence seq;
-
- // exec_preprocess should be called only once, with workspace allocated
- int salt = 0;
- EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _))
- .After(ws_call, pre_ws_call)
- .WillOnce(Invoke([&](const TensorLayout&, _megdnn_tensor_in,
- const TensorLayout&, PF* pf,
- _megdnn_workspace workspace) {
- ASSERT_EQ(workspace.size, 233);
- ASSERT_NE(pf, nullptr);
- pf->algorithm_id = &salt;
- ASSERT_EQ(pf->tensors.size(), 2);
- ASSERT_TRUE(pf->tensors[0].layout.eq_shape({1, 2, 3, 4}));
- ASSERT_TRUE(pf->tensors[1].layout.eq_shape({5, 6, 7, 8}));
- ASSERT_NE(pf->tensors[0].raw_ptr, nullptr);
- ASSERT_NE(pf->tensors[1].raw_ptr, nullptr);
- pf->tensors[0].ptr<float>()[0] = 114.514f;
- pf->tensors[1].ptr<float>()[0] = 1926.0817f;
- }));
- // Run the graph multiple times.
- for (int i = 0; i < 3; i++) {
- if (i > 0) {
- EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _)).Times(0);
- }
- EXPECT_CALL(mock, exec(_, _, _, _, _))
- .WillOnce(Invoke([&](_megdnn_tensor_in, _megdnn_tensor_in,
- _megdnn_tensor_out, const PF* pf,
- _megdnn_workspace) {
- ASSERT_NE(pf, nullptr);
- ASSERT_EQ(pf->algorithm_id, &salt);
- ASSERT_EQ(pf->tensors[0].ptr<float>()[0], 114.514f);
- ASSERT_EQ(pf->tensors[1].ptr<float>()[0], 1926.0817f);
- }));
- run();
- }
- }
- }
-
- class TestNoWeightPreprocess : public TestWeightPreprocess {
- bool is_weight_preprocess() override { return false; }
- };
-
- TEST_F(TestNoWeightPreprocess, NoPreprocess) {
- using ::testing::_;
- using ::testing::Return;
- auto& mock = mock_conv();
-
- MockAlgorithm algo;
- EXPECT_CALL(mock, get_algorithm_heuristic(_, _, _, _, _, _))
- .WillRepeatedly(Return(&algo));
- EXPECT_CALL(mock, get_algorithm_from_desc(_))
- .WillRepeatedly(Return(&algo));
- EXPECT_CALL(mock, get_workspace_in_bytes(_, _, _, _))
- .WillRepeatedly(Return(0));
- EXPECT_CALL(mock, get_preprocess_workspace_in_bytes(_, _, _))
- .WillRepeatedly(Return(0));
-
- {
- ::testing::InSequence seq;
- // Return empty preprocess filters, indicating no need to preprocess
- EXPECT_CALL(mock, deduce_preprocessed_filter_layout(_, _, _)).Times(0);
- EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _)).Times(0);
- EXPECT_CALL(mock, exec(_, _, _, nullptr, _));
- run();
- }
- }
-
- } // anonymous namespace
-
- #endif
-
- namespace {
-
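- //! Round-trips a two-layer quantized-int4 ConvBias graph through the
- //! serializer and checks that the reloaded graph reproduces the result.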
- TEST(TestOprDNN, ConvBiasInt4Serialize) {
- using namespace serialization;
-
- float inp_scale = 1.20210327f;
- float filt_scale = 1.20210406f;
- float bias_scale = inp_scale * filt_scale;
- DType output_dtype = dtype::QuantizedS4{inp_scale};
-
- HostTensorGenerator<dtype::Int8> gen;
- std::shared_ptr<HostTensorND> xv;
- auto mkvar = [](const char* name, const DType& dtype,
- std::shared_ptr<ComputingGraph> graph,
- std::shared_ptr<HostTensorND> val) {
- return opr::TypeCvt::make(
- opr::Host2DeviceCopy::make(*graph, val).rename(name), dtype);
- };
- auto mkcvar =
- [&gen](const char* name, const TensorShape& shp, const DType& dtype,
- std::shared_ptr<ComputingGraph> graph, const CompNode& cn) {
- return opr::TypeCvt::make(
- opr::SharedDeviceTensor::make(*graph, *gen(shp, cn))
- .rename(name),
- dtype);
- };
-
- auto fname = output_file("ConvBiasInt4Serialize");
- HostTensorND y1, y2;
- auto dump = [&]() {
- opr::ConvBias::Param param;
- param.mode = Mode::CONVOLUTION;
-
- auto cn = CompNode::load("cpu0");
- auto graph = ComputingGraph::make();
- xv = gen({1, 64, 56, 56}, cn);
- auto x = mkvar("x", dtype::QuantizedS4{inp_scale}, graph, xv);
- auto w = mkcvar("w", {256, 64, 1, 1}, dtype::QuantizedS4{filt_scale}, graph, cn);
- auto b = mkcvar("b", {1, 256, 1, 1}, dtype::QuantizedS32{bias_scale}, graph, cn);
- auto y = opr::ConvBiasForward::make(x, w, b, param, {},
- OperatorNodeConfig{output_dtype});
- auto w1 = mkcvar("w1", {64, 256, 1, 1}, dtype::QuantizedS4{filt_scale},
- graph, cn);
- auto b1 = mkcvar("b1", {1, 64, 1, 1}, dtype::QuantizedS32{bias_scale},
- graph, cn);
- y = opr::ConvBiasForward::make(y, w1, b1, param, {},
- OperatorNodeConfig{output_dtype});
- y = opr::TypeCvt::make(y, dtype::Float32());
- auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
- auto func = graph->compile({make_callback_copy(y, y1)});
- func->execute();
- func->wait();
- auto rst = dumper->dump({y});
- ASSERT_EQ(rst.outputs.size(), 1u);
- };
-
- auto load = [&]() {
- auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
- auto rst = loader->load();
- for (const auto& t : rst.tensor_map) {
- t.second->copy_from(*xv).sync();
- }
- auto func = rst.graph->compile(
- {make_callback_copy(rst.output_var_list[0], y2)});
- func->execute();
- func->wait();
- ASSERT_EQ(rst.output_var_list.size(), 1u);
- EXPECT_EQ(rst.output_var_list[0].dtype(), dtype::Float32());
- };
-
- dump();
- load();
- MGB_ASSERT_TENSOR_NEAR(y1, y2, 1e-3);
- }
-
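- //! Same as ConvBiasInt4Serialize, but applies gopt::ParamFusePass before
- //! dumping, so fused int4 parameters must survive serialization.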
- TEST(TestOprDNN, ConvBiasInt4SerializeWithParamFuse) {
- using namespace serialization;
-
- float inp_scale = 1.20210327f;
- float filt_scale = 1.20210406f;
- float bias_scale = inp_scale * filt_scale;
- DType output_dtype = dtype::QuantizedS4{inp_scale};
-
- HostTensorGenerator<dtype::Int8> gen;
- std::shared_ptr<HostTensorND> xv;
- auto mkvar = [](const char* name, const DType& dtype,
- std::shared_ptr<ComputingGraph> graph,
- std::shared_ptr<HostTensorND> val) {
- return opr::TypeCvt::make(
- opr::Host2DeviceCopy::make(*graph, val).rename(name), dtype);
- };
- auto mkcvar =
- [&gen](const char* name, const TensorShape& shp, const DType& dtype,
- std::shared_ptr<ComputingGraph> graph, const CompNode& cn) {
- return opr::TypeCvt::make(
- opr::SharedDeviceTensor::make(*graph, *gen(shp, cn))
- .rename(name),
- dtype);
- };
-
- auto fname = output_file("ConvBiasInt4SerializeWithParamFuse");
- HostTensorND y1, y2;
- auto dump = [&]() {
- opr::ConvBias::Param param;
- param.mode = Mode::CONVOLUTION;
-
- auto cn = CompNode::load("cpu0");
- auto graph = ComputingGraph::make();
- xv = gen({1, 64, 56, 56}, cn);
- auto x = mkvar("x", dtype::QuantizedS4{inp_scale}, graph, xv);
- auto w = mkcvar("w", {256, 64, 1, 1}, dtype::QuantizedS4{filt_scale}, graph, cn);
- auto b = mkcvar("b", {1, 256, 1, 1}, dtype::QuantizedS32{bias_scale}, graph, cn);
- auto y = opr::ConvBiasForward::make(x, w, b, param, {},
- OperatorNodeConfig{output_dtype});
- auto w1 = mkcvar("w1", {64, 256, 1, 1}, dtype::QuantizedS4{filt_scale},
- graph, cn);
- auto b1 = mkcvar("b1", {1, 64, 1, 1}, dtype::QuantizedS32{bias_scale},
- graph, cn);
- y = opr::ConvBiasForward::make(y, w1, b1, param, {},
- OperatorNodeConfig{output_dtype});
- y = opr::TypeCvt::make(y, dtype::Float32());
- SymbolVar y_param_fused;
- unpack_vector(gopt::GraphOptimizer{}
- .add_pass<gopt::ParamFusePass>()
- .apply({{y}})
- .endpoint_vars(),
- y_param_fused);
- auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
- auto func = graph->compile({make_callback_copy(y_param_fused, y1)});
- func->execute();
- func->wait();
- auto rst = dumper->dump({y_param_fused});
- ASSERT_EQ(rst.outputs.size(), 1u);
- };
-
- auto load = [&]() {
- auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
- auto rst = loader->load();
- for (const auto& t : rst.tensor_map) {
- t.second->copy_from(*xv).sync();
- }
- auto func = rst.graph->compile(
- {make_callback_copy(rst.output_var_list[0], y2)});
- func->execute();
- func->wait();
- ASSERT_EQ(rst.output_var_list.size(), 1u);
- EXPECT_EQ(rst.output_var_list[0].dtype(), dtype::Float32());
- };
-
- dump();
- load();
- MGB_ASSERT_TENSOR_NEAR(y1, y2, 1e-3);
- }
- } // namespace
-
- // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
|