
refactor(dnn): refactor winograd strategy helper

GitOrigin-RevId: ecc2b15df9
tags/v0.3.2
Megvii Engine Team, 5 years ago
parent commit dc6f89f2d3
5 changed files with 339 additions and 643 deletions
  1. dnn/src/common/winograd/winograd_helper.cpp  +261 -540
  2. dnn/src/common/winograd/winograd_helper.h  +4 -44
  3. dnn/src/fallback/conv_bias/winograd/strategy.cpp  +48 -31
  4. dnn/src/fallback/conv_bias/winograd/strategy.h  +1 -0
  5. dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp  +25 -28

dnn/src/common/winograd/winograd_helper.cpp  +261 -540

@@ -58,368 +58,300 @@ struct OutputGetter<
return dtype.param<dtype::Quantized8Asymm>().quantize(item).as_uint8();
}
};

} // namespace

namespace megdnn {
namespace winograd {

template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type>
class StrategyHelper<ctype, dst_type, input_filter_compute_type,
output_compute_type, param::MatrixMul::Format::DEFAULT> {
public:
static void filter(const ctype* filter,
input_filter_compute_type* filter_transform_buf,
input_filter_compute_type* transform_mid_buf, size_t OC,
size_t IC, size_t oc_start, size_t oc_end, size_t m,
size_t r, const std::vector<float>& interp_points,
DType dtype, float rescale) {
size_t alpha = m + r - 1;
WinogradCoeff<input_filter_compute_type> winograd_coeff(m, r,
interp_points);

input_filter_compute_type* mid_buf1 = transform_mid_buf;
input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;

Getter<ctype, input_filter_compute_type> getter(dtype);
for (size_t oc = oc_start; oc < oc_end; oc++) {
rep(ic, IC) {
const ctype* filter_ptr = filter + (oc * IC + ic) * r * r;
rep(i, r) rep(j, r) {
mid_buf1[i * r + j] = getter(filter_ptr[i * r + j]);
}
constexpr size_t layout_pack_size(param::ConvBias::Format layout) {
switch (layout) {
case param::ConvBias::Format::NHWCD4:
return 4;
case param::ConvBias::Format::NCHW4:
return 4;
case param::ConvBias::Format::NCHW32:
return 32;
case param::ConvBias::Format::NCHW88:
case param::ConvBias::Format::NCHW8:
return 8;
default:
return 1;
}
}
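
Since layout_pack_size is constexpr, the channel-packing factor for each ConvBias layout can be checked at compile time. A minimal illustrative check (not part of the commit, assuming the megdnn param headers are in scope):

static_assert(layout_pack_size(param::ConvBias::Format::NCHW) == 1,
              "plain NCHW is not channel-packed");
static_assert(layout_pack_size(param::ConvBias::Format::NCHW88) == 8,
              "NCHW88 packs 8 channels per block");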

template <param::ConvBias::Format layout, param::MatrixMul::Format format>
struct FilterVisitor {
size_t IC, OC;
FilterVisitor(size_t OC, size_t IC) : IC(IC), OC(OC) {}
size_t get(size_t r, size_t oc, size_t ic, size_t h, size_t w) {
constexpr size_t input_pack_size = layout_pack_size(layout);
size_t ocb_layout = oc / input_pack_size;
size_t oc_layout = oc % input_pack_size;
size_t icb_layout = ic / input_pack_size;
size_t ic_layout = ic % input_pack_size;

return (ocb_layout * (IC / input_pack_size) + icb_layout) * r * r *
input_pack_size * input_pack_size +
ic_layout * input_pack_size + oc_layout +
(h * r + w) * input_pack_size * input_pack_size;
}

/* tmp = Matmul(G, src) */
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type,
false, false>(
winograd_coeff.G(rescale).data(), mid_buf1, mid_buf2,
alpha, r, r, r, r, r, dtype, dtype);
/* dst = Matmul(tmp, G^T) */
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type,
false, true>(
mid_buf2, winograd_coeff.G(rescale).data(), mid_buf1,
alpha, alpha, r, r, r, alpha, dtype, dtype);

rep(i, alpha) rep(j, alpha) {
filter_transform_buf[(i * alpha + j) * OC * IC + ic * OC +
oc] = mid_buf1[i * alpha + j];
}
}
size_t put(size_t alpha, size_t oc, size_t ic, size_t h, size_t w) {
if (format == param::MatrixMul::Format::DEFAULT) {
return (h * alpha + w) * OC * IC + ic * OC + oc;
}
size_t matmul_pack_size = MatrixMulForward::pack_size(format);
size_t ocb = oc / matmul_pack_size;
size_t oc_pack = oc % matmul_pack_size;
size_t icb = ic / matmul_pack_size;
size_t ic_pack = ic % matmul_pack_size;

size_t OCB = OC / matmul_pack_size;
size_t ICB = IC / matmul_pack_size;

return (h * alpha + w) * OCB * ICB * matmul_pack_size *
matmul_pack_size +
ocb * ICB * matmul_pack_size * matmul_pack_size +
icb * matmul_pack_size * matmul_pack_size +
ic_pack * matmul_pack_size + oc_pack;
}
};
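
FilterVisitor splits the layout bookkeeping in two: get() indexes the source filter in its ConvBias layout, while put() indexes the transformed buffer in the MatrixMul format. For the DEFAULT format the transformed filter is simply (alpha x alpha, IC, OC); a standalone sketch of that index computation (illustrative only, the name filter_put_default is ours):

// Index of element (h, w, ic, oc) in filter_transform_buf for
// format == param::MatrixMul::Format::DEFAULT, mirroring put() above.
inline size_t filter_put_default(size_t alpha, size_t OC, size_t IC,
                                 size_t oc, size_t ic, size_t h, size_t w) {
    return (h * alpha + w) * OC * IC + ic * OC + oc;
}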

static void input(const ctype* input,
input_filter_compute_type* input_transform_buf,
input_filter_compute_type* transform_mid_buf,
int ih_start, int iw_start, size_t IH, size_t IW,
size_t IC, size_t unit_idx, size_t nr_units_in_tile,
size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float rescale) {
size_t alpha = m + r - 1;
Getter<ctype, input_filter_compute_type> getter(dtype);
WinogradCoeff<input_filter_compute_type> winograd_coeff(m, r,
interp_points);
rep(ic, IC) {
input_filter_compute_type* mid_buf1 = transform_mid_buf;
input_filter_compute_type* mid_buf2 =
transform_mid_buf + alpha * alpha;
template <param::ConvBias::Format layout, param::MatrixMul::Format format>
struct InputVisitor {
size_t IC;
InputVisitor(size_t IC) : IC(IC) {}

memset(mid_buf1, 0,
alpha * alpha * sizeof(input_filter_compute_type));
rep(i, alpha) rep(j, alpha) {
int ih = ih_start + i;
int iw = iw_start + j;
if (ih >= 0 && ih < (int)IH && iw >= 0 && iw < (int)IW) {
mid_buf1[i * alpha + j] =
getter(input[ic * IH * IW + ih * IW + iw]);
}
}
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type, true,
false>(
winograd_coeff.B(rescale).data(), mid_buf1, mid_buf2, alpha,
alpha, alpha, alpha, alpha, alpha, dtype, dtype);
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type, false,
false>(
mid_buf2, winograd_coeff.B(rescale).data(), mid_buf1, alpha,
alpha, alpha, alpha, alpha, alpha, dtype, dtype);
rep(i, alpha) rep(j, alpha) {
input_transform_buf[(i * alpha + j) * nr_units_in_tile * IC +
unit_idx * IC + ic] =
mid_buf1[i * alpha + j];
}
}
size_t get(size_t alpha, size_t ic, size_t IH, size_t IW, size_t ih,
size_t iw) {
constexpr size_t input_pack_size = layout_pack_size(layout);
size_t icb_layout = ic / input_pack_size;
size_t ic_layout = ic % input_pack_size;

return (icb_layout * IH * IW + ih * IW + iw) * input_pack_size +
ic_layout;
}

static void output(const output_compute_type* output_transform_buf,
const output_compute_type* bias, dst_type* output,
output_compute_type* transform_mid_buf, BiasMode bmode,
NonlineMode nonline_mode, size_t oh_start,
size_t ow_start, size_t OH, size_t OW, size_t oc_start,
size_t oc_end, size_t unit_idx, size_t nr_units_in_tile,
size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float input_filter_scale, float input_filter_rescale,
float rescale) {
size_t alpha = m + r - 1;
size_t OC = oc_end - oc_start;

OutputGetter<output_compute_type, dst_type> getter(dtype);
winograd::WinogradCoeff<output_compute_type> winograd_coeff(
m, r, interp_points);
for (size_t oc = oc_start; oc < oc_end; oc++) {
size_t oc_index = oc - oc_start;
output_compute_type* mid_buf1 = transform_mid_buf;
output_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;

// gather
rep(i, alpha) rep(j, alpha) {
mid_buf1[i * alpha + j] =
output_transform_buf[(i * alpha + j) *
nr_units_in_tile * OC +
unit_idx * OC + oc_index];
}
/* A[alpha*m] M[alpha*alpha] */
megdnn::naive::run_matrix_mul_tpl<output_compute_type,
output_compute_type, true, false>(
winograd_coeff.A(rescale).data(), mid_buf1, mid_buf2, m,
alpha, alpha, m, alpha, alpha, dtype, dtype);
megdnn::naive::run_matrix_mul_tpl<
output_compute_type, output_compute_type, false, false>(
mid_buf2, winograd_coeff.A(rescale).data(), mid_buf1, m, m,
alpha, alpha, m, m, dtype, dtype);
rep(i, m) rep(j, m) {
auto oh = oh_start + i;
auto ow = ow_start + j;
if (oh < OH && ow < OW) {
float val = mid_buf1[i * m + j];
if (bmode == BiasMode::BROADCAST_CHANNEL_BIAS) {
val += bias[oc] * input_filter_rescale *
input_filter_rescale;
} else if (bmode == BiasMode::BIAS) {
val += bias[oc * OH * OW + oh * OW + ow] *
input_filter_rescale * input_filter_rescale;
}
val = val * input_filter_scale /
(input_filter_rescale * input_filter_rescale *
rescale * rescale);
if (nonline_mode == NonlineMode::RELU) {
val = val > 0 ? val : 0;
} else if (nonline_mode == NonlineMode::SIGMOID) {
val = 1.f / (expf(-val) + 1.f);
} else if (nonline_mode == NonlineMode::H_SWISH) {
val = val * std::min(std::max(val + 3, 0.f), 6.f) / 6.f;
} else {
megdnn_assert(nonline_mode == NonlineMode::IDENTITY);
}

output[oc * OH * OW + oh * OW + ow] = getter(val);
}
}
size_t put(size_t alpha, size_t ic, size_t nr_units_in_tile,
size_t unit_idx, size_t h, size_t w) {
if (format == param::MatrixMul::Format::DEFAULT) {
return (h * alpha + w) * nr_units_in_tile * IC + unit_idx * IC + ic;
}
size_t matmul_pack_size = MatrixMulForward::pack_size(format);
size_t icb = ic / matmul_pack_size;
size_t ic_pack = ic % matmul_pack_size;
size_t ICB = IC / matmul_pack_size;

return (h * alpha + w) * ICB * nr_units_in_tile * matmul_pack_size +
icb * nr_units_in_tile * matmul_pack_size +
unit_idx * matmul_pack_size + ic_pack;
}
};
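
For the MK4/MK8 formats the transformed input is blocked by the MatrixMul pack size (4 or 8). A standalone sketch of the packed branch of InputVisitor::put (illustrative only; pack is passed explicitly here instead of being queried from MatrixMulForward::pack_size):

// Transformed-input index for a packed MatrixMul format, mirroring the
// non-DEFAULT branch of put() above.
inline size_t input_put_packed(size_t alpha, size_t IC, size_t pack,
                               size_t nr_units_in_tile, size_t unit_idx,
                               size_t ic, size_t h, size_t w) {
    size_t icb = ic / pack;
    size_t ic_pack = ic % pack;
    size_t ICB = IC / pack;
    return (h * alpha + w) * ICB * nr_units_in_tile * pack +
           icb * nr_units_in_tile * pack + unit_idx * pack + ic_pack;
}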

template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type, param::MatrixMul::Format format>
class StrategyHelper<
ctype, dst_type, input_filter_compute_type, output_compute_type, format,
std::enable_if_t<format == param::MatrixMul::Format::MK4 ||
format == param::MatrixMul::Format::MK8>> {
public:
static void filter(const ctype* filter,
input_filter_compute_type* filter_transform_buf,
input_filter_compute_type* transform_mid_buf, size_t OC,
size_t IC, size_t oc_start, size_t oc_end, size_t m,
size_t r, const std::vector<float>& interp_points,
DType dtype, float rescale) {
size_t alpha = m + r - 1;
WinogradCoeff<input_filter_compute_type> winograd_coeff(m, r,
interp_points);

input_filter_compute_type* mid_buf1 = transform_mid_buf;
input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;

Getter<ctype, input_filter_compute_type> getter(dtype);
size_t OCB = OC / pack_size;
size_t ICB = IC / pack_size;
for (size_t oc = oc_start; oc < oc_end; oc++) {
rep(ic, IC) {
const ctype* filter_ptr = filter + (oc * IC + ic) * r * r;
rep(i, r) rep(j, r) {
mid_buf1[i * r + j] = getter(filter_ptr[i * r + j]);
}
template <param::ConvBias::Format layout, param::MatrixMul::Format format>
struct OutputVisitor {
size_t OC;
OutputVisitor(size_t OC) : OC(OC) {}

/* tmp = Matmul(G, src) */
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type,
false, false>(
winograd_coeff.G(rescale).data(), mid_buf1, mid_buf2,
alpha, r, r, r, r, r, dtype, dtype);
/* dst = Matmul(tmp, G^T) */
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type,
false, true>(
mid_buf2, winograd_coeff.G(rescale).data(), mid_buf1,
alpha, alpha, r, r, r, alpha, dtype, dtype);

size_t ocb = oc / pack_size;
size_t oc_pack = oc % pack_size;
size_t icb = ic / pack_size;
size_t ic_pack = ic % pack_size;
rep(i, alpha) rep(j, alpha) {
filter_transform_buf[(i * alpha + j) * OCB * ICB *
pack_size * pack_size +
ocb * ICB * pack_size * pack_size +
icb * pack_size * pack_size +
ic_pack * pack_size + oc_pack] =
mid_buf1[i * alpha + j];
}
}
size_t get(size_t alpha, size_t oc_index, size_t oc,
size_t nr_units_in_tile, size_t unit_idx, size_t h, size_t w) {
if (format == param::MatrixMul::Format::DEFAULT) {
return (h * alpha + w) * nr_units_in_tile * OC + unit_idx * OC +
oc_index;
}
size_t matmul_pack_size = MatrixMulForward::pack_size(format);
size_t ocb = oc_index / matmul_pack_size;
size_t oc_pack = oc % matmul_pack_size;
size_t OCB = OC / matmul_pack_size;

return (h * alpha + w) * OCB * nr_units_in_tile * matmul_pack_size +
ocb * nr_units_in_tile * matmul_pack_size +
unit_idx * matmul_pack_size + oc_pack;
}

static void input(const ctype* input,
input_filter_compute_type* input_transform_buf,
input_filter_compute_type* transform_mid_buf,
int ih_start, int iw_start, size_t IH, size_t IW,
size_t IC, size_t unit_idx, size_t nr_units_in_tile,
size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float rescale) {
size_t alpha = m + r - 1;
Getter<ctype, input_filter_compute_type> getter(dtype);
WinogradCoeff<input_filter_compute_type> winograd_coeff(m, r,
interp_points);
size_t ICB = IC / pack_size;
rep(ic, IC) {
input_filter_compute_type* mid_buf1 = transform_mid_buf;
input_filter_compute_type* mid_buf2 =
transform_mid_buf + alpha * alpha;
size_t put(size_t oc, size_t OH, size_t OW, size_t oh, size_t ow) {
constexpr size_t input_pack_size = layout_pack_size(layout);
size_t oc_layout = oc % input_pack_size;

memset(mid_buf1, 0,
alpha * alpha * sizeof(input_filter_compute_type));
rep(i, alpha) rep(j, alpha) {
int ih = ih_start + i;
int iw = iw_start + j;
if (ih >= 0 && ih < (int)IH && iw >= 0 && iw < (int)IW) {
mid_buf1[i * alpha + j] =
getter(input[ic * IH * IW + ih * IW + iw]);
}
return (oc / input_pack_size * OH * OW + oh * OW + ow) *
input_pack_size +
oc_layout;
}
};
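
OutputVisitor::put addresses the destination (and per-element bias) tensor in the ConvBias layout. With a channel-packed layout such as NCHW88 it reduces to the following standalone sketch (illustrative only; for plain NCHW the pack size is 1 and the formula degenerates to oc * OH * OW + oh * OW + ow):

// Output index for an 8-channel-packed layout (NCHW88), mirroring put() above.
inline size_t output_put_nchw88(size_t oc, size_t OH, size_t OW,
                                size_t oh, size_t ow) {
    constexpr size_t pack = 8;  // layout_pack_size(NCHW88)
    return (oc / pack * OH * OW + oh * OW + ow) * pack + oc % pack;
}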

template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type, param::ConvBias::Format layout,
param::MatrixMul::Format format>
void StrategyHelper<
ctype, dst_type, input_filter_compute_type, output_compute_type, layout,
format>::filter(const ctype* filter,
input_filter_compute_type* filter_transform_buf,
input_filter_compute_type* transform_mid_buf, size_t OC,
size_t IC, size_t oc_start, size_t oc_end, size_t m,
size_t r, const std::vector<float>& interp_points,
DType dtype, float rescale) {
size_t alpha = m + r - 1;
WinogradCoeff<input_filter_compute_type> winograd_coeff(m, r,
interp_points);
input_filter_compute_type* mid_buf1 = transform_mid_buf;
input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;
Getter<ctype, input_filter_compute_type> getter(dtype);
FilterVisitor<layout, format> filter_visitor(OC, IC);

for (size_t oc = oc_start; oc < oc_end; oc++) {
rep(ic, IC) {
rep(i, r) rep(j, r) {
mid_buf1[i * r + j] =
getter(filter[filter_visitor.get(r, oc, ic, i, j)]);
}

/* tmp = Matmul(G, src) */
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type, true,
input_filter_compute_type, false,
false>(
winograd_coeff.B(rescale).data(), mid_buf1, mid_buf2, alpha,
alpha, alpha, alpha, alpha, alpha, dtype, dtype);
winograd_coeff.G(rescale).data(), mid_buf1, mid_buf2, alpha,
r, r, r, r, r, dtype, dtype);
/* dst = Matmul(tmp, G^T) */
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type, false,
false>(
mid_buf2, winograd_coeff.B(rescale).data(), mid_buf1, alpha,
alpha, alpha, alpha, alpha, alpha, dtype, dtype);
size_t icb = ic / pack_size;
size_t ic_pack = ic % pack_size;
true>(
mid_buf2, winograd_coeff.G(rescale).data(), mid_buf1, alpha,
alpha, r, r, r, alpha, dtype, dtype);

rep(i, alpha) rep(j, alpha) {
input_transform_buf[(i * alpha + j) * ICB * nr_units_in_tile *
pack_size +
icb * nr_units_in_tile * pack_size +
unit_idx * pack_size + ic_pack] =
filter_transform_buf[filter_visitor.put(alpha, oc, ic, i, j)] =
mid_buf1[i * alpha + j];
}
}
}
}

static void output(const output_compute_type* output_transform_buf,
const output_compute_type* bias, dst_type* output,
output_compute_type* transform_mid_buf, BiasMode bmode,
NonlineMode nonline_mode, size_t oh_start,
size_t ow_start, size_t OH, size_t OW, size_t oc_start,
size_t oc_end, size_t unit_idx, size_t nr_units_in_tile,
template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type, param::ConvBias::Format layout,
param::MatrixMul::Format format>
void StrategyHelper<
ctype, dst_type, input_filter_compute_type, output_compute_type, layout,
format>::input(const ctype* input,
input_filter_compute_type* input_transform_buf,
input_filter_compute_type* transform_mid_buf,
int ih_start, int iw_start, size_t IH, size_t IW,
size_t IC, size_t unit_idx, size_t nr_units_in_tile,
size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float input_filter_scale, float input_filter_rescale,
float rescale) {
size_t alpha = m + r - 1;
size_t OC = oc_end - oc_start;

OutputGetter<output_compute_type, dst_type> getter(dtype);
winograd::WinogradCoeff<output_compute_type> winograd_coeff(
m, r, interp_points);
size_t OCB = OC / pack_size;
for (size_t oc = oc_start; oc < oc_end; oc++) {
output_compute_type* mid_buf1 = transform_mid_buf;
output_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;

size_t ocb = (oc - oc_start) / pack_size;
size_t oc_pack = oc % pack_size;
// gather
rep(i, alpha) rep(j, alpha) {
mid_buf1[i * alpha + j] = output_transform_buf
[(i * alpha + j) * OCB * nr_units_in_tile * pack_size +
ocb * nr_units_in_tile * pack_size +
unit_idx * pack_size + oc_pack];
size_t alpha = m + r - 1;
WinogradCoeff<input_filter_compute_type> winograd_coeff(m, r,
interp_points);
input_filter_compute_type* mid_buf1 = transform_mid_buf;
input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;
Getter<ctype, input_filter_compute_type> getter(dtype);
InputVisitor<layout, format> input_visitor(IC);

rep(ic, IC) {
memset(mid_buf1, 0, alpha * alpha * sizeof(input_filter_compute_type));
rep(i, alpha) rep(j, alpha) {
int ih = ih_start + i;
int iw = iw_start + j;
if (ih >= 0 && ih < (int)IH && iw >= 0 && iw < (int)IW) {
mid_buf1[i * alpha + j] = getter(
input[input_visitor.get(alpha, ic, IH, IW, ih, iw)]);
}
/* A[alpha*m] M[alpha*alpha] */
megdnn::naive::run_matrix_mul_tpl<output_compute_type,
output_compute_type, true, false>(
winograd_coeff.A(rescale).data(), mid_buf1, mid_buf2, m,
alpha, alpha, m, alpha, alpha, dtype, dtype);
megdnn::naive::run_matrix_mul_tpl<
output_compute_type, output_compute_type, false, false>(
mid_buf2, winograd_coeff.A(rescale).data(), mid_buf1, m, m,
alpha, alpha, m, m, dtype, dtype);
rep(i, m) rep(j, m) {
auto oh = oh_start + i;
auto ow = ow_start + j;
if (oh < OH && ow < OW) {
float val = mid_buf1[i * m + j];
if (bmode == BiasMode::BROADCAST_CHANNEL_BIAS) {
val += bias[oc] * input_filter_rescale *
input_filter_rescale;
} else if (bmode == BiasMode::BIAS) {
val += bias[oc * OH * OW + oh * OW + ow] *
input_filter_rescale * input_filter_rescale;
}
val = val * input_filter_scale /
(input_filter_rescale * input_filter_rescale *
rescale * rescale);
if (nonline_mode == NonlineMode::RELU) {
val = val > 0 ? val : 0;
} else if (nonline_mode == NonlineMode::SIGMOID) {
val = 1.f / (expf(-val) + 1.f);
} else if (nonline_mode == NonlineMode::H_SWISH) {
val = val * std::min(std::max(val + 3, 0.f), 6.f) / 6.f;
} else {
megdnn_assert(nonline_mode == NonlineMode::IDENTITY);
}

output[oc * OH * OW + oh * OW + ow] = getter(val);
}

megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type, true,
false>(
winograd_coeff.B(rescale).data(), mid_buf1, mid_buf2, alpha,
alpha, alpha, alpha, alpha, alpha, dtype, dtype);
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type, false,
false>(
mid_buf2, winograd_coeff.B(rescale).data(), mid_buf1, alpha,
alpha, alpha, alpha, alpha, alpha, dtype, dtype);

rep(i, alpha) rep(j, alpha) {
input_transform_buf[input_visitor.put(alpha, ic, nr_units_in_tile,
unit_idx, i, j)] =
mid_buf1[i * alpha + j];
}
}
}

template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type, param::ConvBias::Format layout,
param::MatrixMul::Format format>
void StrategyHelper<
ctype, dst_type, input_filter_compute_type, output_compute_type, layout,
format>::output(const output_compute_type* output_transform_buf,
const output_compute_type* bias, dst_type* output,
output_compute_type* transform_mid_buf, BiasMode bmode,
NonlineMode nonline_mode, size_t oh_start,
size_t ow_start, size_t OH, size_t OW, size_t oc_start,
size_t oc_end, size_t unit_idx, size_t nr_units_in_tile,
size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float input_filter_scale, float input_filter_rescale,
float rescale) {
size_t alpha = m + r - 1;
winograd::WinogradCoeff<output_compute_type> winograd_coeff(m, r,
interp_points);
output_compute_type* mid_buf1 = transform_mid_buf;
output_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;
OutputGetter<output_compute_type, dst_type> getter(dtype);
OutputVisitor<layout, format> output_visitor(oc_end - oc_start);

for (size_t oc = oc_start; oc < oc_end; oc++) {
/* gather */
rep(i, alpha) rep(j, alpha) {
mid_buf1[i * alpha + j] = output_transform_buf[output_visitor.get(
alpha, oc - oc_start, oc, nr_units_in_tile, unit_idx, i,
j)];
}
/* A[alpha*m] M[alpha*alpha] */
megdnn::naive::run_matrix_mul_tpl<output_compute_type,
output_compute_type, true, false>(
winograd_coeff.A(rescale).data(), mid_buf1, mid_buf2, m, alpha,
alpha, m, alpha, alpha, dtype, dtype);
megdnn::naive::run_matrix_mul_tpl<output_compute_type,
output_compute_type, false, false>(
mid_buf2, winograd_coeff.A(rescale).data(), mid_buf1, m, m,
alpha, alpha, m, m, dtype, dtype);

rep(i, m) rep(j, m) {
auto oh = oh_start + i;
auto ow = ow_start + j;
if (oh < OH && ow < OW) {
float val = mid_buf1[i * m + j];
if (bmode == BiasMode::BROADCAST_CHANNEL_BIAS) {
val += bias[oc] * input_filter_rescale *
input_filter_rescale;
} else if (bmode == BiasMode::BIAS) {
val += bias[output_visitor.put(oc, OH, OW, oh, ow)] *
input_filter_rescale * input_filter_rescale;
}
val = val * input_filter_scale /
(input_filter_rescale * input_filter_rescale * rescale *
rescale);
if (nonline_mode == NonlineMode::RELU) {
val = val > 0 ? val : 0;
} else if (nonline_mode == NonlineMode::SIGMOID) {
val = 1.f / (expf(-val) + 1.f);
} else if (nonline_mode == NonlineMode::H_SWISH) {
val = val * std::min(std::max(val + 3, 0.f), 6.f) / 6.f;
} else {
megdnn_assert(nonline_mode == NonlineMode::IDENTITY);
}
output[output_visitor.put(oc, OH, OW, oh, ow)] = getter(val);
}
}
}

static size_t pack_size;
};

template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type, param::MatrixMul::Format format>
size_t StrategyHelper<
ctype, dst_type, input_filter_compute_type, output_compute_type, format,
std::enable_if_t<format == param::MatrixMul::Format::MK4 ||
format == param::MatrixMul::Format::MK8>>::pack_size =
MatrixMulForward::pack_size(format);

#define INST(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type) \
template class StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::MatrixMul::Format::DEFAULT>;
#define INST(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type) \
template class StrategyHelper<_ctype, _dst_type, \
_input_filter_compute_type, \
_output_compute_type>;

INST(float, float, float, float)
MEGDNN_INC_FLOAT16(INST(dt_float16, dt_float16, dt_float16, dt_float16))
@@ -428,234 +360,23 @@ INST(uint8_t, uint8_t, int16_t, int)
#undef INST

#define INST(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type) \
_output_compute_type, layout) \
template class StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::MatrixMul::Format::MK4>;
INST(float, float, float, float)
_output_compute_type, layout, param::MatrixMul::Format::MK4>;
INST(float, float, float, float, param::ConvBias::Format::NCHW)
#undef INST

#define INST(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type) \
_output_compute_type, layout) \
template class StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::MatrixMul::Format::MK8>;
INST(int8_t, int8_t, int16_t, int)
MEGDNN_INC_FLOAT16(INST(dt_float16, dt_float16, dt_float16, dt_float16))
#undef INST

template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type, param::MatrixMul::Format format>
class StrategyHelperNchwxx<
ctype, dst_type, input_filter_compute_type, output_compute_type, format,
std::enable_if_t<format == param::MatrixMul::Format::MK8>> {
public:
static void filter(const ctype* filter,
input_filter_compute_type* filter_transform_buf,
input_filter_compute_type* transform_mid_buf, size_t OC,
size_t IC, size_t oc_start, size_t oc_end, size_t m,
size_t r, const std::vector<float>& interp_points,
DType dtype, float rescale) {
megdnn_assert(
(oc_end - oc_start) % 8 == 0 && oc_start % 8 == 0 &&
oc_end % 8 == 0 && IC % 8 == 0 && OC % 8 == 0,
"Winograd filter transform input param is not times of 8!");

size_t alpha = m + r - 1;
WinogradCoeff<input_filter_compute_type> winograd_coeff(m, r,
interp_points);

input_filter_compute_type* mid_buf1 = transform_mid_buf;
input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;

Getter<ctype, input_filter_compute_type> getter(dtype);
size_t OCB = OC / pack_size;
size_t ICB = IC / pack_size;
for (size_t oc = oc_start; oc < oc_end; oc++) {
rep(ic, IC) {
size_t ocb = oc / pack_size;
size_t oc_pack = oc % pack_size;
size_t icb = ic / pack_size;
size_t ic_pack = ic % pack_size;

const ctype* filter_ptr =
filter + (ocb * (IC / 8) + icb) * r * r * 8 * 8 +
ic_pack * 8 + oc_pack;
rep(i, r) rep(j, r) {
mid_buf1[i * r + j] =
getter(filter_ptr[(i * r + j) * 8 * 8]);
}

/* tmp = Matmul(G, src) */
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type,
false, false>(
winograd_coeff.G(rescale).data(), mid_buf1, mid_buf2,
alpha, r, r, r, r, r, dtype, dtype);
/* dst = Matmul(tmp, G^T) */
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type,
false, true>(
mid_buf2, winograd_coeff.G(rescale).data(), mid_buf1,
alpha, alpha, r, r, r, alpha, dtype, dtype);

rep(i, alpha) rep(j, alpha) {
filter_transform_buf[(i * alpha + j) * OCB * ICB *
pack_size * pack_size +
ocb * ICB * pack_size * pack_size +
icb * pack_size * pack_size +
ic_pack * pack_size + oc_pack] =
mid_buf1[i * alpha + j];
}
}
}
}

static void input(const ctype* input,
input_filter_compute_type* input_transform_buf,
input_filter_compute_type* transform_mid_buf,
int ih_start, int iw_start, size_t IH, size_t IW,
size_t IC, size_t unit_idx, size_t nr_units_in_tile,
size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float rescale) {
size_t alpha = m + r - 1;
Getter<ctype, input_filter_compute_type> getter(dtype);
WinogradCoeff<input_filter_compute_type> winograd_coeff(m, r,
interp_points);
size_t ICB = IC / pack_size;
rep(ic, IC) {
size_t icb = ic / pack_size;
size_t ic_pack = ic % pack_size;
input_filter_compute_type* mid_buf1 = transform_mid_buf;
input_filter_compute_type* mid_buf2 =
transform_mid_buf + alpha * alpha;

memset(mid_buf1, 0,
alpha * alpha * sizeof(input_filter_compute_type));
rep(i, alpha) rep(j, alpha) {
int ih = ih_start + i;
int iw = iw_start + j;
if (ih >= 0 && ih < (int)IH && iw >= 0 && iw < (int)IW) {
mid_buf1[i * alpha + j] = getter(
input[(icb * IH * IW + ih * IW + iw) * pack_size +
ic_pack]);
}
}
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type, true,
false>(
winograd_coeff.B(rescale).data(), mid_buf1, mid_buf2, alpha,
alpha, alpha, alpha, alpha, alpha, dtype, dtype);
megdnn::naive::run_matrix_mul_tpl<input_filter_compute_type,
input_filter_compute_type, false,
false>(
mid_buf2, winograd_coeff.B(rescale).data(), mid_buf1, alpha,
alpha, alpha, alpha, alpha, alpha, dtype, dtype);
rep(i, alpha) rep(j, alpha) {
input_transform_buf[(i * alpha + j) * ICB * nr_units_in_tile *
pack_size +
icb * nr_units_in_tile * pack_size +
unit_idx * pack_size + ic_pack] =
mid_buf1[i * alpha + j];
}
}
}

static void output(const output_compute_type* output_transform_buf,
const output_compute_type* bias, dst_type* output,
output_compute_type* transform_mid_buf, BiasMode bmode,
NonlineMode nonline_mode, size_t oh_start,
size_t ow_start, size_t OH, size_t OW, size_t oc_start,
size_t oc_end, size_t unit_idx, size_t nr_units_in_tile,
size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float input_filter_scale, float input_filter_rescale,
float rescale) {
size_t alpha = m + r - 1;
size_t OC = oc_end - oc_start;

OutputGetter<output_compute_type, dst_type> getter(dtype);
winograd::WinogradCoeff<output_compute_type> winograd_coeff(
m, r, interp_points);
size_t OCB = OC / pack_size;
for (size_t oc = oc_start; oc < oc_end; oc++) {
output_compute_type* mid_buf1 = transform_mid_buf;
output_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha;

size_t ocb = (oc - oc_start) / pack_size;
size_t oc_pack = oc % pack_size;
// gather
rep(i, alpha) rep(j, alpha) {
mid_buf1[i * alpha + j] = output_transform_buf
[(i * alpha + j) * OCB * nr_units_in_tile * pack_size +
ocb * nr_units_in_tile * pack_size +
unit_idx * pack_size + oc_pack];
}
/* A[alpha*m] M[alpha*alpha] */
megdnn::naive::run_matrix_mul_tpl<output_compute_type,
output_compute_type, true, false>(
winograd_coeff.A(rescale).data(), mid_buf1, mid_buf2, m,
alpha, alpha, m, alpha, alpha, dtype, dtype);
megdnn::naive::run_matrix_mul_tpl<
output_compute_type, output_compute_type, false, false>(
mid_buf2, winograd_coeff.A(rescale).data(), mid_buf1, m, m,
alpha, alpha, m, m, dtype, dtype);
rep(i, m) rep(j, m) {
auto oh = oh_start + i;
auto ow = ow_start + j;
if (oh < OH && ow < OW) {
float val = mid_buf1[i * m + j];
if (bmode == BiasMode::BROADCAST_CHANNEL_BIAS) {
val += bias[oc] * input_filter_rescale *
input_filter_rescale;
} else if (bmode == BiasMode::BIAS) {
val += bias[(oc / pack_size * OH * OW + oh * OW + ow) *
pack_size +
oc_pack] *
input_filter_rescale * input_filter_rescale;
}
val = val * input_filter_scale /
(input_filter_rescale * input_filter_rescale *
rescale * rescale);
if (nonline_mode == NonlineMode::RELU) {
val = val > 0 ? val : 0;
} else if (nonline_mode == NonlineMode::SIGMOID) {
val = 1.f / (expf(-val) + 1.f);
} else if (nonline_mode == NonlineMode::H_SWISH) {
val = val * std::min(std::max(val + 3, 0.f), 6.f) / 6.f;
} else {
megdnn_assert(nonline_mode == NonlineMode::IDENTITY);
}

output[(oc / pack_size * OH * OW + oh * OW + ow) *
pack_size +
oc_pack] = getter(val);
}
}
}
}

static size_t pack_size;
};

template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type, param::MatrixMul::Format format>
size_t StrategyHelperNchwxx<
ctype, dst_type, input_filter_compute_type, output_compute_type, format,
std::enable_if_t<format == param::MatrixMul::Format::MK8>>::pack_size =
MatrixMulForward::pack_size(format);

#define INST(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type) \
template class StrategyHelperNchwxx< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::MatrixMul::Format::MK8>;
INST(float, float, float, float)
_output_compute_type, layout, param::MatrixMul::Format::MK8>;
INST(int8_t, int8_t, int16_t, int, param::ConvBias::Format::NCHW)
INST(float, float, float, float, param::ConvBias::Format::NCHW88)
MEGDNN_INC_FLOAT16(INST(dt_float16, dt_float16, dt_float16, dt_float16,
param::ConvBias::Format::NCHW))
#undef INST



} // namespace winograd
} // namespace megdnn
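
With the ConvBias layout and the MatrixMul format both in the template parameter list, call sites name the exact combination they need. A sketch of the instantiation the float MK4 fallback strategy below resolves to (the alias name is ours, not part of the commit):

using StrategyHelperF32NchwMk4 = ::megdnn::winograd::StrategyHelper<
        float, float, float, float, megdnn::param::ConvBias::Format::NCHW,
        megdnn::param::MatrixMul::Format::MK4>;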



dnn/src/common/winograd/winograd_helper.h  +4 -44

@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#pragma once
@@ -28,8 +29,8 @@ using BiasMode = ConvBiasForward::BiasMode;
*/
template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type,
param::MatrixMul::Format format = param::MatrixMul::Format::DEFAULT,
typename enable = void>
param::ConvBias::Format layout = param::ConvBias::Format::NCHW,
param::MatrixMul::Format format = param::MatrixMul::Format::DEFAULT>
class StrategyHelper {
public:
static void filter(const ctype* filter,
@@ -61,47 +62,6 @@ public:
float rescale = 1.0f);
};

/**
* \brief Strategy helper, contains some helper function for debug kernel
* implementation
*
* \warning The layout should be NCHW88
*/
template <typename ctype, typename dst_type, typename input_filter_compute_type,
typename output_compute_type,
param::MatrixMul::Format format = param::MatrixMul::Format::MK8,
typename enable = void>
class StrategyHelperNchwxx {
public:
static void filter(const ctype* filter,
input_filter_compute_type* filter_transform_buf,
input_filter_compute_type* transform_mid_buf, size_t OC,
size_t IC, size_t oc_start, size_t oc_end, size_t m,
size_t r, const std::vector<float>& interp_points,
DType dtype, float rescale = 1.0f);

static void input(const ctype* input,
input_filter_compute_type* input_transform_buf,
input_filter_compute_type* transform_mid_buf,
int ih_start, int iw_start, size_t IH, size_t IW,
size_t IC, size_t unit_idx, size_t nr_units_in_tile,
size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float rescale = 1.0f);

static void
output(const output_compute_type* output_transform_buf,
const output_compute_type* bias, dst_type* output,
output_compute_type* transform_mid_buf, BiasMode bmode,
NonlineMode nonline_mode, size_t oh_start, size_t ow_start,
size_t OH, size_t OW, size_t oc_start, size_t oc_end,
size_t unit_idx, size_t nr_units_in_tile, size_t m, size_t r,
const std::vector<float>& interp_points, DType dtype,
float input_filter_scale = 1.0f, // input_scale * filter_scale
float input_filter_rescale = 1.0f, // input_rescale * filter_rescale
float rescale = 1.0f);
};

} // namespace winograd
} // namespace megdnn
// vim: syntax=cpp.doxygen
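
Because the new template parameters default to NCHW and DEFAULT, the pre-refactor four-type spelling still names the plain helper. A sketch of that equivalence (alias names are ours; needs <type_traits> for std::is_same):

using Plain = megdnn::winograd::StrategyHelper<float, float, float, float>;
using Explicit = megdnn::winograd::StrategyHelper<
        float, float, float, float, megdnn::param::ConvBias::Format::NCHW,
        megdnn::param::MatrixMul::Format::DEFAULT>;
static_assert(std::is_same<Plain, Explicit>::value,
              "defaults fill in NCHW / DEFAULT");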

dnn/src/fallback/conv_bias/winograd/strategy.cpp  +48 -31

@@ -6,13 +6,14 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "src/fallback/conv_bias/winograd/strategy.h"
#include "src/fallback/conv_bias/winograd/winograd.h"
#include "src/common/winograd/winograd_helper.h"
#include "src/common/utils.h"
#include "src/common/winograd/winograd_helper.h"
#include "src/fallback/conv_bias/winograd/winograd.h"

namespace megdnn {
namespace fallback {
@@ -60,7 +61,7 @@ void winograd_2x3_4x4_f::filter(const float* filter,
float* transform_mid_buf, size_t OC, size_t IC,
size_t oc_start, size_t oc_end) {
::megdnn::winograd::StrategyHelper<
float, float, float, float,
float, float, float, float, param::ConvBias::Format::NCHW,
param::MatrixMul::Format::MK4>::filter(filter, filter_transform_buf,
transform_mid_buf, OC, IC,
oc_start, oc_end,
@@ -73,11 +74,15 @@ void winograd_2x3_4x4_f::input(const float* input, float* input_transform_buf,
float* transform_mid_buf, int ih_start,
int iw_start, size_t IH, size_t IW, size_t IC,
size_t unit_idx, size_t nr_units_in_tile) {
::megdnn::winograd::StrategyHelper<float, float, float, float,
param::MatrixMul::Format::MK4>::
input(input, input_transform_buf, transform_mid_buf, ih_start,
iw_start, IH, IW, IC, unit_idx, nr_units_in_tile,
OUTPUT_BLOCK_SIZE, KERNEL_SIZE, {0, 1, -1}, src_dtype);
::megdnn::winograd::StrategyHelper<
float, float, float, float, param::ConvBias::Format::NCHW,
param::MatrixMul::Format::MK4>::input(input, input_transform_buf,
transform_mid_buf, ih_start,
iw_start, IH, IW, IC,
unit_idx, nr_units_in_tile,
OUTPUT_BLOCK_SIZE,
KERNEL_SIZE, {0, 1, -1},
src_dtype);
}

void winograd_2x3_4x4_f::output(const float* output_transform_buf,
@@ -87,16 +92,19 @@ void winograd_2x3_4x4_f::output(const float* output_transform_buf,
size_t ow_start, size_t OH, size_t OW,
size_t oc_start, size_t oc_end, size_t unit_idx,
size_t nr_units_in_tile) {
::megdnn::winograd::StrategyHelper<float, float, float, float,
param::MatrixMul::Format::MK4>::
output(output_transform_buf, bias, output, transform_mid_buf, bmode,
nonline_mode, oh_start, ow_start, OH, OW, oc_start, oc_end,
unit_idx, nr_units_in_tile, OUTPUT_BLOCK_SIZE, KERNEL_SIZE,
{0, 1, -1}, dst_dtype);
::megdnn::winograd::StrategyHelper<
float, float, float, float, param::ConvBias::Format::NCHW,
param::MatrixMul::Format::MK4>::output(output_transform_buf, bias,
output, transform_mid_buf,
bmode, nonline_mode,
oh_start, ow_start, OH, OW,
oc_start, oc_end, unit_idx,
nr_units_in_tile,
OUTPUT_BLOCK_SIZE,
KERNEL_SIZE, {0, 1, -1},
dst_dtype);
}



MEGDNN_REG_WINOGRAD_STRATEGY_IMPL(winograd_2x3_1x1_qs8)

void winograd_2x3_1x1_qs8::filter(const int8_t* filter,
@@ -136,7 +144,6 @@ void winograd_2x3_1x1_qs8::output(const int* output_transform_buf,
{0, 1, -1}, dst_dtype, scale_input * scale_filter, 2.0f, 1.0f);
}


MEGDNN_REG_WINOGRAD_STRATEGY_IMPL(winograd_2x3_8x8_qs8)

void winograd_2x3_8x8_qs8::filter(const int8_t* filter,
@@ -144,7 +151,7 @@ void winograd_2x3_8x8_qs8::filter(const int8_t* filter,
int16_t* transform_mid_buf, size_t OC,
size_t IC, size_t oc_start, size_t oc_end) {
::megdnn::winograd::StrategyHelper<
int8_t, int8_t, int16_t, int,
int8_t, int8_t, int16_t, int, param::ConvBias::Format::NCHW,
param::MatrixMul::Format::MK8>::filter(filter, filter_transform_buf,
transform_mid_buf, OC, IC,
oc_start, oc_end,
@@ -158,11 +165,15 @@ void winograd_2x3_8x8_qs8::input(const int8_t* input,
int16_t* transform_mid_buf, int ih_start,
int iw_start, size_t IH, size_t IW, size_t IC,
size_t unit_idx, size_t nr_units_in_tile) {
::megdnn::winograd::StrategyHelper<int8_t, int8_t, int16_t, int,
param::MatrixMul::Format::MK8>::
input(input, input_transform_buf, transform_mid_buf, ih_start,
iw_start, IH, IW, IC, unit_idx, nr_units_in_tile,
OUTPUT_BLOCK_SIZE, KERNEL_SIZE, {0, 1, -1}, src_dtype, 1.0f);
::megdnn::winograd::StrategyHelper<
int8_t, int8_t, int16_t, int, param::ConvBias::Format::NCHW,
param::MatrixMul::Format::MK8>::input(input, input_transform_buf,
transform_mid_buf, ih_start,
iw_start, IH, IW, IC,
unit_idx, nr_units_in_tile,
OUTPUT_BLOCK_SIZE,
KERNEL_SIZE, {0, 1, -1},
src_dtype, 1.0f);
}

void winograd_2x3_8x8_qs8::output(const int* output_transform_buf,
@@ -180,13 +191,19 @@ void winograd_2x3_8x8_qs8::output(const int* output_transform_buf,
megdnn_assert(filter_dtype.enumv() == DTypeEnum::QuantizedS16);
scale_filter = filter_dtype.param<dtype::QuantizedS16>().scale;
}
::megdnn::winograd::StrategyHelper<int8_t, int8_t, int16_t, int,
param::MatrixMul::Format::MK8>::
output(output_transform_buf, bias, output, transform_mid_buf, bmode,
nonline_mode, oh_start, ow_start, OH, OW, oc_start, oc_end,
unit_idx, nr_units_in_tile, OUTPUT_BLOCK_SIZE, KERNEL_SIZE,
{0, 1, -1}, dst_dtype, scale_input * scale_filter, 2.0f,
1.0f);
::megdnn::winograd::StrategyHelper<
int8_t, int8_t, int16_t, int, param::ConvBias::Format::NCHW,
param::MatrixMul::Format::MK8>::output(output_transform_buf, bias,
output, transform_mid_buf,
bmode, nonline_mode,
oh_start, ow_start, OH, OW,
oc_start, oc_end, unit_idx,
nr_units_in_tile,
OUTPUT_BLOCK_SIZE,
KERNEL_SIZE, {0, 1, -1},
dst_dtype,
scale_input * scale_filter,
2.0f, 1.0f);
}

} // namespace winograd


dnn/src/fallback/conv_bias/winograd/strategy.h  +1 -0

@@ -28,6 +28,7 @@ MEGDNN_REG_WINOGRAD_STRATEGY(int8_t, int8_t, int16_t, int, 2, 3, 1, 1,

MEGDNN_REG_WINOGRAD_STRATEGY(int8_t, int8_t, int16_t, int, 2, 3, 8, 8,
winograd_2x3_8x8_qs8)

}
} // namespace fallback
} // namespace megdnn


dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp  +25 -28

@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "src/naive/winograd_filter_preprocess/opr_impl.h"
@@ -49,17 +50,16 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
size_t m = param().output_block_size;

bool execed = false;
#define cb(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format, rescale) \
if (param().format == _format) { \
return winograd::StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format>::filter(src_ptr, dst_ptr, \
workspace_ptr, OC, IC, \
0, OC, m, FW, \
interp_points, \
src.layout.dtype, \
rescale); \

#define cb(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format, rescale) \
if (param().format == _format) { \
return winograd::StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::ConvBias::Format::NCHW, \
_format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \
OC, m, FW, interp_points, src.layout.dtype, \
rescale); \
}

#define DISPATCH_FORMAT_MK4(_ctype, _dst_type, _input_filter_compute_type, \
@@ -110,8 +110,9 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
DISPATCH_KERNEL(dt_float16, dt_float16, dt_float16, dt_float16, \
DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 2); \
})
//! normal nchw mode
if (src.layout.ndim <= 5) {
//! dispatch dtype, considering both layout and format.
if (FW == 3) {
if (m == 2) {
std::vector<float> interp_points = {0, 1, -1};
@@ -131,22 +132,20 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
DISPATCH_DTYPE(3);
}
}
}
#undef cb
#undef DISPATCH_FORMAT_MK4
#undef DISPATCH_FORMAT_MK8
#undef DISPATCH_DTYPE
#define cb(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format, rescale) \
if (param().format == _format) { \
return winograd::StrategyHelperNchwxx< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format>::filter(src_ptr, dst_ptr, \
workspace_ptr, OC, IC, \
0, OC, m, FW, \
interp_points, \
src.layout.dtype, \
rescale); \
} else {
#define cb(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format, rescale) \
if (param().format == _format) { \
return winograd::StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::ConvBias::Format::NCHW88, \
_format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \
OC, m, FW, interp_points, src.layout.dtype, \
rescale); \
}

#define DISPATCH_FORMAT_MK8(_ctype, _dst_type, _input_filter_compute_type, \
@@ -159,8 +158,6 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \
DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 0); \
}
//! nchwxx mode
else {
megdnn_assert(src.layout.ndim == 6 || src.layout.ndim == 7);
if (FW == 3) {
if (m == 2) {
@@ -171,11 +168,11 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
DISPATCH_DTYPE(5);
}
}
}
#undef cb
#undef DISPATCH_FORMAT_MK8
#undef DISPATCH_KERNEL
#undef DISPATCH_DTYPE
}
megdnn_assert(execed,
"Unsupport winograd filter preprocess. m: %zu src: %s", m,
src.layout.to_string().c_str());
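
The dispatch above folds the old StrategyHelperNchwxx path into the unified helper: filters with at most 5 dims take the NCHW instantiation, 6- or 7-dim (channel-packed) filters take the NCHW88 one. A standalone sketch of that decision (illustrative only, the function name is ours):

// True when the filter layout is channel-packed (NCHW88-style); otherwise the
// plain NCHW helper is used, matching the ndim check in exec() above.
inline bool filter_is_channel_packed(size_t ndim) {
    return ndim == 6 || ndim == 7;  // ndim <= 5 means a plain OIHW / GOIHW filter
}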

