GitOrigin-RevId: 82242b7437
tags/v1.7.2.m1
@@ -6,7 +6,8 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#include "megdnn/oprs.h" | #include "megdnn/oprs.h" | ||||
@@ -28,22 +29,40 @@ void Images2NeibsBase::deduce_layout_fwd(const TensorLayout& src, TensorLayout& | |||||
}; | }; | ||||
MEGDNN_MARK_USED_VAR(errmsg); | MEGDNN_MARK_USED_VAR(errmsg); | ||||
megdnn_assert_contiguous(src); | megdnn_assert_contiguous(src); | ||||
megdnn_assert(src.ndim == 4_z, "%s", errmsg().c_str()); | |||||
size_t n = src[0], ic = src[1], ih = src[2], iw = src[3]; | |||||
size_t ph = this->param().pad_h; | |||||
size_t pw = this->param().pad_w; | |||||
size_t sh = this->param().stride_h; | |||||
size_t sw = this->param().stride_w; | |||||
size_t dh = this->param().dilate_h; | |||||
size_t dw = this->param().dilate_w; | |||||
size_t wh = this->param().window_h; | |||||
size_t ww = this->param().window_w; | |||||
size_t oh, ow; | |||||
megdnn_assert(src.ndim == 4_z || src.ndim == 5_z, "%s", errmsg().c_str()); | |||||
infer_conv_shape2d( | |||||
ih, iw, wh + (wh - 1) * (dh - 1), ww + (ww - 1) * (dw - 1), sh, sw, ph, pw, | |||||
oh, ow); | |||||
dst = TensorLayout(TensorShape({n, ic, oh, ow, wh, ww}), src.dtype); | |||||
if (src.ndim == 4_z) { | |||||
size_t n = src[0], ic = src[1], ih = src[2], iw = src[3]; | |||||
size_t ph = this->param().pad_h; | |||||
size_t pw = this->param().pad_w; | |||||
size_t sh = this->param().stride_h; | |||||
size_t sw = this->param().stride_w; | |||||
size_t dh = this->param().dilate_h; | |||||
size_t dw = this->param().dilate_w; | |||||
size_t wh = this->param().window_h; | |||||
size_t ww = this->param().window_w; | |||||
size_t oh, ow; | |||||
infer_conv_shape2d( | |||||
ih, iw, wh + (wh - 1) * (dh - 1), ww + (ww - 1) * (dw - 1), sh, sw, ph, | |||||
pw, oh, ow); | |||||
dst = TensorLayout(TensorShape({n, ic, oh, ow, wh, ww}), src.dtype, src.format); | |||||
} else if (src.ndim == 5_z) { | |||||
size_t n = src[0], ih = src[1], iw = src[3], ic = src[2]; | |||||
size_t ph = this->param().pad_h; | |||||
size_t pw = this->param().pad_w; | |||||
size_t sh = this->param().stride_h; | |||||
size_t sw = this->param().stride_w; | |||||
size_t dh = this->param().dilate_h; | |||||
size_t dw = this->param().dilate_w; | |||||
size_t wh = this->param().window_h; | |||||
size_t ww = this->param().window_w; | |||||
size_t oh, ow; | |||||
infer_conv_shape2d( | |||||
ih, iw, wh + (wh - 1) * (dh - 1), ww + (ww - 1) * (dw - 1), sh, sw, ph, | |||||
pw, oh, ow); | |||||
dst = TensorLayout( | |||||
TensorShape({n, oh, ic, ow, wh, ww, 4}), src.dtype, src.format); | |||||
} | |||||
} | } | ||||
void Images2NeibsBase::check_layout_fwd( | void Images2NeibsBase::check_layout_fwd( | ||||
@@ -21,40 +21,100 @@ namespace naive { | |||||
// Reference (naive) forward pass: for every batch item, channel, window position
// and in-window offset (kh, kw) it copies the source element at
// (ih + dilate_h * kh, iw + dilate_w * kw) into dst, or writes 0 when that
// coordinate falls outside [0, IH) x [0, IW) (i.e. into the padded border).
// NOTE(review): this span is taken from a rendered diff; lines of the earlier
// 4-D-only body appear to be interleaved with the newer 4-D/5-D body below --
// confirm against the real file before relying on line-level details.
template <typename T> | template <typename T> | ||||
void Images2NeibsForwardImpl::exec_internal( | void Images2NeibsForwardImpl::exec_internal( | ||||
_megdnn_tensor_in src, _megdnn_tensor_out dst) { | _megdnn_tensor_in src, _megdnn_tensor_out dst) { | ||||
int N = src.layout.shape[0], C = src.layout.shape[1], IH = src.layout.shape[2], | |||||
IW = src.layout.shape[3]; | |||||
auto sptr = src.ptr<T>(); | |||||
auto dptr = dst.ptr<T>(); | |||||
size_t idx = 0; | |||||
int window_h = static_cast<int>(param().window_h); | |||||
int window_w = static_cast<int>(param().window_w); | |||||
int pad_h = static_cast<int>(param().pad_h); | |||||
int pad_w = static_cast<int>(param().pad_w); | |||||
int stride_h = static_cast<int>(param().stride_h); | |||||
int stride_w = static_cast<int>(param().stride_w); | |||||
int dilate_h = static_cast<int>(param().dilate_h); | |||||
int dilate_w = static_cast<int>(param().dilate_w); | |||||
int equ_window_h = dilate_h * (window_h - 1) + 1; | |||||
int equ_window_w = dilate_w * (window_w - 1) + 1; | |||||
for (int n = 0; n < N; ++n) | |||||
for (int c = 0; c < C; ++c) { | |||||
int ih = -pad_h; | |||||
for (; ih + equ_window_h <= IH + pad_h; ih += stride_h) { | |||||
int iw = -pad_w; | |||||
for (; iw + equ_window_w <= IW + pad_w; iw += stride_w) { | |||||
for (int kh = 0; kh < window_h; ++kh) | |||||
for (int kw = 0; kw < window_w; ++kw) { | |||||
int ih2 = ih + dilate_h * kh, iw2 = iw + dilate_w * kw; | |||||
dptr[idx * window_h * window_w + kh * window_w + kw] = | |||||
ih2 >= 0 && ih2 < IH && iw2 >= 0 && iw2 < IW | |||||
? sptr[n * C * IH * IW + c * IH * IW + | |||||
ih2 * IW + iw2] | |||||
: 0.0f; | |||||
} | |||||
++idx; | |||||
// Only 4-D and 5-D source layouts are accepted.
megdnn_assert(src.layout.ndim == 5 || src.layout.ndim == 4); | |||||
// 5-D source: shape[1] is read as IH, shape[2] as C and shape[3] as IW; the
// innermost axis is walked with a fixed extent of 4 (the cn loop below).
if (src.layout.ndim == 5) { | |||||
int N = src.layout.shape[0], C = src.layout.shape[2], IH = src.layout.shape[1], | |||||
IW = src.layout.shape[3]; | |||||
auto sptr = src.ptr<T>(); | |||||
auto dptr = dst.ptr<T>(); | |||||
size_t idx = 0; | |||||
int window_h = static_cast<int>(param().window_h); | |||||
int window_w = static_cast<int>(param().window_w); | |||||
int pad_h = static_cast<int>(param().pad_h); | |||||
int pad_w = static_cast<int>(param().pad_w); | |||||
int stride_h = static_cast<int>(param().stride_h); | |||||
int stride_w = static_cast<int>(param().stride_w); | |||||
int dilate_h = static_cast<int>(param().dilate_h); | |||||
int dilate_w = static_cast<int>(param().dilate_w); | |||||
// Extent covered by a dilated window along each axis.
int equ_window_h = dilate_h * (window_h - 1) + 1; | |||||
int equ_window_w = dilate_w * (window_w - 1) + 1; | |||||
// In this branch element offsets are built from the layout strides.
auto src_stride = src.layout.stride; | |||||
auto dst_stride = dst.layout.stride; | |||||
for (int n = 0; n < N; ++n) | |||||
for (int c = 0; c < C; ++c) { | |||||
int ih = -pad_h; | |||||
// hc / wc count the window position along H / W; they index dst axes 1 and 3.
int hc = 0; | |||||
for (; ih <= IH + pad_h - equ_window_h; ih += stride_h, hc++) { | |||||
int iw = -pad_w; | |||||
int wc = 0; | |||||
for (; iw <= IW + pad_w - equ_window_w; iw += stride_w, wc++) { | |||||
for (int kh = 0; kh < window_h; ++kh) | |||||
for (int kw = 0; kw < window_w; ++kw) { | |||||
for (int cn = 0; cn < 4; cn++) { | |||||
int ih2 = ih + dilate_h * kh, | |||||
iw2 = iw + dilate_w * kw; | |||||
int dst_pos = | |||||
n * dst_stride[0] + hc * dst_stride[1] + | |||||
c * dst_stride[2] + wc * dst_stride[3] + | |||||
kh * dst_stride[4] + kw * dst_stride[5] + | |||||
cn * dst_stride[6]; | |||||
int src_pos = | |||||
n * src_stride[0] + ih2 * src_stride[1] + | |||||
c * src_stride[2] + iw2 * src_stride[3] + | |||||
cn * src_stride[4]; | |||||
// src_pos is only dereferenced when (ih2, iw2) is inside the image.
if (ih2 >= 0 && ih2 < IH && iw2 >= 0 && iw2 < IW) { | |||||
dptr[dst_pos] = sptr[src_pos]; | |||||
} else { | |||||
dptr[dst_pos] = 0.0f; | |||||
} | |||||
} | |||||
} | |||||
// NOTE(review): idx is incremented here but never read in this branch
// (dst_pos is built from strides) -- looks unused; confirm.
++idx; | |||||
} | |||||
} | } | ||||
} | } | ||||
} | |||||
// 4-D source: shape is read as (N, C, IH, IW). src offsets are computed directly
// from the shape, and dst is addressed through the running window counter idx.
} else { | |||||
int N = src.layout.shape[0], C = src.layout.shape[1], IH = src.layout.shape[2], | |||||
IW = src.layout.shape[3]; | |||||
auto sptr = src.ptr<T>(); | |||||
auto dptr = dst.ptr<T>(); | |||||
size_t idx = 0; | |||||
int window_h = static_cast<int>(param().window_h); | |||||
int window_w = static_cast<int>(param().window_w); | |||||
int pad_h = static_cast<int>(param().pad_h); | |||||
int pad_w = static_cast<int>(param().pad_w); | |||||
int stride_h = static_cast<int>(param().stride_h); | |||||
int stride_w = static_cast<int>(param().stride_w); | |||||
int dilate_h = static_cast<int>(param().dilate_h); | |||||
int dilate_w = static_cast<int>(param().dilate_w); | |||||
int equ_window_h = dilate_h * (window_h - 1) + 1; | |||||
int equ_window_w = dilate_w * (window_w - 1) + 1; | |||||
for (int n = 0; n < N; ++n) | |||||
for (int c = 0; c < C; ++c) { | |||||
int ih = -pad_h; | |||||
for (; ih + equ_window_h <= IH + pad_h; ih += stride_h) { | |||||
int iw = -pad_w; | |||||
for (; iw + equ_window_w <= IW + pad_w; iw += stride_w) { | |||||
for (int kh = 0; kh < window_h; ++kh) | |||||
for (int kw = 0; kw < window_w; ++kw) { | |||||
int ih2 = ih + dilate_h * kh, iw2 = iw + dilate_w * kw; | |||||
int src_pos = | |||||
n * C * IH * IW + c * IH * IW + ih2 * IW + iw2; | |||||
int dst_pos = | |||||
idx * window_h * window_w + kh * window_w + kw; | |||||
if (ih2 >= 0 && ih2 < IH && iw2 >= 0 && iw2 < IW) { | |||||
dptr[dst_pos] = sptr[src_pos]; | |||||
} else { | |||||
dptr[dst_pos] = 0.0f; | |||||
} | |||||
} | |||||
// One window (window_h * window_w outputs) has been written; advance to the next.
++idx; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | } | ||||
void Images2NeibsForwardImpl::exec( | void Images2NeibsForwardImpl::exec( | ||||
@@ -6,7 +6,8 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#pragma once | #pragma once | ||||
#include <cstddef> | #include <cstddef> | ||||
@@ -26,6 +27,32 @@ struct TestArg { | |||||
// Builds the test cases for 4-D inputs: the 576 combinations of the eight
// values enumerated by the nested loops below, each paired with the shape
// {2, 3, 19, 20}, followed by two hand-picked cases.
// NOTE(review): the loop variable names suggest pad / stride / dilate / window
// (h, w) in that order -- presumably matching the Images2Neibs param field
// order; confirm against the param definition.
inline std::vector<TestArg> get_args() { | inline std::vector<TestArg> get_args() { | ||||
std::vector<TestArg> args; | std::vector<TestArg> args; | ||||
// clang-format off | |||||
for (uint32_t ph : {0, 1}) | |||||
for (uint32_t pw : {0, 1}) | |||||
for (uint32_t sh : {1, 2}) | |||||
for (uint32_t sw : {1, 2}) | |||||
for (uint32_t dh : {1, 2, 3}) | |||||
for (uint32_t dw : {1, 2, 3}) | |||||
for (uint32_t wh : {3, 4}) | |||||
for (uint32_t ww : {3, 4}) { | |||||
args.emplace_back(param::Images2Neibs{ph, pw, sh, sw, dh, dw, wh, ww}, | |||||
TensorShape{2, 3, 19, 20}); | |||||
} | |||||
// clang-format on | |||||
// large window case | |||||
args.emplace_back( | |||||
param::Images2Neibs{0, 0, 1, 1, 1, 1, 32, 64}, TensorShape{2, 3, 96, 128}); | |||||
// large size | |||||
args.emplace_back( | |||||
param::Images2Neibs{0, 0, 1, 1, 1, 1, 1, 1}, TensorShape{128, 128, 28, 24}); | |||||
return args; | |||||
} | |||||
inline std::vector<TestArg> get_cd4_args() { | |||||
std::vector<TestArg> args; | |||||
// clang-format off | // clang-format off | ||||
for (uint32_t ph : {0, 1}) | for (uint32_t ph : {0, 1}) | ||||
for (uint32_t pw : {0, 1}) | for (uint32_t pw : {0, 1}) | ||||
@@ -33,18 +60,21 @@ inline std::vector<TestArg> get_args() { | |||||
for (uint32_t sw : {1, 2}) | for (uint32_t sw : {1, 2}) | ||||
for (uint32_t dh : {1, 2, 3}) | for (uint32_t dh : {1, 2, 3}) | ||||
for (uint32_t dw : {1, 2, 3}) | for (uint32_t dw : {1, 2, 3}) | ||||
for (uint32_t wh : {3, 4}) | |||||
for (uint32_t ww : {3, 4}) { | |||||
args.emplace_back(param::Images2Neibs{ph, pw, sh, sw, dh, dw, wh, ww}, | |||||
TensorShape{2, 3, 19, 20}); | |||||
for (uint32_t wh : {2, 3}) | |||||
for (uint32_t ww : {2, 3}) { | |||||
args.emplace_back(param::Images2Neibs{ph, pw, sh, sw, dh, dw, wh, | |||||
ww}, | |||||
TensorShape{2, 13, 1, 14, 4}); | |||||
} | } | ||||
// clang-format on | // clang-format on | ||||
// large window case | // large window case | ||||
args.emplace_back( | args.emplace_back( | ||||
param::Images2Neibs{0, 0, 1, 1, 1, 1, 32, 64}, TensorShape{2, 3, 96, 128}); | |||||
param::Images2Neibs{0, 0, 1, 1, 1, 1, 8, 14}, TensorShape{2, 16, 1, 16, 4}); | |||||
// large size | // large size | ||||
args.emplace_back( | args.emplace_back( | ||||
param::Images2Neibs{0, 0, 1, 1, 1, 1, 1, 1}, TensorShape{128, 128, 28, 24}); | |||||
param::Images2Neibs{0, 0, 1, 1, 1, 1, 1, 1}, | |||||
TensorShape{256, 16, 64, 16, 4}); | |||||
return args; | return args; | ||||
} | } | ||||
@@ -75,6 +105,33 @@ inline std::vector<TestArg> get_benchmark_args() { | |||||
return args; | return args; | ||||
} | } | ||||
// Builds benchmark cases with 5-D input shapes: the 2048 combinations of the
// values enumerated by the nested loops below, plus one large hand-picked case.
// Each generated shape is {b, hw, (c + 3) / 4, hw, 4}.
// NOTE(review): the shape looks like (batch, height, channel blocks, width, 4)
// -- presumably the channel-blocked-by-4 layout the "cd4" name refers to; confirm.
inline std::vector<TestArg> get_benchmark_args_cd4() { | |||||
std::vector<TestArg> args; | |||||
// clang-format off | |||||
for (uint32_t ph : {0, 1}) | |||||
for (uint32_t pw : {0, 1}) | |||||
for (uint32_t sh : {1, 2}) | |||||
for (uint32_t sw : {1, 2}) | |||||
for (uint32_t dh : {1, 2}) | |||||
for (uint32_t dw : {1, 2}) | |||||
for (uint32_t wh : {3, 4}) | |||||
for (uint32_t ww : {3, 4}) | |||||
for (uint32_t b : {1, 32}) | |||||
for (uint32_t c : {16, 32}) | |||||
for (uint32_t hw : {16, 32}) { | |||||
// (c + 3) / 4 is c divided by 4, rounded up.
args.emplace_back(param::Images2Neibs{ph, pw, sh, sw, dh, dw, wh, ww}, | |||||
TensorShape{b, hw, (c + 3) / 4, hw, 4}); | |||||
} | |||||
// clang-format on | |||||
// large size | |||||
args.emplace_back( | |||||
param::Images2Neibs{0, 0, 1, 1, 1, 1, 1, 1}, | |||||
TensorShape{256, 28, 32, 24, 4}); | |||||
return args; | |||||
} | |||||
} // namespace images2neibs | } // namespace images2neibs | ||||
} // namespace test | } // namespace test | ||||
} // namespace megdnn | } // namespace megdnn | ||||
@@ -56,3 +56,68 @@ TEST_F(NAIVE, IMAGES2NEIBS_FORWARD) { | |||||
8, 10, 0, 22, 24, 0, 36, 38, 8, 10, 12, 22, 24, 26, | 8, 10, 0, 22, 24, 0, 36, 38, 8, 10, 12, 22, 24, 26, | ||||
36, 38, 40, 10, 12, 0, 24, 26, 0, 38, 40, 0})}); | 36, 38, 40, 10, 12, 0, 24, 26, 0, 38, 40, 0})}); | ||||
} | } | ||||
// Runs Images2Neibs on 5-D uint8 inputs and compares the result against
// hand-written expected outputs, for two parameter sets.
// In both inputs only the first of every four consecutive values is non-zero.
TEST_F(NAIVE, IMAGES2NEIBS_FORWARD_CD4) { | |||||
Checker<Images2Neibs> checker(handle(), /* check_dispatch */ false); | |||||
// Case 1: param (0, 0, 1, 1, 1, 1, 2, 2) on input shape {1, 3, 1, 3, 4};
// expected output shape {1, 2, 1, 2, 2, 2, 4}.
Images2Neibs::Param param(0, 0, 1, 1, 1, 1, 2, 2); | |||||
checker.set_param(param).exect( | |||||
Testcase{ | |||||
TensorValue( | |||||
{1, 3, 1, 3, 4}, dtype::Uint8(), | |||||
{0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, | |||||
0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0}), | |||||
{}}, | |||||
Testcase{ | |||||
{}, | |||||
TensorValue( | |||||
{1, 2, 1, 2, 2, 2, 4}, dtype::Uint8(), | |||||
{0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, | |||||
1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, | |||||
3, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, | |||||
4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0})}); | |||||
// Case 2: pad 1, stride 2, dilate 2, window 3 on input shape {1, 6, 1, 7, 4};
// expected output shape {1, 2, 1, 3, 3, 3, 4}. Zeros in the expected data
// include positions that fall into the padded border.
param.pad_h = 1; | |||||
param.pad_w = 1; | |||||
param.stride_h = 2; | |||||
param.stride_w = 2; | |||||
param.dilate_h = 2; | |||||
param.dilate_w = 2; | |||||
param.window_h = 3; | |||||
param.window_w = 3; | |||||
checker.set_param(param).exect( | |||||
Testcase{ | |||||
TensorValue( | |||||
{1, 6, 1, 7, 4}, dtype::Uint8(), | |||||
{0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, | |||||
4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, | |||||
8, 0, 0, 0, 9, 0, 0, 0, 10, 0, 0, 0, 11, 0, 0, 0, | |||||
12, 0, 0, 0, 13, 0, 0, 0, 14, 0, 0, 0, 15, 0, 0, 0, | |||||
16, 0, 0, 0, 17, 0, 0, 0, 18, 0, 0, 0, 19, 0, 0, 0, | |||||
20, 0, 0, 0, 21, 0, 0, 0, 22, 0, 0, 0, 23, 0, 0, 0, | |||||
24, 0, 0, 0, 25, 0, 0, 0, 26, 0, 0, 0, 27, 0, 0, 0, | |||||
28, 0, 0, 0, 29, 0, 0, 0, 30, 0, 0, 0, 31, 0, 0, 0, | |||||
32, 0, 0, 0, 33, 0, 0, 0, 34, 0, 0, 0, 35, 0, 0, 0, | |||||
36, 0, 0, 0, 37, 0, 0, 0, 38, 0, 0, 0, 39, 0, 0, 0, | |||||
40, 0, 0, 0, 41, 0, 0, 0}), | |||||
{}}, | |||||
Testcase{ | |||||
{}, | |||||
TensorValue( | |||||
{1, 2, 1, 3, 3, 3, 4}, dtype::Uint8(), | |||||
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||||
8, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, | |||||
24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||||
8, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 22, 0, 0, 0, | |||||
24, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||||
0, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, | |||||
24, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||||
8, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, | |||||
24, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 38, 0, 0, 0, | |||||
8, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 22, 0, 0, 0, | |||||
24, 0, 0, 0, 26, 0, 0, 0, 36, 0, 0, 0, 38, 0, 0, 0, | |||||
40, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, | |||||
24, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 38, 0, 0, 0, | |||||
40, 0, 0, 0, 0, 0, 0, 0})}); | |||||
} |
@@ -17,6 +17,7 @@ | |||||
#include "megbrain/opr/blas.h" | #include "megbrain/opr/blas.h" | ||||
#include "megbrain/opr/dnn/batch_norm.h" | #include "megbrain/opr/dnn/batch_norm.h" | ||||
#include "megbrain/opr/dnn/convolution.h" | #include "megbrain/opr/dnn/convolution.h" | ||||
#include "megbrain/opr/dnn/images2neibs.h" | |||||
#include "megbrain/opr/dnn/local.h" | #include "megbrain/opr/dnn/local.h" | ||||
#include "megbrain/opr/dnn/pooling.h" | #include "megbrain/opr/dnn/pooling.h" | ||||
#include "megbrain/opr/imgproc.h" | #include "megbrain/opr/imgproc.h" | ||||
@@ -1651,6 +1652,7 @@ std::unique_ptr<ConvertFormatPass> ConvertFormatPass::make_nhwcd4_converter() { | |||||
replace_func[opr::Concat::typeinfo()] = replace_concat_opr; | replace_func[opr::Concat::typeinfo()] = replace_concat_opr; | ||||
replace_func[opr::Reshape::typeinfo()] = relayout_inp_to_chw; | replace_func[opr::Reshape::typeinfo()] = relayout_inp_to_chw; | ||||
replace_func[opr::GetVarShape::typeinfo()] = relayout_inp_to_chw; | replace_func[opr::GetVarShape::typeinfo()] = relayout_inp_to_chw; | ||||
replace_func[opr::Images2NeibsBackward::typeinfo()] = relayout_inp_to_chw; | |||||
replace_func[opr::Dimshuffle::typeinfo()] = relayout_inp_to_chw; | replace_func[opr::Dimshuffle::typeinfo()] = relayout_inp_to_chw; | ||||
replace_func[opr::Reduce::typeinfo()] = relayout_inp_to_chw; | replace_func[opr::Reduce::typeinfo()] = relayout_inp_to_chw; | ||||
replace_func[opr::AssertEqual::typeinfo()] = relayout_inp_to_chw; | replace_func[opr::AssertEqual::typeinfo()] = relayout_inp_to_chw; | ||||