GitOrigin-RevId: b4b494bb69
@@ -105,15 +105,32 @@ class WarpPerspectiveBackwardData: public WarpPerspectiveBase {
      * \param[out] grad the backpropagated gradient wrt. src
      * \param[out] workspace temporary workspace to perform backward
      */
+    void exec(_megdnn_tensor_in mat,
+              _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) {
+        exec(mat, {}, diff, grad, workspace);
+    }
     virtual void exec(_megdnn_tensor_in mat,
+                      _megdnn_tensor_in mat_idx,
                       _megdnn_tensor_in diff,
                       _megdnn_tensor_out grad,
                       _megdnn_workspace workspace) = 0;
+    size_t get_workspace_in_bytes(const TensorLayout &mat,
+                                  const TensorLayout &diff,
+                                  const TensorLayout &grad) {
+        return get_workspace_in_bytes(mat, {}, diff, grad);
+    }
     virtual size_t get_workspace_in_bytes(const TensorLayout &mat,
+                                          const TensorLayout &mat_idx,
                                           const TensorLayout &diff,
                                           const TensorLayout &grad) = 0;
 protected:
     void check_exec(const TensorLayout &mat,
+                    const TensorLayout &mat_idx,
                     const TensorLayout &diff,
                     const TensorLayout &grad,
                     size_t workspace_in_bytes);
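Note on the API shape introduced here: the old four-argument `exec` and the old `get_workspace_in_bytes` survive as non-virtual wrappers that forward an empty `TensorND`/`TensorLayout` (`{}`) for `mat_idx`, so its layout has `ndim == 0` and existing call sites keep compiling. A minimal caller sketch (the `opr`/tensor variable names are hypothetical, not from this patch):

    // Unindexed: grad batch == mat batch; the old call form still works.
    opr->exec(mat, diff, grad, workspace);
    // Indexed: mat[i] is applied against source slice src[mat_idx[i]];
    // mat_idx must be a rank-1 tensor of int (see the ptr<int>() uses below).
    opr->exec(mat, mat_idx, diff, grad, workspace);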
@@ -129,18 +146,37 @@ class WarpPerspectiveBackwardMat: public WarpPerspectiveBase {
      * \param[out] grad the backpropagated gradient wrt. mat
      * \param[out] workspace temporary workspace to perform backward
      */
+    void exec(_megdnn_tensor_in src,
+              _megdnn_tensor_in mat,
+              _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) {
+        exec(src, mat, {}, diff, grad, workspace);
+    }
     virtual void exec(_megdnn_tensor_in src,
                       _megdnn_tensor_in mat,
+                      _megdnn_tensor_in mat_idx,
                       _megdnn_tensor_in diff,
                       _megdnn_tensor_out grad,
                       _megdnn_workspace workspace) = 0;
+    size_t get_workspace_in_bytes(const TensorLayout &src,
+                                  const TensorLayout &mat,
+                                  const TensorLayout &diff,
+                                  const TensorLayout &grad) {
+        return get_workspace_in_bytes(src, mat, {}, diff, grad);
+    }
     virtual size_t get_workspace_in_bytes(const TensorLayout &src,
                                           const TensorLayout &mat,
+                                          const TensorLayout &mat_idx,
                                           const TensorLayout &diff,
                                           const TensorLayout &grad) = 0;
 protected:
     void check_exec(const TensorLayout &src,
                     const TensorLayout &mat,
+                    const TensorLayout &mat_idx,
                     const TensorLayout &diff,
                     const TensorLayout &grad,
                     size_t workspace_in_bytes);
@@ -255,29 +255,31 @@ void WarpPerspectiveForward::check_exec_allow_nhwc_mat_idx(
 }
 void WarpPerspectiveBackwardData::check_exec(const TensorLayout& mat,
+                                             const TensorLayout& mat_idx,
                                              const TensorLayout& diff,
                                              const TensorLayout& grad,
                                              size_t workspace_in_bytes) {
-    check_layout_fwd(grad, mat, diff);
+    check_layout_fwd(grad, mat, mat_idx, diff);
     megdnn_assert(grad.dtype == dtype::Float32() MEGDNN_INC_FLOAT16(
                           || grad.dtype == dtype::BFloat16()),
                   "Backward WarpPerspective only supports Float32/BFloat16.");
-    auto required_workspace_in_bytes = get_workspace_in_bytes(mat, diff, grad);
+    auto required_workspace_in_bytes = get_workspace_in_bytes(mat, mat_idx, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
 }
 void WarpPerspectiveBackwardMat::check_exec(const TensorLayout& src,
                                             const TensorLayout& mat,
+                                            const TensorLayout& mat_idx,
                                             const TensorLayout& diff,
                                             const TensorLayout& grad,
                                             size_t workspace_in_bytes) {
-    check_layout_fwd(src, mat, diff);
+    check_layout_fwd(src, mat, mat_idx, diff);
     megdnn_assert_eq_layout(mat, grad);
     megdnn_assert(grad.dtype == dtype::Float32() MEGDNN_INC_FLOAT16(
                           || grad.dtype == dtype::BFloat16()),
                   "Backward WarpPerspective only supports Float32/BFloat16.");
     auto required_workspace_in_bytes =
-            get_workspace_in_bytes(src, mat, diff, grad);
+            get_workspace_in_bytes(src, mat, mat_idx, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
 }
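The contract these checks enforce: an absent `mat_idx` is a default-constructed layout, so `ndim == 0`; a present one must be rank-1 and each entry must name a valid source batch. A standalone sketch of the same invariant (plain C++, hypothetical helper, not patch code):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // mat[i] pairs with source slice mat_idx[i]; with no index, mat[i]
    // pairs with slice i, so the two batch sizes must agree.
    void check_mat_idx_contract(const std::vector<int>& mat_idx, std::size_t n_src) {
        for (int i : mat_idx)
            assert(i >= 0 && static_cast<std::size_t>(i) < n_src);
    }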
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/cuda/warp_perspective/opr_impl.h"
@@ -18,8 +19,8 @@ namespace megdnn {
 namespace cuda {
 WorkspaceBundle WarpPerspectiveBackwardDataImpl::get_workspace_bundle(
-        void* ptr, const TensorLayout& mat, const TensorLayout& diff,
-        const TensorLayout& grad) const {
+        void* ptr, const TensorLayout& mat, const TensorLayout& mat_idx,
+        const TensorLayout& diff, const TensorLayout& grad) const {
     SmallVector<size_t> sizes;
     TensorLayout fmat = mat;
     TensorLayout fdiff = diff;
@@ -33,20 +34,24 @@ WorkspaceBundle WarpPerspectiveBackwardDataImpl::get_workspace_bundle(
     get_workspace(fmat);
     get_workspace(fdiff);
     get_workspace(fgrad);
-    sizes.push_back(get_float32_workspace_in_bytes(fmat, fdiff, fgrad));
+    sizes.push_back(
+            get_float32_workspace_in_bytes(fmat, mat_idx, fdiff, fgrad));
     return {ptr, std::move(sizes)};
 }
 void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
+                                           _megdnn_tensor_in mat_idx,
                                            _megdnn_tensor_in sdiff,
                                            _megdnn_tensor_out sgrad,
                                            _megdnn_workspace sworkspace) {
-    check_exec(smat.layout, sdiff.layout, sgrad.layout, sworkspace.size);
+    check_exec(smat.layout, mat_idx.layout, sdiff.layout, sgrad.layout,
+               sworkspace.size);
     TensorND mat = smat;
     TensorND diff = sdiff;
     TensorND grad = sgrad;
-    auto bundle = get_workspace_bundle(sworkspace.raw_ptr, smat.layout,
-                                       sdiff.layout, sgrad.layout);
+    auto bundle =
+            get_workspace_bundle(sworkspace.raw_ptr, smat.layout,
+                                 mat_idx.layout, sdiff.layout, sgrad.layout);
     auto ctypecvt = CompTypeCvter<dtype::BFloat16, dtype::Float32>(
             concrete_handle(this->handle()), &bundle);
     if (sgrad.layout.dtype.enumv() == DTypeTrait<dtype::BFloat16>::enumv) {
@@ -60,6 +65,15 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
     auto N = grad.layout.shape[0], C = grad.layout.shape[1],
          IH = grad.layout.shape[2], IW = grad.layout.shape[3],
          OH = diff.layout.shape[2], OW = diff.layout.shape[3];
+    int* midx_ptr = nullptr;
+    if (mat_idx.raw_ptr) {
+        megdnn_assert(mat_idx.layout.ndim == 1);
+        N = mat_idx.layout.shape[0];
+        midx_ptr = mat_idx.ptr<int>();
+    } else {
+        megdnn_assert(mat_idx.layout.ndim == 0);
+    }
     auto bval = param().border_val;
     auto bmode = warp_perspective::get_bmode(param().bmode);
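What the indexed backward-data pass computes, as a CPU reference (a sketch, not the megdnn kernel; the warp math is elided and only the batch bookkeeping is shown): with `mat_idx`, `N` becomes the matrix count while `grad` keeps the source batch `grad.layout.shape[0]`, and several matrices may scatter into the same source slice:

    #include <algorithm>
    #include <cstddef>

    // diff: [n_mat, plane] incoming gradients; grad: [n_src, plane] output.
    void backward_data_batches(const float* diff, float* grad, const int* midx,
                               int n_mat, int n_src, int plane) {
        std::fill(grad, grad + std::size_t(n_src) * plane, 0.f);
        for (int n = 0; n < n_mat; ++n) {
            int s = midx ? midx[n] : n;      // target source slice (gather)
            for (int p = 0; p < plane; ++p)  // accumulate: a slice may repeat
                grad[std::size_t(s) * plane + p] +=
                        diff[std::size_t(n) * plane + p];
        }
    }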
@@ -67,10 +81,11 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
     size_t max_batch_x_channel = max_batch_x_channel_size();
     if (batch_x_channel_size <= max_batch_x_channel) {
         warp_perspective::backward_data_proxy(
-                mat.ptr<dt_float32>(), diff.ptr<dt_float32>(),
+                mat.ptr<dt_float32>(), midx_ptr, diff.ptr<dt_float32>(),
                 grad.ptr<dt_float32>(),
-                reinterpret_cast<float*>(workspace.raw_ptr), N, C, IH, IW,
-                OH, OW, bval, bmode, stream);
+                reinterpret_cast<float*>(workspace.raw_ptr), N,
+                grad.layout.shape[0], C, IH, IW, OH, OW, bval, bmode,
+                stream);
     } else {
         dt_float32* mat_ptr = mat.ptr<dt_float32>();
         dt_float32* diff_ptr = diff.ptr<dt_float32>();
@@ -80,10 +95,10 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
             size_t curr_batch_size =
                     N > max_batch_size ? max_batch_size : N;
             warp_perspective::backward_data_proxy(
-                    mat_ptr, diff_ptr, grad_ptr,
+                    mat_ptr, midx_ptr, diff_ptr, grad_ptr,
                     reinterpret_cast<float*>(workspace.raw_ptr),
-                    curr_batch_size, C, IH, IW, OH, OW, bval, bmode,
-                    stream);
+                    curr_batch_size, grad.layout.shape[0], C, IH, IW, OH,
+                    OW, bval, bmode, stream);
             if (N <= max_batch_size) {
                 break;
@@ -91,7 +106,11 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
                 N -= max_batch_size;
                 mat_ptr += curr_batch_size * mat.layout.stride[0];
                 diff_ptr += curr_batch_size * diff.layout.stride[0];
-                grad_ptr += curr_batch_size * grad.layout.stride[0];
+                if (midx_ptr == nullptr) {
+                    grad_ptr += curr_batch_size * grad.layout.stride[0];
+                } else {
+                    midx_ptr += curr_batch_size;
+                }
             }
         }
     }
@@ -102,8 +121,8 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
 }
 size_t WarpPerspectiveBackwardDataImpl::get_float32_workspace_in_bytes(
-        const TensorLayout& /* mat */, const TensorLayout& diff,
-        const TensorLayout& grad) const {
+        const TensorLayout& /* mat */, const TensorLayout& mat_idx,
+        const TensorLayout& diff, const TensorLayout& grad) const {
     auto N = grad.shape[0], C = grad.shape[1], IH = grad.shape[2],
          IW = grad.shape[3];
     auto OH = diff.shape[2], OW = diff.shape[3];
@@ -112,6 +131,9 @@ size_t WarpPerspectiveBackwardDataImpl::get_float32_workspace_in_bytes(
     size_t max_batch_size = N;
     size_t max_batch_x_channel = max_batch_x_channel_size();
     if (N * C > max_batch_x_channel) {
+        /* when the batch is too large, the workspace only holds part of grad;
+           chunking would then index out of range through mat_idx */
+        megdnn_assert(mat_idx.ndim == 0,
+                      "batch size is too large, it's unsupported with mat_idx backward.");
         max_batch_size = max_batch_x_channel / C;
     }
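The sizing rule behind this assertion, spelled out (a sketch; `factor` is the constant 4 defined in backward_data.cu): the workspace holds `factor` accumulation copies of the whole `grad` tensor, which is why the indexed path cannot fall back to batch chunking once `N * C` exceeds the threshold:

    #include <cstddef>

    // Mirrors get_backward_data_workspace_in_bytes below:
    // N*C*IH*IW*factor * sizeof(float).
    std::size_t backward_data_workspace_bytes(std::size_t n_src, std::size_t c,
                                              std::size_t ih, std::size_t iw) {
        const std::size_t factor = 4;  // private copies, summed by add_up_kernel
        return factor * n_src * c * ih * iw * sizeof(float);
    }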
@@ -6,7 +6,8 @@
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
 */
 #include "src/cuda/warp_perspective/common.h"
@@ -20,16 +21,21 @@ namespace warp_perspective {
 const int factor = 4;
 template <typename Getter, int factor>
-__global__ void warp_perspective_bwd_data_kernel(const float *hidden,
-        const float *mat, float *dst,
-        int N, int C, int IH, int IW, int OH, int OW)
-{
+__global__ void warp_perspective_bwd_data_kernel(const float* hidden,
+                                                 const float* mat,
+                                                 const int* midx, float* dst,
+                                                 int N, int C, int IH, int IW,
+                                                 int OH, int OW) {
     Getter getter;
     int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
     hidden += n * C*OH*OW;
-    dst += n * C*factor*IH*IW;
+    if (midx) {
+        dst += midx[n] * C * factor * IH * IW;
+    } else {
+        dst += n * C * factor * IH * IW;
+    }
     mat += n * 3*3;
     if (ow < OW && oh < OH) {
         float denominator = mat[6]*ow + mat[7]*oh + mat[8];
@@ -72,15 +78,19 @@ __global__ void add_up_kernel(const float *src, float *dst,
 }
 template <int factor>
-__global__ void warp_perspective_bwd_data_constant_kernel(const float *hidden,
-        const float *mat, float *dst,
-        int N, int C, int IH, int IW, int OH, int OW)
-{
+__global__ void warp_perspective_bwd_data_constant_kernel(
+        const float* hidden, const float* mat, const int* midx, float* dst,
+        int N, int C, int IH, int IW, int OH, int OW) {
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
-    hidden += blockIdx.z * C*OH*OW;
-    dst += blockIdx.z * C*factor*IH*IW;
-    mat += blockIdx.z * 3*3;
+    hidden += n * C * OH * OW;
+    if (midx) {
+        dst += midx[n] * C * factor * IH * IW;
+    } else {
+        dst += n * C * factor * IH * IW;
+    }
+    mat += n * 3 * 3;
     if (ow < OW && oh < OH) {
         float denominator = mat[6]*ow + mat[7]*oh + mat[8];
         float iw = (mat[0]*ow + mat[1]*oh + mat[2]) / denominator;
@@ -119,30 +129,35 @@ __global__ void warp_perspective_bwd_data_constant_kernel(const float *hidden,
     }
 }
-size_t get_backward_data_workspace_in_bytes(
-        int N, int C, int IH, int IW, int /* OH */, int /* OW */,
-        BorderMode /* bmode */)
-{
+size_t get_backward_data_workspace_in_bytes(int N, int C, int IH, int IW,
+                                            int /* OH */, int /* OW */,
+                                            BorderMode /* bmode */) {
     return N*C*IH*IW*factor * sizeof(float);
 }
-void backward_data_proxy(const float *mat, const float *diff,
-        float *grad, float *workspace,
-        int N, int C, int IH, int IW, int OH, int OW, float bval,
-        BorderMode mode, cudaStream_t stream)
-{
+void backward_data_proxy(const float* mat, const int* midx, const float* diff,
+                         float* grad, float* workspace, int N, int N_SRC, int C,
+                         int IH, int IW, int OH, int OW, float bval,
+                         BorderMode mode, cudaStream_t stream) {
     (void)bval;
     (void)grad;
     const int BY = 16, BX = 32;
     {
         dim3 threads(BX, BY);
         dim3 blocks((OW+BX-1)/BX, (OH+BY-1)/BY, N);
-        cuda_check(cudaMemsetAsync(workspace, 0, sizeof(float) * factor*N*C*IH*IW,
-                    stream));
-#define DISPATCH(Getter) \
-        warp_perspective_bwd_data_kernel<Getter, factor><<<blocks, threads, \
-            0, stream>>>(diff, mat, workspace, N, C, IH, IW, OH, OW);
+        if (midx) {
+            cuda_check(cudaMemsetAsync(
+                    workspace, 0, sizeof(float) * factor * N_SRC * C * IH * IW,
+                    stream));
+        } else {
+            cuda_check(cudaMemsetAsync(workspace, 0,
+                                       sizeof(float) * factor * N * C * IH * IW,
+                                       stream));
+        }
+#define DISPATCH(Getter)                                                    \
+    warp_perspective_bwd_data_kernel<Getter, factor>                        \
+            <<<blocks, threads, 0, stream>>>(diff, mat, midx, workspace, N, \
+                                             C, IH, IW, OH, OW);
         switch (mode) {
             case BORDER_REPLICATE:
                 DISPATCH(ReplicateGetter);
@@ -158,8 +173,9 @@ void backward_data_proxy(const float *mat, const float *diff,
                 break;
             case BORDER_CONSTANT:
                 warp_perspective_bwd_data_constant_kernel<factor>
-                    <<<blocks, threads, 0, stream>>>
-                    (diff, mat, workspace, N, C, IH, IW, OH, OW);
+                        <<<blocks, threads, 0, stream>>>(diff, mat, midx,
+                                                         workspace, N, C, IH,
+                                                         IW, OH, OW);
                 break;
             default:
                 break;
@@ -169,9 +185,15 @@ void backward_data_proxy(const float *mat, const float *diff,
     {
         int THREADS = 512;
         dim3 threads(THREADS);
-        dim3 blocks((IH*IW+THREADS-1)/THREADS, N*C);
-        add_up_kernel<factor><<<blocks, threads, 0, stream>>>(workspace, grad,
-                IH*IW);
+        if (midx) {
+            dim3 blocks((IH * IW + THREADS - 1) / THREADS, N_SRC * C);
+            add_up_kernel<factor>
+                    <<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW);
+        } else {
+            dim3 blocks((IH * IW + THREADS - 1) / THREADS, N * C);
+            add_up_kernel<factor>
+                    <<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW);
+        }
     }
     after_kernel_launch();
 }
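The two `add_up_kernel` launches above differ only in the grid's second dimension: the number of grad batches to reduce. An equivalent branch-free formulation (a sketch, not part of the patch):

    int n_out = midx ? N_SRC : N;  // grad always has n_out batches
    dim3 blocks((IH * IW + THREADS - 1) / THREADS, n_out * C);
    add_up_kernel<factor><<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW);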
@@ -181,4 +203,3 @@ void backward_data_proxy(const float *mat, const float *diff,
 }  // namespace megdnn
 // vim: syntax=cpp.doxygen
@@ -6,7 +6,8 @@
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
 */
 #include "src/cuda/warp_perspective/opr_impl.h"
@@ -40,15 +41,17 @@ WorkspaceBundle WarpPerspectiveBackwardMatImpl::get_workspace_bundle(
 void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
                                           _megdnn_tensor_in smat,
+                                          _megdnn_tensor_in smat_idx,
                                           _megdnn_tensor_in sdiff,
                                           _megdnn_tensor_out sgrad,
                                           _megdnn_workspace sworkspace) {
-    check_exec(ssrc.layout, smat.layout, sdiff.layout, sgrad.layout,
-               sworkspace.size);
+    check_exec(ssrc.layout, smat.layout, smat_idx.layout, sdiff.layout,
+               sgrad.layout, sworkspace.size);
     TensorND src = ssrc;
     TensorND mat = smat;
     TensorND diff = sdiff;
     TensorND grad = sgrad;
+    TensorND mat_idx = smat_idx;
     auto bundle = get_workspace_bundle(sworkspace.raw_ptr, ssrc.layout,
                                        smat.layout, sdiff.layout, sgrad.layout);
     auto ctypecvt = CompTypeCvter<dtype::BFloat16, dtype::Float32>(
@@ -64,6 +67,15 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
     auto N = src.layout.shape[0], C = src.layout.shape[1],
          IH = src.layout.shape[2], IW = src.layout.shape[3],
         OH = diff.layout.shape[2], OW = diff.layout.shape[3];
+    int* midx_ptr = nullptr;
+    if (mat_idx.raw_ptr) {
+        megdnn_assert(mat_idx.layout.ndim == 1);
+        N = mat_idx.layout.shape[0];
+        midx_ptr = mat_idx.ptr<int>();
+    } else {
+        megdnn_assert(mat_idx.layout.ndim == 0);
+    }
     auto bval = param().border_val;
     auto bmode = warp_perspective::get_bmode(param().bmode);
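Same pattern as the data-gradient path: with `mat_idx`, the loop count `N` becomes the matrix batch while `src` is gathered per matrix. A CPU sketch of the batch plumbing only (`accumulate_mat_grad` is a hypothetical stand-in for the 3x3 chain-rule accumulation the kernels below implement):

    // grad_mat: [n_mat, 3, 3]; each matrix reads its own source slice.
    for (int n = 0; n < n_mat; ++n) {
        const float* src_n =
                src + std::size_t(midx ? midx[n] : n) * C * IH * IW;
        accumulate_mat_grad(grad_mat + n * 9, src_n,
                            diff + std::size_t(n) * C * OH * OW);
    }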
@@ -71,7 +83,7 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
     size_t max_batch_x_channel = max_batch_x_channel_size();
     if (batch_x_channel_size <= max_batch_x_channel) {
         warp_perspective::backward_mat_proxy(
-                src.ptr<dt_float32>(), mat.ptr<dt_float32>(),
+                src.ptr<dt_float32>(), mat.ptr<dt_float32>(), midx_ptr,
                 diff.ptr<dt_float32>(), grad.ptr<dt_float32>(), N, C, IH,
                 IW, OH, OW, bval, bmode, stream);
     } else {
@@ -84,14 +96,19 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
             size_t curr_batch_size =
                     N > max_batch_size ? max_batch_size : N;
             warp_perspective::backward_mat_proxy(
-                    src_ptr, mat_ptr, diff_ptr, grad_ptr, curr_batch_size,
-                    C, IH, IW, OH, OW, bval, bmode, stream);
+                    src_ptr, mat_ptr, midx_ptr, diff_ptr, grad_ptr,
+                    curr_batch_size, C, IH, IW, OH, OW, bval, bmode,
+                    stream);
             if (N <= max_batch_size) {
                 break;
             } else {
                 N -= max_batch_size;
-                src_ptr += curr_batch_size * src.layout.stride[0];
+                if (midx_ptr == nullptr) {
+                    src_ptr += curr_batch_size * src.layout.stride[0];
+                } else {
+                    midx_ptr += curr_batch_size;
+                }
                 mat_ptr += curr_batch_size * mat.layout.stride[0];
                 diff_ptr += curr_batch_size * diff.layout.stride[0];
                 grad_ptr += curr_batch_size * grad.layout.stride[0];
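Why the advances differ (a sketch of the rule, hypothetical stride names): `mat`, `diff`, and `grad` are laid out per matrix, so they always move by `curr_batch_size`; `src` is addressed through `midx`, so in the indexed case its base pointer stays fixed and only the index cursor advances:

    if (midx) midx += k; else src += k * src_batch_stride;
    mat  += k * mat_batch_stride;
    diff += k * diff_batch_stride;
    grad += k * grad_batch_stride;

The backward-data loop earlier applies the mirror-image rule: there `grad` is the gathered tensor, so its pointer is the one held fixed.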
@@ -109,4 +126,3 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
 }  // namespace megdnn
 // vim: syntax=cpp.doxygen
@@ -6,7 +6,8 @@
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
 */
 #include "src/cuda/warp_perspective/common.h"
@@ -20,17 +21,21 @@ namespace cuda {
 namespace warp_perspective {
 template <typename Getter>
-__global__ void warp_perspective_bwd_mat_kernel(const float *hidden,
-        const float *in, const float *mat, float *grad,
-        int N, int C, int IH, int IW, int OH, int OW)
-{
+__global__ void warp_perspective_bwd_mat_kernel(
+        const float* hidden, const float* in, const float* mat, const int* midx,
+        float* grad, int N, int C, int IH, int IW, int OH, int OW) {
     Getter getter;
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
     hidden += blockIdx.z * C*OH*OW;
-    in += blockIdx.z * C*IH*IW;
-    mat += blockIdx.z * 3*3;
-    grad += blockIdx.z * 3*3;
+    if (midx) {
+        in += midx[n] * C * IH * IW;
+    } else {
+        in += n * C * IH * IW;
+    }
+    mat += n * 3*3;
+    grad += n * 3*3;
     float grad_local[3*3];
     memset(grad_local, 0, sizeof(grad_local));
     if (ow < OW && oh < OH) {
@@ -83,9 +88,8 @@ __global__ void warp_perspective_bwd_mat_kernel(const float *hidden,
         dh[8] = 1.0f * ddenominatorh;
 #pragma unroll
         for (int i = 0; i < 9; ++i) {
-            grad_local[i] +=
-                hidden[oh*OW+ow] * dalpha * dh[i] +
-                hidden[oh*OW+ow] * dbeta * dw[i];
+            grad_local[i] += hidden[oh * OW + ow] * dalpha * dh[i] +
+                             hidden[oh * OW + ow] * dbeta * dw[i];
         }
         hidden += OH*OW;
         in += IH*IW;
@@ -125,17 +129,21 @@ __global__ void warp_perspective_bwd_mat_kernel(const float *hidden,
     }
 }
-__global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
-        const float *in, const float *mat, float *grad,
-        int N, int C, int IH, int IW, int OH, int OW, float bval)
-{
+__global__ void warp_perspective_bwd_mat_constant_kernel(
+        const float* hidden, const float* in, const float* mat, const int* midx,
+        float* grad, int N, int C, int IH, int IW, int OH, int OW, float bval) {
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
-    hidden += blockIdx.z * C*OH*OW;
-    in += blockIdx.z * C*IH*IW;
-    mat += blockIdx.z * 3*3;
-    grad += blockIdx.z * 3*3;
-    float grad_local[3*3];
+    hidden += blockIdx.z * C * OH * OW;
+    if (midx) {
+        in += midx[n] * C * IH * IW;
+    } else {
+        in += n * C * IH * IW;
+    }
+    mat += n * 3 * 3;
+    grad += n * 3 * 3;
+    float grad_local[3 * 3];
     memset(grad_local, 0, sizeof(grad_local));
     if (ow < OW && oh < OH) {
         float numeratorw = mat[0]*ow + mat[1]*oh + mat[2];
@@ -199,10 +207,10 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
         dh[8] = 1.0f * ddenominatorh;
 #pragma unroll
         for (int i = 0; i < 9; ++i) {
-            float delta =
-                hidden[oh*OW+ow] * dalpha * dh[i] +
-                hidden[oh*OW+ow] * dbeta * dw[i];
-            if (isfinite(delta)) grad_local[i] += delta;
+            float delta = hidden[oh * OW + ow] * dalpha * dh[i] +
+                          hidden[oh * OW + ow] * dbeta * dw[i];
+            if (isfinite(delta))
+                grad_local[i] += delta;
         }
         hidden += OH*OW;
         in += IH*IW;
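Context for the `isfinite` guard (a sketch of the failure mode, not patch code): near-degenerate projective rows make the denominator vanish, so the sampled coordinates and hence `dalpha`/`dbeta` can be inf or NaN; dropping non-finite `delta` keeps a single bad pixel from poisoning the whole 3x3 accumulator:

    float denominator = 0.f;        // degenerate projective row at this pixel
    float iw = 1.0f / denominator;  // -> inf
    float delta = iw * 0.5f;        // further arithmetic stays non-finite
    if (isfinite(delta)) { /* never taken: contribution is dropped */ }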
@@ -227,8 +235,9 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
     for (int k = 16; k >= 1; k >>= 1) {
         if (tidx < k) {
 #pragma unroll
-            for (int i = 0; i < 9; ++i)
-                grad_shared[tidy][tidx][i] += grad_shared[tidy][tidx+k][i];
+            for (int i = 0; i < 9; ++i)
+                grad_shared[tidy][tidx][i] +=
+                        grad_shared[tidy][tidx + k][i];
         }
         cub::WARP_SYNC(0xffffffff);
     }
@@ -240,18 +249,17 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
     }
 }
-void backward_mat_proxy(const float *src, const float *mat,
-        const float *diff, float *grad,
-        int N, int C, int IH, int IW, int OH, int OW, float bval,
-        BorderMode mode, cudaStream_t stream)
-{
+void backward_mat_proxy(const float* src, const float* mat, const int* midx,
+                        const float* diff, float* grad, int N, int C, int IH,
+                        int IW, int OH, int OW, float bval, BorderMode mode,
+                        cudaStream_t stream) {
     const int BY = 16, BX = 32;
     dim3 threads(BX, BY);
     dim3 blocks((OW+BX-1)/BX, (OH+BY-1)/BY, N);
     cuda_check(cudaMemsetAsync(grad, 0, sizeof(float) * N*3*3, stream));
-#define DISPATCH(Getter) \
+#define DISPATCH(Getter)                                                     \
     warp_perspective_bwd_mat_kernel<Getter><<<blocks, threads, 0, stream>>>( \
-            diff, src, mat, grad, N, C, IH, IW, OH, OW);
+            diff, src, mat, midx, grad, N, C, IH, IW, OH, OW);
     switch (mode) {
         case BORDER_REPLICATE:
             DISPATCH(ReplicateGetter);
@@ -266,8 +274,9 @@ void backward_mat_proxy(const float *src, const float *mat,
             DISPATCH(WrapGetter);
             break;
         case BORDER_CONSTANT:
-            warp_perspective_bwd_mat_constant_kernel<<<blocks, threads, 0, stream>>>(
-                    diff, src, mat, grad, N, C, IH, IW, OH, OW, bval);
+            warp_perspective_bwd_mat_constant_kernel<<<blocks, threads, 0,
+                                                       stream>>>(
+                    diff, src, mat, midx, grad, N, C, IH, IW, OH, OW, bval);
             break;
         default:
             break;
@@ -281,4 +290,3 @@ void backward_mat_proxy(const float *src, const float *mat,
 }  // namespace megdnn
 // vim: syntax=cpp.doxygen
@@ -6,7 +6,8 @@
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
 */
 #pragma once
 #include <cuda_runtime_api.h>
@@ -19,40 +20,34 @@ namespace warp_perspective {
 // all these kernels use bilinear interpolation
-template<typename ctype>
-void forward_proxy(
-        bool is_nhwc,
-        const ctype *src, const float *mat, const int *mat_idx,
-        ctype *dst, int N_SRC, int N_MAT,
-        int C, int IH, int IW, int OH, int OW, ctype bval,
-        BorderMode bmode,
-        megcore::AsyncErrorInfo* error_info, void* error_tracker,
-        cudaStream_t stream);
+template <typename ctype>
+void forward_proxy(bool is_nhwc, const ctype* src, const float* mat,
+                   const int* mat_idx, ctype* dst, int N_SRC, int N_MAT, int C,
+                   int IH, int IW, int OH, int OW, ctype bval, BorderMode bmode,
+                   megcore::AsyncErrorInfo* error_info, void* error_tracker,
+                   cudaStream_t stream);
 template <typename ctype>
-void forward_proxy_nchw4(
-        const ctype *src, const float *mat, const int *mat_idx,
-        ctype *dst, int N_SRC, int N_MAT,
-        int C, int IH, int IW, int OH, int OW, ctype bval,
-        BorderMode bmode,
-        megcore::AsyncErrorInfo* error_info, void* error_tracker,
-        cudaStream_t stream);
-void backward_data_proxy(const float *mat, const float *diff, float *grad,
-        float *workspace,
-        int N, int C, int IH, int IW, int OH, int OW, float bval,
-        BorderMode bmode, cudaStream_t stream);
-size_t get_backward_data_workspace_in_bytes(
-        int N, int C, int IH, int IW, int OH, int OW,
-        BorderMode bmode);
-void backward_mat_proxy(
-        const float *src, const float *mat, const float *diff, float *grad,
-        int N, int C, int IH, int IW, int OH, int OW, float bval,
-        BorderMode bmode, cudaStream_t stream);
-} // namespace warp_perspective
-} // namespace cuda
-} // namespace megdnn
+void forward_proxy_nchw4(const ctype* src, const float* mat, const int* mat_idx,
+                         ctype* dst, int N_SRC, int N_MAT, int C, int IH,
+                         int IW, int OH, int OW, ctype bval, BorderMode bmode,
+                         megcore::AsyncErrorInfo* error_info,
+                         void* error_tracker, cudaStream_t stream);
+void backward_data_proxy(const float* mat, const int* midx, const float* diff,
+                         float* grad, float* workspace, int N, int N_SRC, int C,
+                         int IH, int IW, int OH, int OW, float bval,
+                         BorderMode bmode, cudaStream_t stream);
+size_t get_backward_data_workspace_in_bytes(int N, int C, int IH, int IW,
+                                            int OH, int OW, BorderMode bmode);
+void backward_mat_proxy(const float* src, const float* mat, const int* midx,
+                        const float* diff, float* grad, int N, int C, int IH,
+                        int IW, int OH, int OW, float bval, BorderMode bmode,
+                        cudaStream_t stream);
+}  // namespace warp_perspective
+}  // namespace cuda
+}  // namespace megdnn
 // vim: syntax=cpp.doxygen
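Call shape after this header change (a sketch using the signature declared above; variable names are hypothetical): `N` is the matrix batch, `N_SRC` the source/grad batch, and `midx` may be null for the unindexed case, in which case the two batch sizes coincide:

    backward_data_proxy(mat_ptr, /*midx=*/nullptr, diff_ptr, grad_ptr, ws,
                        /*N=*/n, /*N_SRC=*/n, C, IH, IW, OH, OW,
                        bval, bmode, stream);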
@@ -6,7 +6,8 @@
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
 */
 #pragma once
 #include "megdnn/oprs.h"
@@ -48,20 +49,24 @@ class WarpPerspectiveBackwardDataImpl final
         : public WarpPerspectiveBackwardData {
 public:
     using WarpPerspectiveBackwardData::WarpPerspectiveBackwardData;
-    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in diff,
-              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
+    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in mat_idx,
+              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout& mat,
+                                  const TensorLayout& mat_idx,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override {
-        return get_workspace_bundle(nullptr, mat, diff, grad)
+        return get_workspace_bundle(nullptr, mat, mat_idx, diff, grad)
                 .total_size_in_bytes();
     }
 private:
     WorkspaceBundle get_workspace_bundle(void* ptr, const TensorLayout& mat,
+                                         const TensorLayout& mat_idx,
                                          const TensorLayout& diff,
                                          const TensorLayout& grad) const;
     size_t get_float32_workspace_in_bytes(const TensorLayout& mat,
+                                          const TensorLayout& mat_idx,
                                           const TensorLayout& diff,
                                           const TensorLayout& grad) const;
 };
@@ -70,10 +75,11 @@ class WarpPerspectiveBackwardMatImpl final : public WarpPerspectiveBackwardMat {
 public:
     using WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat;
     void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
-              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
-              _megdnn_workspace workspace) override;
+              _megdnn_tensor_in mat_idx, _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout& src,
                                   const TensorLayout& mat,
+                                  const TensorLayout& /* mat_idx */,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override {
         return get_workspace_bundle(nullptr, src, mat, diff, grad)
@@ -6,7 +6,8 @@
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
 */
 #include "src/naive/warp_perspective/opr_impl.h"
 #include "src/naive/warp_perspective/warp_perspective_cv.h"
@@ -358,18 +359,29 @@ void WarpPerspectiveForwardImpl::exec(_megdnn_tensor_in src,
 }
 template <typename ctype, typename mtype>
-void WarpPerspectiveBackwardDataImpl::kern_naive(const KernParam<ctype, mtype>& kern_param) {
-    const int N = kern_param.n, C = kern_param.c,
-              IH = kern_param.ih, IW = kern_param.iw;
+void WarpPerspectiveBackwardDataImpl::kern_naive(
+        const KernParam<ctype, mtype>& kern_param) {
+    const int N = kern_param.n_mat, C = kern_param.c, IH = kern_param.ih,
+              IW = kern_param.iw;
     const int OH = kern_param.oh, OW = kern_param.ow;
     const ctype* hptr_ = kern_param.hptr;
     const mtype* mptr_ = kern_param.mptr;
     ctype* sptr_ = kern_param.sptr;
+    int* midx_ptr = kern_param.midx_ptr;
     auto hptr = hptr_;
     auto mptr = mptr_;
     auto sptr = sptr_;
-    std::memset(sptr, 0, sizeof(ctype) * N * C * IH * IW);
+    if (midx_ptr) {
+        std::memset(sptr, 0, sizeof(ctype) * kern_param.n_src * C * IH * IW);
+    } else {
+        std::memset(sptr, 0, sizeof(ctype) * N * C * IH * IW);
+    }
     rep(n, N) {
+        if (midx_ptr) {
+            sptr = sptr_ + midx_ptr[n] * C * IH * IW;
+        } else {
+            sptr = sptr_ + n * C * IH * IW;
+        }
         rep(oh, OH) rep(ow, OW) {
             float numeratorw = mptr[0] * ow + mptr[1] * oh + mptr[2];
             float numeratorh = mptr[3] * ow + mptr[4] * oh + mptr[5];
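The memset distinction above matters because the indexed kernel may touch only a subset of source slices, yet every slice of `grad` must be zeroed: `n_src` covers the full output batch while `N` (`n_mat`) drives the loop. A tiny worked example (hypothetical numbers):

    // n_src = 2 source slices, n_mat = 3 matrices:
    int midx[3] = {0, 0, 1};
    // grad slice 0 <- warps from matrices 0 and 1 (accumulated),
    // grad slice 1 <- warp from matrix 2; zeroing all n_src slices
    // guarantees a slice never named in midx reads as zero, not garbage.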
@@ -404,27 +416,30 @@ void WarpPerspectiveBackwardDataImpl::kern_naive(const KernParam<ctype, mtype>&
                 }
             }
         }
-        sptr += C * IH * IW;
         hptr += C * OH * OW;
         mptr += 3 * 3;
     }
 }
 void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in mat,
+                                           _megdnn_tensor_in mat_idx,
                                            _megdnn_tensor_in diff,
                                            _megdnn_tensor_out grad,
                                            _megdnn_workspace workspace) {
-    check_exec(mat.layout, diff.layout, grad.layout, workspace.size);
+    check_exec(mat.layout, mat_idx.layout, diff.layout, grad.layout,
+               workspace.size);
     megdnn_assert(param().format == param::WarpPerspective::Format::NCHW,
                   "invalid warp_perspective format");
 #define DISPATCH_ST_MT(dt, ct)                                                \
     if (diff.layout.dtype.enumv() == DTypeTrait<dt>::enumv) {                 \
         if (mat.layout.dtype.enumv() == DTypeTrait<dtype::Float32>::enumv) {  \
-            auto kparam = KernParam<ct, float>::from_tensors(mat, diff, grad); \
+            auto kparam = KernParam<ct, float>::from_tensors(mat, mat_idx,    \
+                                                             diff, grad);     \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                 \
             return;                                                           \
         } else {                                                              \
-            auto kparam = KernParam<ct, ct>::from_tensors(mat, diff, grad);   \
+            auto kparam =                                                     \
+                    KernParam<ct, ct>::from_tensors(mat, mat_idx, diff, grad); \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                 \
             return;                                                           \
         }                                                                     \
@@ -441,7 +456,7 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in mat,
 template <typename ctype, typename mtype>
 void WarpPerspectiveBackwardMatImpl::kern_naive(
         const KernParam<ctype, mtype>& kern_param) {
-    const int N = kern_param.n, C = kern_param.c, IH = kern_param.ih,
+    const int N = kern_param.n_mat, C = kern_param.c, IH = kern_param.ih,
               IW = kern_param.iw;
     const int OH = kern_param.oh, OW = kern_param.ow;
@@ -449,9 +464,15 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
     auto sptr = kern_param.sptr;
     auto mptr = kern_param.mptr;
     auto res = kern_param.res;
+    auto midx_ptr = kern_param.midx_ptr;
     auto border_val = kern_param.border_val;
     std::memset(res, 0, sizeof(float) * N * 3 * 3);
     rep(n, N) {
+        if (midx_ptr) {
+            sptr = kern_param.sptr + midx_ptr[n] * C * IH * IW;
+        } else {
+            sptr = kern_param.sptr + n * C * IH * IW;
+        }
         rep(oh, OH) rep(ow, OW) {
             float numeratorw = mptr[0] * ow + mptr[1] * oh + mptr[2];
             float numeratorh = mptr[3] * ow + mptr[4] * oh + mptr[5];
@@ -537,7 +558,6 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
             }
         }
         hptr += C * OH * OW;
-        sptr += C * IH * IW;
         mptr += 3 * 3;
         res += 3 * 3;
     }
@@ -545,21 +565,22 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
 void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in src,
                                           _megdnn_tensor_in mat,
+                                          _megdnn_tensor_in mat_idx,
                                           _megdnn_tensor_in diff,
                                           _megdnn_tensor_out grad,
                                           _megdnn_workspace workspace) {
-    check_exec(src.layout, mat.layout, diff.layout, grad.layout,
+    check_exec(src.layout, mat.layout, mat_idx.layout, diff.layout, grad.layout,
                workspace.size);
 #define DISPATCH_ST_MT(dt, ct)                                               \
     if (src.layout.dtype.enumv() == DTypeTrait<dt>::enumv) {                 \
         if (mat.layout.dtype.enumv() == DTypeTrait<dtype::Float32>::enumv) { \
             auto kparam = KernParam<ct, float>::from_tensors(                \
-                    param().border_val, src, mat, diff, grad);               \
+                    param().border_val, src, mat, mat_idx, diff, grad);      \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                \
             return;                                                          \
         } else {                                                             \
             auto kparam = KernParam<ct, ct>::from_tensors(                   \
-                    param().border_val, src, mat, diff, grad);               \
+                    param().border_val, src, mat, mat_idx, diff, grad);      \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                \
             return;                                                          \
         }                                                                    \
@@ -6,7 +6,8 @@
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
 */
 #pragma once
 #include "megdnn/oprs.h"
@@ -15,144 +16,158 @@ | |||||
namespace megdnn { | namespace megdnn { | ||||
namespace naive { | namespace naive { | ||||
class WarpPerspectiveForwardImpl: public WarpPerspectiveForward { | |||||
protected: | |||||
using Format = Param::Format; | |||||
template <typename ctype, typename mtype> | |||||
struct KernParam { | |||||
Format format; | |||||
BorderMode bmode; | |||||
float border_val; | |||||
size_t n_src, n_mat, c, ih, iw, oh, ow; | |||||
ctype *sptr, *dptr; | |||||
mtype *mptr; | |||||
int *midx_ptr; //!< can be null | |||||
Workspace workspace; | |||||
static KernParam from_tensors( | |||||
Format format, BorderMode bmode, float border_val, | |||||
_megdnn_tensor_in src, _megdnn_tensor_in mat, | |||||
_megdnn_tensor_in mat_idx, _megdnn_tensor_out dst, | |||||
_megdnn_workspace workspace) { | |||||
KernParam ret; | |||||
ret.format = format; | |||||
ret.bmode = bmode; | |||||
ret.border_val = border_val; | |||||
ret.n_src = src.layout.shape[0]; | |||||
if (mat_idx.raw_ptr) { | |||||
megdnn_assert(mat_idx.layout.ndim == 1); | |||||
ret.n_mat = mat_idx.layout.shape[0]; | |||||
ret.midx_ptr = mat_idx.ptr<int>(); | |||||
} else { | |||||
megdnn_assert(mat_idx.layout.ndim == 0); | |||||
ret.n_mat = ret.n_src; | |||||
ret.midx_ptr = nullptr; | |||||
} | |||||
if (format == Format::NCHW) { | |||||
ret.c = src.layout.shape[1]; | |||||
ret.ih = src.layout.shape[2]; | |||||
ret.iw = src.layout.shape[3]; | |||||
ret.oh = dst.layout.shape[2]; | |||||
ret.ow = dst.layout.shape[3]; | |||||
} else if (format == Format::NHWC) { | |||||
ret.c = src.layout.shape[3]; | |||||
ret.ih = src.layout.shape[1]; | |||||
ret.iw = src.layout.shape[2]; | |||||
ret.oh = dst.layout.shape[1]; | |||||
ret.ow = dst.layout.shape[2]; | |||||
} else if (format == Format::NCHW4) { | |||||
ret.c = src.layout.shape[1] * 4; | |||||
ret.ih = src.layout.shape[2]; | |||||
ret.iw = src.layout.shape[3]; | |||||
ret.oh = dst.layout.shape[2]; | |||||
ret.ow = dst.layout.shape[3]; | |||||
} else { | |||||
megdnn_assert(format == Format::NHWCD4); | |||||
ret.c = src.layout.shape[2] * 4; | |||||
ret.ih = src.layout.shape[1]; | |||||
ret.iw = src.layout.shape[3]; | |||||
ret.oh = dst.layout.shape[1]; | |||||
ret.ow = dst.layout.shape[3]; | |||||
} | |||||
if (src.layout.dtype.enumv() == DTypeEnum::Float32 || | |||||
MEGDNN_FLOAT16_SELECT( | |||||
(src.layout.dtype.enumv() == DTypeEnum::Float16 || | |||||
src.layout.dtype.enumv() == DTypeEnum::BFloat16), | |||||
false) || | |||||
src.layout.dtype.enumv() == DTypeEnum::Int8 || | |||||
src.layout.dtype.enumv() == DTypeEnum::Uint8 || | |||||
src.layout.dtype.enumv() == DTypeEnum::QuantizedS8 || | |||||
src.layout.dtype.enumv() == DTypeEnum::Quantized8Asymm) { | |||||
ret.sptr = src.compatible_ptr<ctype>(); | |||||
ret.mptr = mat.ptr<mtype>(); | |||||
ret.dptr = dst.compatible_ptr<ctype>(); | |||||
} else if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||||
ret.sptr = src.compatible_ptr<ctype>(); | |||||
ret.mptr = mat.ptr<mtype>(); | |||||
ret.dptr = dst.compatible_ptr<ctype>(); | |||||
} else { | |||||
ret.sptr = nullptr; | |||||
ret.mptr = nullptr; | |||||
ret.dptr = nullptr; | |||||
} | |||||
ret.workspace = workspace; | |||||
return ret; | |||||
class WarpPerspectiveForwardImpl : public WarpPerspectiveForward { | |||||
protected: | |||||
using Format = Param::Format; | |||||
template <typename ctype, typename mtype> | |||||
struct KernParam { | |||||
Format format; | |||||
BorderMode bmode; | |||||
float border_val; | |||||
size_t n_src, n_mat, c, ih, iw, oh, ow; | |||||
ctype *sptr, *dptr; | |||||
mtype* mptr; | |||||
int* midx_ptr; //!< can be null | |||||
Workspace workspace; | |||||
static KernParam from_tensors(Format format, BorderMode bmode, | |||||
float border_val, _megdnn_tensor_in src, | |||||
_megdnn_tensor_in mat, | |||||
_megdnn_tensor_in mat_idx, | |||||
_megdnn_tensor_out dst, | |||||
_megdnn_workspace workspace) { | |||||
KernParam ret; | |||||
ret.format = format; | |||||
ret.bmode = bmode; | |||||
ret.border_val = border_val; | |||||
ret.n_src = src.layout.shape[0]; | |||||
if (mat_idx.raw_ptr) { | |||||
megdnn_assert(mat_idx.layout.ndim == 1); | |||||
ret.n_mat = mat_idx.layout.shape[0]; | |||||
ret.midx_ptr = mat_idx.ptr<int>(); | |||||
} else { | |||||
megdnn_assert(mat_idx.layout.ndim == 0); | |||||
ret.n_mat = ret.n_src; | |||||
ret.midx_ptr = nullptr; | |||||
} | |||||
if (format == Format::NCHW) { | |||||
ret.c = src.layout.shape[1]; | |||||
ret.ih = src.layout.shape[2]; | |||||
ret.iw = src.layout.shape[3]; | |||||
ret.oh = dst.layout.shape[2]; | |||||
ret.ow = dst.layout.shape[3]; | |||||
} else if (format == Format::NHWC) { | |||||
ret.c = src.layout.shape[3]; | |||||
ret.ih = src.layout.shape[1]; | |||||
ret.iw = src.layout.shape[2]; | |||||
ret.oh = dst.layout.shape[1]; | |||||
ret.ow = dst.layout.shape[2]; | |||||
} else if (format == Format::NCHW4) { | |||||
ret.c = src.layout.shape[1] * 4; | |||||
ret.ih = src.layout.shape[2]; | |||||
ret.iw = src.layout.shape[3]; | |||||
ret.oh = dst.layout.shape[2]; | |||||
ret.ow = dst.layout.shape[3]; | |||||
} else { | |||||
megdnn_assert(format == Format::NHWCD4); | |||||
ret.c = src.layout.shape[2] * 4; | |||||
ret.ih = src.layout.shape[1]; | |||||
ret.iw = src.layout.shape[3]; | |||||
ret.oh = dst.layout.shape[1]; | |||||
ret.ow = dst.layout.shape[3]; | |||||
} | |||||
if (src.layout.dtype.enumv() == DTypeEnum::Float32 || | |||||
MEGDNN_FLOAT16_SELECT( | |||||
(src.layout.dtype.enumv() == DTypeEnum::Float16 || | |||||
src.layout.dtype.enumv() == DTypeEnum::BFloat16), | |||||
false) || | |||||
src.layout.dtype.enumv() == DTypeEnum::Int8 || | |||||
src.layout.dtype.enumv() == DTypeEnum::Uint8 || | |||||
src.layout.dtype.enumv() == DTypeEnum::QuantizedS8 || | |||||
src.layout.dtype.enumv() == DTypeEnum::Quantized8Asymm) { | |||||
ret.sptr = src.compatible_ptr<ctype>(); | |||||
ret.mptr = mat.ptr<mtype>(); | |||||
ret.dptr = dst.compatible_ptr<ctype>(); | |||||
} else if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||||
ret.sptr = src.compatible_ptr<ctype>(); | |||||
ret.mptr = mat.ptr<mtype>(); | |||||
ret.dptr = dst.compatible_ptr<ctype>(); | |||||
} else { | |||||
ret.sptr = nullptr; | |||||
ret.mptr = nullptr; | |||||
ret.dptr = nullptr; | |||||
} | } | ||||
ret.workspace = workspace;
return ret;
}
};
// ctype: C type of input data type.
// mtype: C type of transformation matrix data type.
template <typename ctype, typename mtype>
void kern_naive(const KernParam<ctype, mtype>& kern_param, size_t task_id);
public:
using WarpPerspectiveForward::WarpPerspectiveForward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
_megdnn_tensor_in mat_idx, _megdnn_tensor_out dst,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
const TensorLayout&,
const TensorLayout&) override {
return 0;
}
private:
template <typename ctype, typename mtype>
void kern_naive_nhwcd4(const KernParam<ctype, mtype>& kern_param,
size_t task_id);
}; | }; | ||||
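For readers skimming the diff: `midx_ptr` is the only new state in `KernParam`, and its contract is that output batch i samples source batch `midx_ptr[i]` when the index tensor is given, and batch i otherwise (so `n_mat` output slices draw from `n_src` input slices). A minimal sketch of that dispatch, assuming NCHW strides and a hypothetical `warp_one_batch` helper -- this is not the actual `kern_naive` body:

template <typename ctype, typename mtype>
void sketch_forward_batches(const KernParam<ctype, mtype>& p) {
    for (size_t i = 0; i < p.n_mat; ++i) {
        // resolve the source batch: indexed when mat_idx was provided,
        // identity mapping otherwise
        size_t sb = p.midx_ptr ? static_cast<size_t>(p.midx_ptr[i]) : i;
        const ctype* src = p.sptr + sb * p.c * p.ih * p.iw;  // NCHW layout assumed
        const mtype* mat = p.mptr + i * 3 * 3;               // one 3x3 matrix per batch
        ctype* dst = p.dptr + i * p.c * p.oh * p.ow;
        warp_one_batch(src, mat, dst, p);  // hypothetical per-batch kernel
    }
}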
class WarpPerspectiveBackwardDataImpl : public WarpPerspectiveBackwardData { | class WarpPerspectiveBackwardDataImpl : public WarpPerspectiveBackwardData { | ||||
protected: | protected: | ||||
template <typename ctype, typename mtype> | template <typename ctype, typename mtype> | ||||
struct KernParam { | struct KernParam { | ||||
size_t n, c, ih, iw, oh, ow;
size_t n_src, n_mat, c, ih, iw, oh, ow;
ctype *sptr, *hptr; | ctype *sptr, *hptr; | ||||
mtype* mptr; | mtype* mptr; | ||||
int* midx_ptr; //!< can be null | |||||
static KernParam from_tensors(_megdnn_tensor_in mat, | static KernParam from_tensors(_megdnn_tensor_in mat, | ||||
_megdnn_tensor_in mat_idx, | |||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad) { | _megdnn_tensor_out grad) { | ||||
KernParam ret; | KernParam ret; | ||||
ret.n = grad.layout.shape[0], ret.c = grad.layout.shape[1], | |||||
ret.n_src = grad.layout.shape[0], ret.c = grad.layout.shape[1]; | |||||
ret.ih = grad.layout.shape[2], ret.iw = grad.layout.shape[3]; | ret.ih = grad.layout.shape[2], ret.iw = grad.layout.shape[3]; | ||||
ret.oh = diff.layout.shape[2], ret.ow = diff.layout.shape[3]; | ret.oh = diff.layout.shape[2], ret.ow = diff.layout.shape[3]; | ||||
ret.hptr = diff.ptr<ctype>(); | ret.hptr = diff.ptr<ctype>(); | ||||
ret.mptr = mat.ptr<mtype>(); | ret.mptr = mat.ptr<mtype>(); | ||||
ret.sptr = grad.ptr<ctype>(); | ret.sptr = grad.ptr<ctype>(); | ||||
if (mat_idx.raw_ptr) { | |||||
megdnn_assert(mat_idx.layout.ndim == 1); | |||||
ret.n_mat = mat_idx.layout.shape[0]; | |||||
ret.midx_ptr = mat_idx.ptr<int>(); | |||||
} else { | |||||
megdnn_assert(mat_idx.layout.ndim == 0); | |||||
ret.n_mat = ret.n_src; | |||||
ret.midx_ptr = nullptr; | |||||
} | |||||
return ret; | return ret; | ||||
} | } | ||||
}; | }; | ||||
public: | public: | ||||
using WarpPerspectiveBackwardData::WarpPerspectiveBackwardData; | using WarpPerspectiveBackwardData::WarpPerspectiveBackwardData; | ||||
void exec(_megdnn_tensor_in mat, _megdnn_tensor_in diff, | |||||
_megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||||
void exec(_megdnn_tensor_in mat, _megdnn_tensor_in mat_idx, | |||||
_megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||||
_megdnn_workspace workspace) override; | |||||
size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | ||||
const TensorLayout&, | |||||
const TensorLayout&) override { | const TensorLayout&) override { | ||||
return 0; | return 0; | ||||
} | } | ||||
private: | private: | ||||
template <typename ctype, typename mtype> | template <typename ctype, typename mtype> | ||||
void kern_naive(const KernParam<ctype, mtype>& kern_param); | void kern_naive(const KernParam<ctype, mtype>& kern_param); | ||||
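One subtlety the `n_src`/`n_mat` split introduces for the data gradient: `mat_idx` may repeat a source index, so several `diff` slices can map onto the same `grad` slice and must be accumulated after the buffer is zeroed. A sketch under those assumptions (NCHW strides; `scatter_add_one_batch` is a hypothetical helper, not part of this patch):

#include <cstring>  // std::memset, for the zero-fill below

template <typename ctype, typename mtype>
void sketch_backward_data(const KernParam<ctype, mtype>& p) {
    // sptr points at grad here (see from_tensors above); zero it first
    std::memset(p.sptr, 0, sizeof(ctype) * p.n_src * p.c * p.ih * p.iw);
    for (size_t i = 0; i < p.n_mat; ++i) {
        size_t sb = p.midx_ptr ? static_cast<size_t>(p.midx_ptr[i]) : i;
        ctype* grad = p.sptr + sb * p.c * p.ih * p.iw;
        const ctype* diff = p.hptr + i * p.c * p.oh * p.ow;
        // accumulate: repeated sb values add into the same grad slice
        scatter_add_one_batch(grad, p.mptr + i * 9, diff, p);  // hypothetical
    }
}

The mat gradient (`res` in the struct below) needs no such accumulation, since each matrix owns its own 3x3 output slice.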
@@ -162,23 +177,35 @@ class WarpPerspectiveBackwardMatImpl : public WarpPerspectiveBackwardMat { | |||||
protected: | protected: | ||||
template <typename ctype, typename mtype> | template <typename ctype, typename mtype> | ||||
struct KernParam { | struct KernParam { | ||||
size_t n, c, ih, iw, oh, ow; | |||||
size_t n_src, n_mat, c, ih, iw, oh, ow; | |||||
ctype *sptr, *hptr; | ctype *sptr, *hptr; | ||||
mtype* mptr, *res; | |||||
mtype *mptr, *res; | |||||
int* midx_ptr; //!< can be null | |||||
float border_val; | float border_val; | ||||
static KernParam from_tensors(float border_val_, _megdnn_tensor_in src, | static KernParam from_tensors(float border_val_, _megdnn_tensor_in src, | ||||
_megdnn_tensor_in mat, | _megdnn_tensor_in mat, | ||||
_megdnn_tensor_in mat_idx, | |||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad) { | _megdnn_tensor_out grad) { | ||||
KernParam ret; | KernParam ret; | ||||
ret.border_val = border_val_; | ret.border_val = border_val_; | ||||
ret.n = src.layout.shape[0], ret.c = src.layout.shape[1], | |||||
ret.n_src = src.layout.shape[0], ret.c = src.layout.shape[1]; | |||||
ret.ih = src.layout.shape[2], ret.iw = src.layout.shape[3]; | ret.ih = src.layout.shape[2], ret.iw = src.layout.shape[3]; | ||||
ret.oh = diff.layout.shape[2], ret.ow = diff.layout.shape[3]; | ret.oh = diff.layout.shape[2], ret.ow = diff.layout.shape[3]; | ||||
ret.hptr = diff.ptr<ctype>(); | ret.hptr = diff.ptr<ctype>(); | ||||
ret.mptr = mat.ptr<mtype>(); | ret.mptr = mat.ptr<mtype>(); | ||||
ret.sptr = src.ptr<ctype>(); | ret.sptr = src.ptr<ctype>(); | ||||
ret.res = grad.ptr<mtype>(); | ret.res = grad.ptr<mtype>(); | ||||
if (mat_idx.raw_ptr) { | |||||
megdnn_assert(mat_idx.layout.ndim == 1); | |||||
ret.n_mat = mat_idx.layout.shape[0]; | |||||
ret.midx_ptr = mat_idx.ptr<int>(); | |||||
} else { | |||||
megdnn_assert(mat_idx.layout.ndim == 0); | |||||
ret.n_mat = ret.n_src; | |||||
ret.midx_ptr = nullptr; | |||||
} | |||||
return ret; | return ret; | ||||
} | } | ||||
}; | }; | ||||
@@ -186,10 +213,10 @@ protected: | |||||
public: | public: | ||||
using WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat; | using WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat; | ||||
void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat, | void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat, | ||||
_megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||||
_megdnn_workspace workspace) override; | |||||
_megdnn_tensor_in mat_idx, _megdnn_tensor_in diff, | |||||
_megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||||
size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | ||||
const TensorLayout&, | |||||
const TensorLayout&, const TensorLayout&, | |||||
const TensorLayout&) override { | const TensorLayout&) override { | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -6,7 +6,8 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#include "test/common/warp_perspective.h" | #include "test/common/warp_perspective.h" | ||||
@@ -19,6 +20,10 @@ using namespace warp_perspective; | |||||
void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspective*, | void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspective*, | ||||
TensorLayoutArray&) {} | TensorLayoutArray&) {} | ||||
void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspectiveBackwardData*, | |||||
TensorLayoutArray&) {} | |||||
void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspectiveBackwardMat*, | |||||
TensorLayoutArray&) {} | |||||
void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr, | void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr, | ||||
const TensorNDArray& tensors) { | const TensorNDArray& tensors) { | ||||
@@ -31,6 +36,30 @@ void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr, | |||||
opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace()); | opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace()); | ||||
} | } | ||||
void WarpPerspectiveMatIdxProxy::exec(WarpPerspectiveBackwardData* opr, | |||||
const TensorNDArray& tensors) { | |||||
if (!W.valid()) { | |||||
W = WorkspaceWrapper(opr->handle(), 0); | |||||
} | |||||
megdnn_assert(tensors.size() == 4); | |||||
W.update(opr->get_workspace_in_bytes(tensors[0].layout, tensors[1].layout, | |||||
tensors[2].layout, tensors[3].layout)); | |||||
opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace()); | |||||
} | |||||
void WarpPerspectiveMatIdxProxy::exec(WarpPerspectiveBackwardMat* opr, | |||||
const TensorNDArray& tensors) { | |||||
if (!W.valid()) { | |||||
W = WorkspaceWrapper(opr->handle(), 0); | |||||
} | |||||
megdnn_assert(tensors.size() == 5); | |||||
W.update(opr->get_workspace_in_bytes(tensors[0].layout, tensors[1].layout, | |||||
tensors[2].layout, tensors[3].layout, | |||||
tensors[4].layout)); | |||||
opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4], | |||||
W.workspace()); | |||||
} | |||||
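A hedged usage sketch of the new proxy overloads (handle creation elided; shapes borrowed from the tests below): tensors are positional, {mat, mat_idx, diff, grad} for backward data and {src, mat, mat_idx, diff, grad} for backward mat, matching the size-4 and size-5 asserts above.

Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy> checker(handle);
checker.set_dtype(1, dtype::Int32());  // mat_idx is an int32 index tensor
checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {5, 12, 10, 11}});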
std::vector<TestArg> warp_perspective::get_cv_args() { | std::vector<TestArg> warp_perspective::get_cv_args() { | ||||
std::vector<TestArg> args; | std::vector<TestArg> args; | ||||
@@ -101,10 +130,10 @@ void warp_perspective::run_mat_idx_test(Handle* handle) { | |||||
// test NHWC | // test NHWC | ||||
param.format = WarpPerspective::Param::Format::NHWC; | param.format = WarpPerspective::Param::Format::NHWC; | ||||
checker.set_param(param) | |||||
.set_rng(2, &mat_idx_rng) | |||||
.set_epsilon(1e-1) | |||||
.set_dtype(2, dtype::Int32()); | |||||
checker.set_param(param) | |||||
.set_rng(2, &mat_idx_rng) | |||||
.set_epsilon(1e-1) | |||||
.set_dtype(2, dtype::Int32()); | |||||
checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 11, 12, 3}}); | checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 11, 12, 3}}); | ||||
} | } | ||||
@@ -22,7 +22,11 @@ namespace test { | |||||
struct WarpPerspectiveMatIdxProxy { | struct WarpPerspectiveMatIdxProxy { | ||||
WorkspaceWrapper W; | WorkspaceWrapper W; | ||||
static void deduce_layout(WarpPerspective*, TensorLayoutArray&); | static void deduce_layout(WarpPerspective*, TensorLayoutArray&); | ||||
static void deduce_layout(WarpPerspectiveBackwardData*, TensorLayoutArray&); | |||||
static void deduce_layout(WarpPerspectiveBackwardMat*, TensorLayoutArray&); | |||||
void exec(WarpPerspective* opr, const TensorNDArray& tensors); | void exec(WarpPerspective* opr, const TensorNDArray& tensors); | ||||
void exec(WarpPerspectiveBackwardData* opr, const TensorNDArray& tensors); | |||||
void exec(WarpPerspectiveBackwardMat* opr, const TensorNDArray& tensors); | |||||
}; | }; | ||||
class WarpPerspectiveMatRNG final : public IIDRNG { | class WarpPerspectiveMatRNG final : public IIDRNG { | ||||
@@ -6,7 +6,8 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#include "test/cuda/fixture.h" | #include "test/cuda/fixture.h" | ||||
@@ -21,10 +22,10 @@ namespace { | |||||
using namespace megdnn; | using namespace megdnn; | ||||
using namespace test; | using namespace test; | ||||
class NanMatRNG: public RNG { | |||||
void gen(const TensorND &tensor_) override | |||||
class NanMatRNG : public RNG { | |||||
void gen(const TensorND& tensor_) override | |||||
{ | { | ||||
auto &gen = RandomState::generator(); | |||||
auto& gen = RandomState::generator(); | |||||
std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f); | std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f); | ||||
std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f); | std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f); | ||||
std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f); | std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f); | ||||
@@ -32,7 +33,7 @@ class NanMatRNG: public RNG { | |||||
std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f); | std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f); | ||||
std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f); | std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f); | ||||
std::uniform_int_distribution<int> dice(0, 5); | std::uniform_int_distribution<int> dice(0, 5); | ||||
float *ptr = tensor_.ptr<dt_float32>(); | |||||
float* ptr = tensor_.ptr<dt_float32>(); | |||||
auto N = tensor_.layout.shape[0]; | auto N = tensor_.layout.shape[0]; | ||||
for (size_t n = 0; n < N; ++n) { | for (size_t n = 0; n < N; ++n) { | ||||
for (size_t i = 0; i < 9; ++i) { | for (size_t i = 0; i < 9; ++i) { | ||||
@@ -65,7 +66,7 @@ class NanMatRNG: public RNG { | |||||
} | } | ||||
}; | }; | ||||
} // anonymous namespace | |||||
} // anonymous namespace | |||||
namespace megdnn { | namespace megdnn { | ||||
namespace test { | namespace test { | ||||
@@ -171,17 +172,15 @@ TEST_F(CUDA, WARP_PERSPECTIVE_CV) { | |||||
} | } | ||||
#endif | #endif | ||||
TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) { | |||||
using Param = WarpPerspective::Param; | using Param = WarpPerspective::Param; | ||||
Checker<WarpPerspectiveForward> checker(handle_cuda()); | Checker<WarpPerspectiveForward> checker(handle_cuda()); | ||||
WarpPerspectiveMatRNG rng; | WarpPerspectiveMatRNG rng; | ||||
checker.set_rng(1, &rng); | checker.set_rng(1, &rng); | ||||
for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) | |||||
{ | |||||
for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) { | |||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
param.border_val = 0.3f; | param.border_val = 0.3f; | ||||
param.bmode = bmode; | param.bmode = bmode; | ||||
@@ -204,8 +203,7 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) | |||||
// nan case | // nan case | ||||
NanMatRNG rng_nan; | NanMatRNG rng_nan; | ||||
UniformFloatRNG rng_zero(0, 0); | UniformFloatRNG rng_zero(0, 0); | ||||
for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero}) | |||||
{ | |||||
for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) { | |||||
param::WarpPerspective param; | param::WarpPerspective param; | ||||
param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | ||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | ||||
@@ -213,20 +211,18 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) | |||||
param.border_val = 1.737; | param.border_val = 1.737; | ||||
checker.set_param(param); | checker.set_param(param); | ||||
// no invalid mem access is enough; no need to check value | // no invalid mem access is enough; no need to check value | ||||
checker.set_expect_exec_fail([](){}); | |||||
checker.set_expect_exec_fail([]() {}); | |||||
checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}}); | checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}}); | ||||
} | } | ||||
} | } | ||||
TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) { | |||||
require_compute_capability(6, 0); | require_compute_capability(6, 0); | ||||
using Param = WarpPerspective::Param; | using Param = WarpPerspective::Param; | ||||
Checker<WarpPerspectiveForward> checker(handle_cuda()); | Checker<WarpPerspectiveForward> checker(handle_cuda()); | ||||
WarpPerspectiveMatRNG rng; | WarpPerspectiveMatRNG rng; | ||||
checker.set_rng(1, &rng); | checker.set_rng(1, &rng); | ||||
for (auto bmode: {WarpPerspective::BorderMode::REPLICATE}) | |||||
{ | |||||
for (auto bmode : {WarpPerspective::BorderMode::REPLICATE}) { | |||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
param.border_val = 0.3f; | param.border_val = 0.3f; | ||||
param.bmode = bmode; | param.bmode = bmode; | ||||
@@ -235,27 +231,24 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) | |||||
param.format = Param::Format::NHWC; | param.format = Param::Format::NHWC; | ||||
checker.set_param(param); | checker.set_param(param); | ||||
checker.set_epsilon(0.15).set_max_avg_error(4e-2); | checker.set_epsilon(0.15).set_max_avg_error(4e-2); | ||||
size_t n = (INT_MAX) / (512 * 512 * 3); | |||||
checker.execs( | |||||
{{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}}); | |||||
size_t n = (INT_MAX) / (512 * 512 * 3); | |||||
checker.execs( | |||||
{{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}}); | |||||
} | } | ||||
} | } | ||||
TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) { | |||||
using Param = WarpPerspective::Param; | using Param = WarpPerspective::Param; | ||||
Checker<WarpPerspectiveForward> checker(handle_cuda()); | Checker<WarpPerspectiveForward> checker(handle_cuda()); | ||||
WarpPerspectiveMatRNG rng; | WarpPerspectiveMatRNG rng; | ||||
checker.set_rng(1, &rng); | checker.set_rng(1, &rng); | ||||
checker.set_dtype(0, dtype::Float16()) | checker.set_dtype(0, dtype::Float16()) | ||||
.set_dtype(1, dtype::Float32()) | |||||
.set_dtype(2, dtype::Float16()); | |||||
for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) | |||||
{ | |||||
.set_dtype(1, dtype::Float32()) | |||||
.set_dtype(2, dtype::Float16()); | |||||
for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) { | |||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
param.border_val = 0.3f; | param.border_val = 0.3f; | ||||
param.bmode = bmode; | param.bmode = bmode; | ||||
@@ -278,8 +271,7 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) | |||||
// nan case | // nan case | ||||
NanMatRNG rng_nan; | NanMatRNG rng_nan; | ||||
UniformFloatRNG rng_zero(0, 0); | UniformFloatRNG rng_zero(0, 0); | ||||
for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero}) | |||||
{ | |||||
for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) { | |||||
param::WarpPerspective param; | param::WarpPerspective param; | ||||
param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | ||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | ||||
@@ -287,13 +279,12 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) | |||||
param.border_val = 1.737; | param.border_val = 1.737; | ||||
checker.set_param(param); | checker.set_param(param); | ||||
// no invalid mem access is enough; no need to check value | // no invalid mem access is enough; no need to check value | ||||
checker.set_expect_exec_fail([](){}); | |||||
checker.set_expect_exec_fail([]() {}); | |||||
checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}}); | checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}}); | ||||
} | } | ||||
} | } | ||||
TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) { | |||||
using Param = WarpPerspective::Param; | using Param = WarpPerspective::Param; | ||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
Checker<WarpPerspectiveForward> checker(handle_cuda()); | Checker<WarpPerspectiveForward> checker(handle_cuda()); | ||||
@@ -348,31 +339,29 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) { | |||||
warp_perspective::run_int8_test(handle_cuda()); | warp_perspective::run_int8_test(handle_cuda()); | ||||
} | } | ||||
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) { | |||||
Checker<WarpPerspectiveBackwardData> checker(handle_cuda()); | Checker<WarpPerspectiveBackwardData> checker(handle_cuda()); | ||||
WarpPerspectiveMatRNG rng; | WarpPerspectiveMatRNG rng; | ||||
checker.set_rng(0, &rng); | checker.set_rng(0, &rng); | ||||
for (int i = 0; i < 1; ++i) { | for (int i = 0; i < 1; ++i) { | ||||
for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) | |||||
{ | |||||
for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) { | |||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
param.border_val = 0.3f; | param.border_val = 0.3f; | ||||
param.bmode = bmode; | param.bmode = bmode; | ||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | ||||
checker.set_param(param); | checker.set_param(param); | ||||
checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}}); | checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}}); | ||||
checker.execs({{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}}); | |||||
checker.execs( | |||||
{{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}}); | |||||
} | } | ||||
} | } | ||||
// nan case | // nan case | ||||
NanMatRNG rng_nan; | NanMatRNG rng_nan; | ||||
UniformFloatRNG rng_zero(0, 0); | UniformFloatRNG rng_zero(0, 0); | ||||
for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero}) | |||||
{ | |||||
for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) { | |||||
param::WarpPerspective param; | param::WarpPerspective param; | ||||
param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | ||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | ||||
@@ -380,39 +369,54 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) | |||||
param.border_val = 1.737; | param.border_val = 1.737; | ||||
checker.set_param(param); | checker.set_param(param); | ||||
// no invalid mem access is enough; no need to check value | // no invalid mem access is enough; no need to check value | ||||
checker.set_expect_exec_fail([](){}); | |||||
checker.set_expect_exec_fail([]() {}); | |||||
checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}}); | checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}}); | ||||
} | } | ||||
{ | |||||
Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy> | |||||
checker(handle_cuda()); | |||||
constexpr int N_SRC = 5; | |||||
UniformIntRNG mat_idx_rng{0, N_SRC - 1}; | |||||
checker.set_rng(0, &rng); | |||||
checker.set_dtype(1, dtype::Int32()); | |||||
checker.set_rng(1, &mat_idx_rng); | |||||
param::WarpPerspective param; | |||||
param.bmode = param::WarpPerspective::BorderMode::REFLECT; | |||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||||
checker.set_param(param); | |||||
checker.set_epsilon(1 + 1e-3); | |||||
checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}}); | |||||
checker.execs( | |||||
{{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}}); | |||||
} | |||||
} | } | ||||
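The shape contract these two execs exercise is worth spelling out: the gradient keeps the source batch size while every other tensor is sized by the matrix count.

// Backward data with mat_idx, second execs above:
//   mat  : {123, 3, 3}        -- 123 transform matrices
//   midx : {123}              -- entries drawn from [0, N_SRC)
//   diff : {123, 56, 16, 15}  -- one gradient slice per matrix
//   grad : {5, 56, 17, 13}    -- N_SRC = 5 source images, accumulated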
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) { | |||||
Checker<WarpPerspectiveBackwardMat> checker(handle_cuda()); | Checker<WarpPerspectiveBackwardMat> checker(handle_cuda()); | ||||
WarpPerspectiveMatRNG rng; | WarpPerspectiveMatRNG rng; | ||||
checker.set_rng(1, &rng); | checker.set_rng(1, &rng); | ||||
for (int i = 0; i < 1; ++i) { | for (int i = 0; i < 1; ++i) { | ||||
for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) | |||||
{ | |||||
for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) { | |||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
param.border_val = 0.3f; | param.border_val = 0.3f; | ||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | ||||
param.bmode = bmode; | param.bmode = bmode; | ||||
checker.set_param(param); | checker.set_param(param); | ||||
checker.set_epsilon(1e-2); | checker.set_epsilon(1e-2); | ||||
checker.execs({ | |||||
{1000, 3, 11, 12}, {1000, 3, 3}, | |||||
{1000, 3, 10, 11}, {1000, 3, 3} | |||||
}); | |||||
checker.execs({{1000, 3, 11, 12}, | |||||
{1000, 3, 3}, | |||||
{1000, 3, 10, 11}, | |||||
{1000, 3, 3}}); | |||||
} | } | ||||
} | } | ||||
// nan case | // nan case | ||||
NanMatRNG rng_nan; | NanMatRNG rng_nan; | ||||
UniformFloatRNG rng_zero(0, 0); | UniformFloatRNG rng_zero(0, 0); | ||||
for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero}) | |||||
{ | |||||
for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) { | |||||
param::WarpPerspective param; | param::WarpPerspective param; | ||||
param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | ||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | ||||
@@ -420,26 +424,50 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) | |||||
param.border_val = 1.737; | param.border_val = 1.737; | ||||
checker.set_param(param); | checker.set_param(param); | ||||
// no invalid mem access is enough; no need to check value | // no invalid mem access is enough; no need to check value | ||||
checker.set_expect_exec_fail([](){}); | |||||
checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, | |||||
{1000, 2, 12, 13}, {1000, 3, 3}}); | |||||
checker.set_expect_exec_fail([]() {}); | |||||
checker.exec({{1000, 2, 10, 11}, | |||||
{1000, 3, 3}, | |||||
{1000, 2, 12, 13}, | |||||
{1000, 3, 3}}); | |||||
} | |||||
{ | |||||
Checker<WarpPerspectiveBackwardMat, WarpPerspectiveMatIdxProxy> checker( | |||||
handle_cuda()); | |||||
constexpr int N_SRC = 5; | |||||
UniformIntRNG mat_idx_rng{0, N_SRC - 1}; | |||||
checker.set_rng(1, &rng); | |||||
checker.set_dtype(2, dtype::Int32()); | |||||
checker.set_rng(2, &mat_idx_rng); | |||||
param::WarpPerspective param; | |||||
param.bmode = param::WarpPerspective::BorderMode::REFLECT; | |||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||||
checker.set_param(param); | |||||
checker.set_epsilon(1 + 1e-3); | |||||
checker.execs({{N_SRC, 12, 10, 11}, | |||||
{2, 3, 3}, | |||||
{2}, | |||||
{2, 12, 11, 12}, | |||||
{2, 3, 3}}); | |||||
checker.execs({{N_SRC, 56, 17, 13}, | |||||
{123, 3, 3}, | |||||
{123}, | |||||
{123, 56, 16, 15}, | |||||
{123, 3, 3}}); | |||||
} | } | ||||
} | } | ||||
TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) { | |||||
using Param = WarpPerspective::Param; | using Param = WarpPerspective::Param; | ||||
Checker<WarpPerspectiveForward> checker(handle_cuda()); | Checker<WarpPerspectiveForward> checker(handle_cuda()); | ||||
WarpPerspectiveMatRNG rng; | WarpPerspectiveMatRNG rng; | ||||
checker.set_rng(1, &rng); | checker.set_rng(1, &rng); | ||||
checker.set_dtype(0, dtype::BFloat16()) | checker.set_dtype(0, dtype::BFloat16()) | ||||
.set_dtype(1, dtype::Float32()) | |||||
.set_dtype(2, dtype::BFloat16()); | |||||
for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) | |||||
{ | |||||
.set_dtype(1, dtype::Float32()) | |||||
.set_dtype(2, dtype::BFloat16()); | |||||
for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) { | |||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
param.border_val = 0.3f; | param.border_val = 0.3f; | ||||
param.bmode = bmode; | param.bmode = bmode; | ||||
@@ -457,21 +485,19 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) | |||||
} | } | ||||
} | } | ||||
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) { | |||||
Checker<WarpPerspectiveBackwardData> checker(handle_cuda()); | Checker<WarpPerspectiveBackwardData> checker(handle_cuda()); | ||||
WarpPerspectiveMatRNG rng; | WarpPerspectiveMatRNG rng; | ||||
checker.set_rng(0, &rng) | checker.set_rng(0, &rng) | ||||
.set_epsilon(1e-1) | |||||
.set_dtype(0, dtype::Float32()) | |||||
.set_dtype(1, dtype::BFloat16()) | |||||
.set_dtype(2, dtype::BFloat16()); | |||||
.set_epsilon(1e-1) | |||||
.set_dtype(0, dtype::Float32()) | |||||
.set_dtype(1, dtype::BFloat16()) | |||||
.set_dtype(2, dtype::BFloat16()); | |||||
for (int i = 0; i < 1; ++i) { | for (int i = 0; i < 1; ++i) { | ||||
for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) | |||||
{ | |||||
for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) { | |||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
param.border_val = 0.3f; | param.border_val = 0.3f; | ||||
param.bmode = bmode; | param.bmode = bmode; | ||||
@@ -482,31 +508,29 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) | |||||
} | } | ||||
} | } | ||||
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) | |||||
{ | |||||
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) { | |||||
Checker<WarpPerspectiveBackwardMat> checker(handle_cuda()); | Checker<WarpPerspectiveBackwardMat> checker(handle_cuda()); | ||||
WarpPerspectiveMatRNG rng; | WarpPerspectiveMatRNG rng; | ||||
checker.set_rng(1, &rng) | checker.set_rng(1, &rng) | ||||
.set_epsilon(1e-2) | |||||
.set_dtype(0, dtype::BFloat16()) | |||||
.set_dtype(1, dtype::Float32()) | |||||
.set_dtype(2, dtype::BFloat16()) | |||||
.set_dtype(3, dtype::Float32()); | |||||
.set_epsilon(1e-2) | |||||
.set_dtype(0, dtype::BFloat16()) | |||||
.set_dtype(1, dtype::Float32()) | |||||
.set_dtype(2, dtype::BFloat16()) | |||||
.set_dtype(3, dtype::Float32()); | |||||
for (int i = 0; i < 1; ++i) { | for (int i = 0; i < 1; ++i) { | ||||
for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) | |||||
{ | |||||
for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||||
WarpPerspective::BorderMode::REFLECT, | |||||
WarpPerspective::BorderMode::REPLICATE, | |||||
WarpPerspective::BorderMode::CONSTANT}) { | |||||
WarpPerspective::Param param; | WarpPerspective::Param param; | ||||
param.border_val = 0.3f; | param.border_val = 0.3f; | ||||
param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | ||||
param.bmode = bmode; | param.bmode = bmode; | ||||
checker.set_param(param); | checker.set_param(param); | ||||
checker.execs({ | |||||
{1000, 3, 11, 12}, {1000, 3, 3}, | |||||
{1000, 3, 10, 11}, {1000, 3, 3} | |||||
}); | |||||
checker.execs({{1000, 3, 11, 12}, | |||||
{1000, 3, 3}, | |||||
{1000, 3, 10, 11}, | |||||
{1000, 3, 3}}); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -549,14 +573,14 @@ TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) { | |||||
benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f)); | benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f)); | ||||
benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f)); | benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f)); | ||||
run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}}); | run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}}); | ||||
run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1,25, 256, 256, 4}}); | |||||
run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}}); | |||||
run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}}); | run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}}); | ||||
run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}}); | run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}}); | ||||
} | } | ||||
#endif | #endif | ||||
} // namespace test | |||||
} // namespace megdnn | |||||
} // namespace test | |||||
} // namespace megdnn | |||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -6,18 +6,18 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#include "./internal/megdnn_opr_wrapper.inl" | |||||
#include "megbrain/opr/imgproc.h" | #include "megbrain/opr/imgproc.h" | ||||
#include "megbrain/opr/utility.h" | |||||
#include "./internal/megdnn_opr_wrapper.inl" | |||||
#include "megbrain/graph/grad_impl.h" | #include "megbrain/graph/grad_impl.h" | ||||
#include "megbrain/opr/utility.h" | |||||
using namespace mgb; | using namespace mgb; | ||||
using namespace opr; | using namespace opr; | ||||
/* ======================= WarpPerspectiveForward ======================= */ | /* ======================= WarpPerspectiveForward ======================= */ | ||||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveForward); | MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveForward); | ||||
@@ -54,8 +54,7 @@ void WarpPerspectiveForward::add_input_layout_constraint() { | |||||
} | } | ||||
void WarpPerspectiveForward::outshape_by_symvar_do_get_output_shape( | void WarpPerspectiveForward::outshape_by_symvar_do_get_output_shape( | ||||
TensorShape &dest, const ShapeInferInfo &shpinfo) { | |||||
TensorShape& dest, const ShapeInferInfo& shpinfo) { | |||||
TensorShape oshp2d; | TensorShape oshp2d; | ||||
cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | ||||
auto imgshp = shpinfo.shape_inp_shp.at(0), | auto imgshp = shpinfo.shape_inp_shp.at(0), | ||||
@@ -112,8 +111,8 @@ void WarpPerspectiveForward::scn_do_execute() { | |||||
} | } | ||||
size_t WarpPerspectiveForward::get_workspace_size_bytes( | size_t WarpPerspectiveForward::get_workspace_size_bytes( | ||||
const TensorShapeArray &input_shapes, | |||||
const TensorShapeArray &output_shapes) const { | |||||
const TensorShapeArray& input_shapes, | |||||
const TensorShapeArray& output_shapes) const { | |||||
if (input().size() == 3) { | if (input().size() == 3) { | ||||
return intl::_MegDNNOprMethInvoker<2, 1>::get_workspace_in_bytes( | return intl::_MegDNNOprMethInvoker<2, 1>::get_workspace_in_bytes( | ||||
megdnn_opr(), this, input_shapes, output_shapes); | megdnn_opr(), this, input_shapes, output_shapes); | ||||
@@ -129,19 +128,34 @@ void WarpPerspectiveForward::record_execute_deps(ExecDependencyArray& deps) { | |||||
#ifdef MGB_ENABLE_GRAD | #ifdef MGB_ENABLE_GRAD | ||||
MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) { | MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) { | ||||
mgb_assert(opr.input().size() == 3, | |||||
"backward with mat_idx is currently unsupported"); | |||||
if (opr.input().size() == 4) { | |||||
if (wrt_idx == 0) { | |||||
// wrt data | |||||
SymbolVar grad = WarpPerspectiveBackwardData::make( | |||||
opr.input(1), opr.input(2), out_grad[0], opr.input(0), | |||||
opr.param()); | |||||
return grad.node(); | |||||
} else if (wrt_idx == 1) { | |||||
// wrt mat | |||||
SymbolVar grad = WarpPerspectiveBackwardMat::make( | |||||
opr.input(0), opr.input(1), opr.input(2), out_grad[0], | |||||
opr.param()); | |||||
return grad.node(); | |||||
} else { | |||||
return InvalidGrad::make(opr, wrt_idx); | |||||
} | |||||
} | |||||
mgb_assert(opr.input().size() == 3); | |||||
if (wrt_idx == 0) { | if (wrt_idx == 0) { | ||||
// wrt data | // wrt data | ||||
SymbolVar grad = WarpPerspectiveBackwardData::make( | SymbolVar grad = WarpPerspectiveBackwardData::make( | ||||
opr.input(1), out_grad[0], opr.input(0), | |||||
opr.param()); | |||||
opr.input(1), out_grad[0], opr.input(0), opr.param()); | |||||
return grad.node(); | return grad.node(); | ||||
} else if (wrt_idx == 1){ | |||||
} else if (wrt_idx == 1) { | |||||
// wrt mat | // wrt mat | ||||
SymbolVar grad = WarpPerspectiveBackwardMat::make( | SymbolVar grad = WarpPerspectiveBackwardMat::make( | ||||
opr.input(0), opr.input(1), out_grad[0], | |||||
opr.param()); | |||||
opr.input(0), opr.input(1), out_grad[0], opr.param()); | |||||
return grad.node(); | return grad.node(); | ||||
} else | } else | ||||
return InvalidGrad::make(opr, wrt_idx); | return InvalidGrad::make(opr, wrt_idx); | ||||
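With four inputs the grad rule now routes wrt_idx 0 to BackwardData and wrt_idx 1 to BackwardMat, while mat_idx itself (integer indices) falls through to InvalidGrad. A hedged sketch of exercising it -- `scalar_loss` is a hypothetical reduction helper, not part of this patch:

auto dst = opr::WarpPerspectiveForward::make(src, mat, mat_idx, out_shape, param);
auto loss = scalar_loss(dst);      // hypothetical scalar reduction
auto g_src = cg::grad(loss, src);  // -> WarpPerspectiveBackwardData (4-input form)
auto g_mat = cg::grad(loss, mat);  // -> WarpPerspectiveBackwardMat (4-input form)
// cg::grad(loss, mat_idx) would hit InvalidGrad: indices are not differentiable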
@@ -151,14 +165,116 @@ MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) { | |||||
/* ====================== WarpPerspectiveBackwardData ====================== */ | /* ====================== WarpPerspectiveBackwardData ====================== */ | ||||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardData); | MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardData); | ||||
MEGDNN_OPR_INIT3(WarpPerspectiveBackwardData, "warp_perspective_bwd_data", | |||||
2, false); | |||||
WarpPerspectiveBackwardData::WarpPerspectiveBackwardData( | |||||
VarNode* mat, VarNode* out_diff, VarNode* in_for_shape, | |||||
const Param& param, const OperatorNodeConfig& config) | |||||
: Super(OperatorNodeBaseCtorParam{mat->owner_graph(), | |||||
config, | |||||
"warp_perspective_bwd_data", | |||||
{mat}}, | |||||
2, false) { | |||||
init_megdnn_opr(*this, param); | |||||
add_input({mat, out_diff, in_for_shape}); | |||||
intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardData>::apply(*this); | |||||
} | |||||
WarpPerspectiveBackwardData::WarpPerspectiveBackwardData( | |||||
VarNode* mat, VarNode* mat_idx, VarNode* out_diff, | |||||
VarNode* in_for_shape, const Param& param, | |||||
const OperatorNodeConfig& config) | |||||
: Super(OperatorNodeBaseCtorParam{mat->owner_graph(), | |||||
config, | |||||
"warp_perspective_bwd_data", | |||||
{mat, mat_idx}}, | |||||
3, false) { | |||||
init_megdnn_opr(*this, param); | |||||
add_input({mat, mat_idx, out_diff, in_for_shape}); | |||||
intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardData>::apply(*this); | |||||
} | |||||
SymbolVar WarpPerspectiveBackwardData::make(SymbolVar i0, SymbolVar i1, | |||||
SymbolVar i2, const Param& param, | |||||
const OperatorNodeConfig& config) { | |||||
intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardData>::apply( | |||||
param, {&i0, &i1, &i2}); | |||||
return i0.insert_single_output_opr<WarpPerspectiveBackwardData>( | |||||
i0.node(), i1.node(), i2.node(), param, config); | |||||
} | |||||
SymbolVar WarpPerspectiveBackwardData::make(SymbolVar i0, SymbolVar i1, | |||||
SymbolVar i2, SymbolVar i3, | |||||
const Param& param, | |||||
const OperatorNodeConfig& config) { | |||||
intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardData>::apply( | |||||
param, {&i0, &i1, &i2, &i3}); | |||||
return i0.insert_single_output_opr<WarpPerspectiveBackwardData>( | |||||
i0.node(), i1.node(), i2.node(), i3.node(), param, config); | |||||
} | |||||
void WarpPerspectiveBackwardData::scn_do_execute() { | |||||
if (input().size() == 3) { | |||||
megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), | |||||
input(1)->dev_tensor().as_megdnn(), | |||||
output(0)->dev_tensor().as_megdnn(), | |||||
intl::get_megdnn_workspace_from_var(output(1))); | |||||
} else { | |||||
mgb_assert(input().size() == 4); | |||||
megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), | |||||
input(1)->dev_tensor().as_megdnn(), | |||||
input(2)->dev_tensor().as_megdnn(), | |||||
output(0)->dev_tensor().as_megdnn(), | |||||
intl::get_megdnn_workspace_from_var(output(1))); | |||||
} | |||||
} | |||||
/* ====================== WarpPerspectiveBackwardMat ====================== */ | /* ====================== WarpPerspectiveBackwardMat ====================== */ | ||||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardMat); | MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardMat); | ||||
MEGDNN_OPR_INIT3(WarpPerspectiveBackwardMat, "warp_perspective_bwd_mat", | |||||
1, true); | |||||
WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat( | |||||
VarNode* src, VarNode* mat, VarNode* mat_idx, VarNode* out_diff, | |||||
const Param& param, const OperatorNodeConfig& config) | |||||
: Super(OperatorNodeBaseCtorParam{src->owner_graph(), | |||||
config, | |||||
"warp_perspective_bwd_mat", | |||||
{src, mat, mat_idx}}, | |||||
1, true) { | |||||
init_megdnn_opr(*this, param); | |||||
if (mat_idx) { | |||||
add_input({src, mat, mat_idx, out_diff}); | |||||
} else { | |||||
add_input({src, mat, out_diff}); | |||||
} | |||||
intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardMat>::apply(*this); | |||||
} | |||||
void WarpPerspectiveBackwardMat::scn_do_execute() { | |||||
if (input().size() == 3) { | |||||
megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), | |||||
input(1)->dev_tensor().as_megdnn(), | |||||
input(2)->dev_tensor().as_megdnn(), | |||||
output(0)->dev_tensor().as_megdnn(), | |||||
intl::get_megdnn_workspace_from_var(output(1))); | |||||
} else { | |||||
mgb_assert(input().size() == 4); | |||||
megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), | |||||
input(1)->dev_tensor().as_megdnn(), | |||||
input(2)->dev_tensor().as_megdnn(), | |||||
input(3)->dev_tensor().as_megdnn(), | |||||
output(0)->dev_tensor().as_megdnn(), | |||||
intl::get_megdnn_workspace_from_var(output(1))); | |||||
} | |||||
} | |||||
SymbolVar WarpPerspectiveBackwardMat::make( | |||||
SymbolVar i0, SymbolVar i1, SymbolVar i2, SymbolVar i3, | |||||
const Param& param, const OperatorNodeConfig& config) { | |||||
intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardMat>::apply( | |||||
param, {&i0, &i1, &i2, &i3}); | |||||
return i0.insert_single_output_opr<WarpPerspectiveBackwardMat>( | |||||
i0.node(), i1.node(), i2.node(), i3.node(), param, config); | |||||
} | |||||
/* ====================== Cv operator ====================== */ | /* ====================== Cv operator ====================== */ | ||||
@@ -188,8 +304,7 @@ void ResizeForward::add_input_layout_constraint() { | |||||
} | } | ||||
void ResizeForward::outshape_by_symvar_do_get_output_shape( | void ResizeForward::outshape_by_symvar_do_get_output_shape( | ||||
TensorShape &dest, const ShapeInferInfo &shpinfo) { | |||||
TensorShape& dest, const ShapeInferInfo& shpinfo) { | |||||
TensorShape oshp2d; | TensorShape oshp2d; | ||||
cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | ||||
auto imgshp = shpinfo.shape_inp_shp.at(0); | auto imgshp = shpinfo.shape_inp_shp.at(0); | ||||
@@ -232,7 +347,7 @@ size_t ResizeForward::get_workspace_size_bytes( | |||||
megdnn_opr(), this, input_shapes, output_shapes); | megdnn_opr(), this, input_shapes, output_shapes); | ||||
} | } | ||||
void ResizeForward::record_execute_deps(ExecDependencyArray &deps) { | |||||
void ResizeForward::record_execute_deps(ExecDependencyArray& deps) { | |||||
record_megdnn_opr(deps); | record_megdnn_opr(deps); | ||||
} | } | ||||
@@ -268,19 +383,17 @@ void WarpAffineForward::add_input_layout_constraint() { | |||||
} | } | ||||
void WarpAffineForward::outshape_by_symvar_do_get_output_shape( | void WarpAffineForward::outshape_by_symvar_do_get_output_shape( | ||||
TensorShape &dest, const ShapeInferInfo &shpinfo) { | |||||
TensorShape& dest, const ShapeInferInfo& shpinfo) { | |||||
TensorShape oshp2d; | TensorShape oshp2d; | ||||
cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | ||||
auto imgshp = shpinfo.shape_inp_shp.at(0), | auto imgshp = shpinfo.shape_inp_shp.at(0), | ||||
matshp = shpinfo.shape_inp_shp.at(1); | matshp = shpinfo.shape_inp_shp.at(1); | ||||
mgb_assert( | |||||
(imgshp.ndim == 4 || imgshp.ndim == 5) && matshp.ndim == 3 && oshp2d.ndim == 2 && | |||||
matshp.shape[0] == imgshp.shape[0] && | |||||
matshp.shape[1] == 2 && matshp.shape[2] == 3, | |||||
"shape mismatch for WarpAffineForward: img=%s mat=%s out2d=%s", | |||||
imgshp.to_string().c_str(), matshp.to_string().c_str(), | |||||
oshp2d.to_string().c_str()); | |||||
mgb_assert((imgshp.ndim == 4 || imgshp.ndim == 5) && matshp.ndim == 3 && | |||||
oshp2d.ndim == 2 && matshp.shape[0] == imgshp.shape[0] && | |||||
matshp.shape[1] == 2 && matshp.shape[2] == 3, | |||||
"shape mismatch for WarpAffineForward: img=%s mat=%s out2d=%s", | |||||
imgshp.to_string().c_str(), matshp.to_string().c_str(), | |||||
oshp2d.to_string().c_str()); | |||||
size_t height_idx = 0; | size_t height_idx = 0; | ||||
if (param().format == Param::Format::NCHW) { | if (param().format == Param::Format::NCHW) { | ||||
@@ -305,18 +418,19 @@ void WarpAffineForward::init_output_static_infer_desc() { | |||||
} | } | ||||
void WarpAffineForward::scn_do_execute() { | void WarpAffineForward::scn_do_execute() { | ||||
intl::MegDNNOprMethInvoker<megdnn::WarpAffine>:: | |||||
exec(megdnn_opr(), this); | |||||
intl::MegDNNOprMethInvoker<megdnn::WarpAffine>::exec(megdnn_opr(), this); | |||||
} | } | ||||
size_t WarpAffineForward::get_workspace_size_bytes( | size_t WarpAffineForward::get_workspace_size_bytes( | ||||
const TensorShapeArray &input_shapes, | |||||
const TensorShapeArray &output_shapes) const { | |||||
return intl::MegDNNOprMethInvoker<megdnn::WarpAffine>:: | |||||
get_workspace_in_bytes(megdnn_opr(), this, input_shapes, output_shapes); | |||||
const TensorShapeArray& input_shapes, | |||||
const TensorShapeArray& output_shapes) const { | |||||
return intl::MegDNNOprMethInvoker< | |||||
megdnn::WarpAffine>::get_workspace_in_bytes(megdnn_opr(), this, | |||||
input_shapes, | |||||
output_shapes); | |||||
} | } | ||||
void WarpAffineForward::record_execute_deps(ExecDependencyArray &deps) { | |||||
void WarpAffineForward::record_execute_deps(ExecDependencyArray& deps) { | |||||
record_megdnn_opr(deps); | record_megdnn_opr(deps); | ||||
} | } | ||||
@@ -325,7 +439,7 @@ void WarpAffineForward::record_execute_deps(ExecDependencyArray &deps) { | |||||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(RemapForward); | MGB_DYN_TYPE_OBJ_FINAL_IMPL(RemapForward); | ||||
MEGDNN_OPR_INIT2(RemapForward, "remap") | MEGDNN_OPR_INIT2(RemapForward, "remap") | ||||
void RemapForward::init_output_dtype(){ | |||||
void RemapForward::init_output_dtype() { | |||||
output(0)->dtype(input(0)->dtype()); | output(0)->dtype(input(0)->dtype()); | ||||
} | } | ||||
@@ -37,13 +37,59 @@ namespace serialization { | |||||
} | } | ||||
} | } | ||||
}; | }; | ||||
template<> | |||||
struct OprMaker<opr::WarpPerspectiveBackwardData, 0> { | |||||
using Opr = opr::WarpPerspectiveBackwardData; | |||||
using Param = Opr::Param; | |||||
static cg::OperatorNodeBase* make(const Param& param, | |||||
const cg::VarNodeArray& inputs, | |||||
ComputingGraph& graph, | |||||
const OperatorNodeConfig& config) { | |||||
MGB_MARK_USED_VAR(graph); | |||||
if (inputs.size() == 3) { | |||||
return Opr::make(inputs[0], inputs[1], inputs[2], param, config) | |||||
.node() | |||||
->owner_opr(); | |||||
} else { | |||||
mgb_assert(inputs.size() == 4); | |||||
return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3], | |||||
param, config) | |||||
.node() | |||||
->owner_opr(); | |||||
} | |||||
} | |||||
}; | |||||
template<> | |||||
struct OprMaker<opr::WarpPerspectiveBackwardMat, 0> { | |||||
using Opr = opr::WarpPerspectiveBackwardMat; | |||||
using Param = Opr::Param; | |||||
static cg::OperatorNodeBase* make(const Param& param, | |||||
const cg::VarNodeArray& inputs, | |||||
ComputingGraph& graph, | |||||
const OperatorNodeConfig& config) { | |||||
MGB_MARK_USED_VAR(graph); | |||||
if (inputs.size() == 3) { | |||||
return Opr::make(inputs[0], inputs[1], inputs[2], param, config) | |||||
.node() | |||||
->owner_opr(); | |||||
} else { | |||||
mgb_assert(inputs.size() == 4); | |||||
return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3], | |||||
param, config) | |||||
.node() | |||||
->owner_opr(); | |||||
} | |||||
} | |||||
}; | |||||
} // namespace serialization | } // namespace serialization | ||||
namespace opr { | namespace opr { | ||||
MGB_SEREG_OPR(WarpPerspective, 0); | MGB_SEREG_OPR(WarpPerspective, 0); | ||||
MGB_SEREG_OPR(WarpPerspectiveBackwardData, 3); | |||||
MGB_SEREG_OPR(WarpPerspectiveBackwardMat, 3); | |||||
MGB_SEREG_OPR(WarpPerspectiveBackwardData, 0); | |||||
MGB_SEREG_OPR(WarpPerspectiveBackwardMat, 0); | |||||
MGB_SEREG_OPR(Rotate, 1); | MGB_SEREG_OPR(Rotate, 1); | ||||
MGB_SEREG_OPR(CvtColor, 1); | MGB_SEREG_OPR(CvtColor, 1); | ||||
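Changing the registered arity of the two backward operators from 3 to 0 (assuming the second MGB_SEREG_OPR argument is the fixed input count, with 0 meaning unspecified) is what lets a single serialization entry cover both forms; the OprMaker specializations above then branch at load time:

// Sketch of the load-time dispatch already shown in OprMaker:
//   inputs.size() == 3 -> Opr::make(i[0], i[1], i[2], param, config)        // legacy graphs
//   inputs.size() == 4 -> Opr::make(i[0], i[1], i[2], i[3], param, config)  // with mat_idx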
@@ -6,7 +6,8 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#pragma once | #pragma once | ||||
@@ -33,77 +34,93 @@ namespace opr { | |||||
* Impl note: this operator might have 3 or 4 inputs depending on whether | * Impl note: this operator might have 3 or 4 inputs depending on whether | ||||
* \p mat_idx is given | * \p mat_idx is given | ||||
*/ | */ | ||||
MGB_DEFINE_OPR_CLASS(WarpPerspectiveForward, | |||||
MGB_DEFINE_OPR_CLASS( | |||||
WarpPerspectiveForward, | |||||
intl::WorkspaceSizeInfer< | intl::WorkspaceSizeInfer< | ||||
intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolderImpl< | |||||
megdnn::WarpPerspectiveForward>>>) // { | |||||
public: | |||||
WarpPerspectiveForward( | |||||
VarNode *in_tensor, VarNode *mat, VarNode *mat_idx, | |||||
VarNode *out_shape, | |||||
const Param ¶m, | |||||
const OperatorNodeConfig &config); | |||||
static SymbolVar make(SymbolVar in_tensor, | |||||
SymbolVar mat, SymbolVar mat_idx, SymbolVar out_shape, | |||||
const Param ¶m = {}, | |||||
const OperatorNodeConfig &config = {}); | |||||
static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||||
SymbolVar out_shape, | |||||
const Param ¶m = {}, | |||||
const OperatorNodeConfig &config = {}) { | |||||
return make(in_tensor, mat, SymbolVar{}, out_shape, param, config); | |||||
} | |||||
static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||||
const TensorShape &out_shape, | |||||
const Param ¶m = {}, | |||||
const OperatorNodeConfig &config = {}) | |||||
{ | |||||
return make(in_tensor, mat, | |||||
cg::var_from_tensor_shape( | |||||
in_tensor, out_shape), param, config); | |||||
} | |||||
private: | |||||
void init_output_dtype() override; | |||||
void add_input_layout_constraint() override; | |||||
void init_output_static_infer_desc() override; | |||||
void outshape_by_symvar_do_get_output_shape( | |||||
TensorShape &dest, const ShapeInferInfo &shpinfo) override; | |||||
void scn_do_execute() override; | |||||
size_t get_workspace_size_bytes( | |||||
const TensorShapeArray &input_shapes, | |||||
const TensorShapeArray &output_shapes) const override; | |||||
void record_execute_deps(ExecDependencyArray& deps) override; | |||||
}; | |||||
intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolderImpl< | |||||
megdnn::WarpPerspectiveForward>>>) // { | |||||
public: | |||||
WarpPerspectiveForward(VarNode* in_tensor, VarNode* mat, VarNode* mat_idx, | |||||
VarNode* out_shape, const Param& param, | |||||
const OperatorNodeConfig& config); | |||||
static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar mat_idx, | |||||
SymbolVar out_shape, const Param& param = {}, | |||||
const OperatorNodeConfig& config = {}); | |||||
static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar out_shape, | |||||
const Param& param = {}, | |||||
const OperatorNodeConfig& config = {}) { | |||||
return make(in_tensor, mat, SymbolVar{}, out_shape, param, config); | |||||
} | |||||
static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||||
const TensorShape& out_shape, const Param& param = {}, | |||||
const OperatorNodeConfig& config = {}) { | |||||
return make(in_tensor, mat, cg::var_from_tensor_shape(in_tensor, out_shape), | |||||
param, config); | |||||
} | |||||
private: | |||||
void init_output_dtype() override; | |||||
void add_input_layout_constraint() override; | |||||
void init_output_static_infer_desc() override; | |||||
void outshape_by_symvar_do_get_output_shape( | |||||
TensorShape& dest, const ShapeInferInfo& shpinfo) override; | |||||
void scn_do_execute() override; | |||||
size_t get_workspace_size_bytes( | |||||
const TensorShapeArray& input_shapes, | |||||
const TensorShapeArray& output_shapes) const override; | |||||
void record_execute_deps(ExecDependencyArray& deps) override; | |||||
};
using WarpPerspective = WarpPerspectiveForward; | using WarpPerspective = WarpPerspectiveForward; | ||||
MGB_DEFINE_OPR_CLASS(WarpPerspectiveBackwardData, | |||||
intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardData>) // { | |||||
public: | |||||
WarpPerspectiveBackwardData(VarNode *mat, VarNode *out_diff, | |||||
VarNode *in_for_shape, const Param ¶m, | |||||
const OperatorNodeConfig &config); | |||||
static SymbolVar make(SymbolVar mat, SymbolVar out_diff, | |||||
SymbolVar in_for_shape, const Param ¶m = {}, | |||||
const OperatorNodeConfig &config = {}); | |||||
}; | |||||
MGB_DEFINE_OPR_CLASS(WarpPerspectiveBackwardMat, | |||||
intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardMat>) // { | |||||
public: | |||||
WarpPerspectiveBackwardMat( | |||||
VarNode *src, VarNode *mat, VarNode *out_diff, | |||||
const Param ¶m, const OperatorNodeConfig &config); | |||||
static SymbolVar make( | |||||
SymbolVar src, SymbolVar mat, SymbolVar out_diff, | |||||
const Param ¶m = {}, const OperatorNodeConfig &config = {}); | |||||
}; | |||||
MGB_DEFINE_OPR_CLASS(
        WarpPerspectiveBackwardData,
        intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardData>) // {
public:
    WarpPerspectiveBackwardData(VarNode* mat, VarNode* out_diff,
                                VarNode* in_for_shape, const Param& param,
                                const OperatorNodeConfig& config);
    WarpPerspectiveBackwardData(VarNode* mat, VarNode* mat_idx, VarNode* out_diff,
                                VarNode* in_for_shape, const Param& param,
                                const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar mat, SymbolVar out_diff, SymbolVar in_for_shape,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {});
    static SymbolVar make(SymbolVar mat, SymbolVar mat_idx, SymbolVar out_diff,
                          SymbolVar in_for_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {});
    void scn_do_execute() override;
};
MGB_DEFINE_OPR_CLASS(
        WarpPerspectiveBackwardMat,
        intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardMat>) // {
public:
    WarpPerspectiveBackwardMat(VarNode* src, VarNode* mat, VarNode* mat_idx,
                               VarNode* out_diff, const Param& param,
                               const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar src, SymbolVar mat, SymbolVar out_diff,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {}) {
        return make(src, mat, {}, out_diff, param, config);
    }
    static SymbolVar make(SymbolVar src, SymbolVar mat, SymbolVar mat_idx,
                          SymbolVar out_diff, const Param& param = {},
                          const OperatorNodeConfig& config = {});
    void scn_do_execute() override;
};
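Both backward oprs grow the same optional input, so gradient glue can route mat_idx straight through. A hedged sketch of constructing them by hand (variable names hypothetical; the signatures are the ones declared above):

    // gradient wrt. the source images; src supplies the gradient's shape
    auto src_grad = opr::WarpPerspectiveBackwardData::make(
            mat, mat_idx, out_diff, src, param);
    // gradient wrt. the transform matrices
    auto mat_grad = opr::WarpPerspectiveBackwardMat::make(
            src, mat, mat_idx, out_diff, param);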
/* ============================= shape infer ============================== */
//! param: src, dst
@@ -116,68 +133,67 @@ using CvtColor = CvtColorForward;
using GaussianBlur = GaussianBlurForward;
/* ============================= user set shape =========================== */
MGB_DEFINE_OPR_CLASS(ResizeForward,
        intl::WorkspaceSizeInfer<
            intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolderImpl<
                megdnn::ResizeForward>>>) // {
public:
    ResizeForward(
            VarNode *in_tensor, VarNode *out_shape, const Param &param,
            const OperatorNodeConfig &config);
    static SymbolVar make(SymbolVar in_tensor, SymbolVar out_shape,
            const Param &param = {},
            const OperatorNodeConfig &config = {});
    static SymbolVar make(SymbolVar in_tensor, const TensorShape &out_shape,
            const Param &param = {},
            const OperatorNodeConfig &config = {})
    {
        return make(in_tensor,
                cg::var_from_tensor_shape(
                    in_tensor, out_shape), param, config);
    }
private:
    void init_output_dtype() override;
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void outshape_by_symvar_do_get_output_shape(
            TensorShape &dest, const ShapeInferInfo &shpinfo) override;
    void scn_do_execute() override;
    size_t get_workspace_size_bytes(
            const TensorShapeArray &input_shapes,
            const TensorShapeArray &output_shapes) const override;
    void record_execute_deps(ExecDependencyArray &deps) override;
};
MGB_DEFINE_OPR_CLASS(
        ResizeForward,
        intl::WorkspaceSizeInfer<intl::OutshapeBySymvarSCNOpr<
                mixin::MegDNNOprHolderImpl<megdnn::ResizeForward>>>) // {
public:
    ResizeForward(VarNode* in_tensor, VarNode* out_shape, const Param& param,
                  const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar in_tensor, SymbolVar out_shape,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {});
    static SymbolVar make(SymbolVar in_tensor, const TensorShape& out_shape,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {}) {
        return make(in_tensor, cg::var_from_tensor_shape(in_tensor, out_shape),
                    param, config);
    }
private:
    void init_output_dtype() override;
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void outshape_by_symvar_do_get_output_shape(
            TensorShape& dest, const ShapeInferInfo& shpinfo) override;
    void scn_do_execute() override;
    size_t get_workspace_size_bytes(
            const TensorShapeArray& input_shapes,
            const TensorShapeArray& output_shapes) const override;
    void record_execute_deps(ExecDependencyArray& deps) override;
};
using Resize = ResizeForward;
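Resize keeps two make overloads: a symbolic out_shape for sizes only known at graph run time, and a TensorShape convenience that wraps the static shape via cg::var_from_tensor_shape. A minimal sketch (the names and the exact contents expected in out_shape are assumptions):

    auto y_static = opr::Resize::make(src, TensorShape{OH, OW}, param);
    auto y_dynamic = opr::Resize::make(src, out_shape_var, param);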
MGB_DEFINE_OPR_CLASS(ResizeBackward,
        intl::MegDNNOprWrapperBwd<megdnn::ResizeBackward>) // {
public:
    ResizeBackward(VarNode *out_diff,
            VarNode *in_for_shape, const Param &param,
            const OperatorNodeConfig &config);
        intl::MegDNNOprWrapperBwd<megdnn::ResizeBackward>) // {
public:
    ResizeBackward(VarNode* out_diff, VarNode* in_for_shape, const Param& param,
                   const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar out_diff,
            SymbolVar in_for_shape, const Param &param = {},
            const OperatorNodeConfig &config = {});
};
    static SymbolVar make(SymbolVar out_diff, SymbolVar in_for_shape,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {});
};
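ResizeBackward takes the output gradient plus the forward input, which it uses only to infer the gradient's shape. A hedged sketch (variable names hypothetical):

    // src_grad gets the shape of fwd_in, the input of the forward Resize
    auto src_grad = opr::ResizeBackward::make(out_diff, fwd_in, param);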
MGB_DEFINE_OPR_CLASS(RemapForward,
        intl::MegDNNOprWrapperFwd<megdnn::RemapForward>) // {
public:
    RemapForward(
            VarNode *in_tensor, VarNode* map,
            const Param &param, const OperatorNodeConfig &config);
    static SymbolVar make(SymbolVar in_tensor, SymbolVar map, const Param &param = {},
            const OperatorNodeConfig &config = {});
private:
    void init_output_dtype() override;
};
        intl::MegDNNOprWrapperFwd<megdnn::RemapForward>) // {
public:
    RemapForward(VarNode* in_tensor, VarNode* map, const Param& param,
                 const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar in_tensor, SymbolVar map,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {});
private:
    void init_output_dtype() override;
};
using Remap = RemapForward;
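Remap gathers each output pixel from a source location given by the map input. A minimal sketch, assuming the megdnn convention of a float map of shape (N, OH, OW, 2) holding (x, y) source coordinates (treat that layout as an assumption here, not something this diff states):

    auto y = opr::Remap::make(src, map, param);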
/*!
@@ -191,47 +207,42 @@ using Remap = RemapForward;
 * Input mat shape: batch, 2, 3; note that the mat is used to translate output
 * coordinate onto input coordinate, so it is not inversed.
 */
MGB_DEFINE_OPR_CLASS(WarpAffineForward,
        intl::WorkspaceSizeInfer<
            intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolderImpl<
                megdnn::WarpAffineForward>>>) // {
public:
    WarpAffineForward(
            VarNode *in_tensor, VarNode *mat, VarNode *out_shape,
            const Param &param,
            const OperatorNodeConfig &config);
    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat,
            SymbolVar out_shape,
            const Param &param = {},
            const OperatorNodeConfig &config = {});
    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat,
            const TensorShape &out_shape,
            const Param &param = {},
            const OperatorNodeConfig &config = {})
    {
        return make(in_tensor, mat,
                cg::var_from_tensor_shape(
                    in_tensor, out_shape), param, config);
    }
private:
    void init_output_dtype() override;
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void outshape_by_symvar_do_get_output_shape(
            TensorShape &dest, const ShapeInferInfo &shpinfo) override;
    void scn_do_execute() override;
    size_t get_workspace_size_bytes(
            const TensorShapeArray &input_shapes,
            const TensorShapeArray &output_shapes) const override;
    void record_execute_deps(ExecDependencyArray &deps) override;
};
MGB_DEFINE_OPR_CLASS(
        WarpAffineForward,
        intl::WorkspaceSizeInfer<intl::OutshapeBySymvarSCNOpr<
                mixin::MegDNNOprHolderImpl<megdnn::WarpAffineForward>>>) // {
public:
    WarpAffineForward(VarNode* in_tensor, VarNode* mat, VarNode* out_shape,
                      const Param& param, const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar out_shape,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {});
    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat,
                          const TensorShape& out_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {}) {
        return make(in_tensor, mat, cg::var_from_tensor_shape(in_tensor, out_shape),
                    param, config);
    }
private:
    void init_output_dtype() override;
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void outshape_by_symvar_do_get_output_shape(
            TensorShape& dest, const ShapeInferInfo& shpinfo) override;
    void scn_do_execute() override;
    size_t get_workspace_size_bytes(
            const TensorShapeArray& input_shapes,
            const TensorShapeArray& output_shapes) const override;
    void record_execute_deps(ExecDependencyArray& deps) override;
};
using WarpAffine = WarpAffineForward;
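To make the comment above concrete: with mat of shape (batch, 2, 3), each output coordinate is pushed through the matrix to find where to sample the input, so callers pass the output-to-input mapping without inverting it (variable names in the sketch are assumed):

    // [xi, yi]^T = M * [xo, yo, 1]^T  -- M maps output coords onto input coords
    auto y = opr::WarpAffine::make(src, mat, TensorShape{OH, OW}, param);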
} // opr
} // mgb
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
@@ -216,7 +216,10 @@ TEST(TestOprImgproc, WarpPerspectiveWithMatIdx) {
            .set_input_generator(1, gen_mat)
            .set_input_generator(2, gen_mat_idx)
            .set_input_dtype(2, dtype::Int32{})
            /*! it's hard to make the grad check succeed, since the CUDA
                implementation computes the gradient as a sum (grad sum) */
            .disable_grad_check()
            .set_input_allow_grad(2, false)
            .run({TensorShape{N_SRC, C, 4, 5}, {N_MAT, 3, 3}, {N_MAT}})
            .run({TensorShape{N_SRC, C, 6, 5}, {N_MAT, 3, 3}, {N_MAT}})
            .run({TensorShape{N_SRC, C, 22, 19}, {N_MAT, 3, 3}, {N_MAT}});