@@ -60,7 +60,7 @@ struct PreprocessedFilter { | |||||
TensorNDArray tensors; | TensorNDArray tensors; | ||||
}; | }; | ||||
} // namespace intl | |||||
} // namespace detail | |||||
/** | /** | ||||
* \brief base class for convolution operation | * \brief base class for convolution operation | ||||
@@ -1562,6 +1562,58 @@ protected: | |||||
}; | }; | ||||
using BatchConvBias = BatchConvBiasForward; | using BatchConvBias = BatchConvBiasForward; | ||||
class FakeQuantBase : public OperatorBase { | |||||
DEF_OPR_IMPL_CTOR(FakeQuantBase, OperatorBase); | |||||
DEF_OPR_PARAM(FakeQuant); | |||||
protected: | |||||
void deduce_layout_fwd(const TensorLayout& input, TensorLayout& output); | |||||
void check_layout_fwd(const TensorLayout& input, const TensorLayout& scale, | |||||
const TensorLayout& zero_point, | |||||
const TensorLayout& output); | |||||
}; | |||||
class FakeQuantForward : public FakeQuantBase { | |||||
DEF_OPR_IMPL(FakeQuantForward, FakeQuantBase, 3, 1); | |||||
public: | |||||
virtual void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, _megdnn_tensor_out output, | |||||
_megdnn_workspace workspace) = 0; | |||||
void deduce_layout(const TensorLayout& input, const TensorLayout& scale, | |||||
const TensorLayout& zero_point, TensorLayout& output); | |||||
virtual size_t get_workspace_in_bytes(const TensorLayout& input, | |||||
const TensorLayout& scale, | |||||
const TensorLayout& zero_point, | |||||
const TensorLayout& output) = 0; | |||||
protected: | |||||
void check_exec(const TensorLayout& input, const TensorLayout& scale, | |||||
const TensorLayout& zero_point, const TensorLayout& output, | |||||
size_t workspace_in_bytes); | |||||
}; | |||||
using FakeQuant = FakeQuantForward; | |||||
class FakeQuantBackward : public FakeQuantBase { | |||||
DEF_OPR_IMPL(FakeQuantBackward, FakeQuantBase, 4, 1); | |||||
public: | |||||
virtual void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, _megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out grad, _megdnn_workspace workspace) = 0; | |||||
virtual size_t get_workspace_in_bytes(const TensorLayout& diff, | |||||
const TensorLayout& input, | |||||
const TensorLayout& scale, | |||||
const TensorLayout& zero_point, | |||||
const TensorLayout& grad) = 0; | |||||
protected: | |||||
void check_exec(const TensorLayout& diff, const TensorLayout& input, | |||||
const TensorLayout& scale, const TensorLayout& zero_point, | |||||
const TensorLayout& grad, size_t workspace_in_bytes); | |||||
}; | |||||
} // namespace megdnn | } // namespace megdnn | ||||
#include "megdnn/internal/opr_header_epilogue.h" | #include "megdnn/internal/opr_header_epilogue.h" | ||||
@@ -943,5 +943,9 @@ when the ``I`` suffix is present. | |||||
add_enum_alias('Format', 'ConvolutionV0'). | add_enum_alias('Format', 'ConvolutionV0'). | ||||
add_enum_alias('ComputeMode', 'Convolution', name_field="compute_mode") | add_enum_alias('ComputeMode', 'Convolution', name_field="compute_mode") | ||||
) | ) | ||||
(pdef('FakeQuant'). | |||||
add_fields('int32','qmin','-2147483648'). | |||||
add_fields('int32','qmax','2147483647') | |||||
) | |||||
@@ -0,0 +1,66 @@ | |||||
/** | |||||
* \file dnn/src/common/fakequant.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "megdnn/oprs.h" | |||||
#include "src/common/utils.h" | |||||
namespace megdnn { | |||||
void FakeQuantBase::deduce_layout_fwd(const TensorLayout& input, | |||||
TensorLayout& output) { | |||||
output = TensorLayout(input, input.dtype); | |||||
} | |||||
void FakeQuantBase::check_layout_fwd(const TensorLayout& input, | |||||
const TensorLayout& scale, | |||||
const TensorLayout& zero_point, | |||||
const TensorLayout& output) { | |||||
megdnn_assert(input.dtype == dtype::Float32()); | |||||
megdnn_assert(scale.dtype == dtype::Float32()); | |||||
megdnn_assert(zero_point.dtype == dtype::Float32()); | |||||
TensorLayout expected; | |||||
deduce_layout_fwd(input, expected); | |||||
megdnn_assert_eq_layout(expected, output); | |||||
} | |||||
void FakeQuantForward::deduce_layout(const TensorLayout& input, | |||||
const TensorLayout& /*scale*/, | |||||
const TensorLayout& /*zero_point*/, | |||||
TensorLayout& output) { | |||||
deduce_layout_fwd(input, output); | |||||
} | |||||
void FakeQuantForward::check_exec(const TensorLayout& input, | |||||
const TensorLayout& scale, | |||||
const TensorLayout& zero_point, | |||||
const TensorLayout& output, | |||||
size_t workspace_in_bytes) { | |||||
check_layout_fwd(input, scale, zero_point, output); | |||||
auto required_workspace_space = | |||||
get_workspace_in_bytes(input, scale, zero_point, output); | |||||
megdnn_assert(workspace_in_bytes >= required_workspace_space); | |||||
} | |||||
void FakeQuantBackward::check_exec(const TensorLayout& diff, | |||||
const TensorLayout& input, | |||||
const TensorLayout& scale, | |||||
const TensorLayout& zero_point, | |||||
const TensorLayout& grad, | |||||
size_t workspace_in_bytes) { | |||||
megdnn_assert_eq_shape(input, diff); | |||||
megdnn_assert_eq_shape(input, grad); | |||||
auto required_worspace_space = | |||||
get_workspace_in_bytes(diff, input, scale, zero_point, grad); | |||||
megdnn_assert(workspace_in_bytes >= required_worspace_space); | |||||
} | |||||
} // namespace megdnn |
@@ -201,7 +201,9 @@ private: | |||||
cb(RemapBackwardMat) \ | cb(RemapBackwardMat) \ | ||||
cb(AdaptivePoolingForward) \ | cb(AdaptivePoolingForward) \ | ||||
cb(AdaptivePoolingBackward) \ | cb(AdaptivePoolingBackward) \ | ||||
cb(DctChannelSelectForward) | |||||
cb(DctChannelSelectForward) \ | |||||
cb(FakeQuantForward) \ | |||||
cb(FakeQuantBackward) | |||||
/*! | /*! | ||||
* \brief specialize HandleImpl::create_operator for a single opr type; | * \brief specialize HandleImpl::create_operator for a single opr type; | ||||
@@ -13,9 +13,9 @@ | |||||
#pragma once | #pragma once | ||||
#include "src/common/elemwise_helper.cuh" | #include "src/common/elemwise_helper.cuh" | ||||
#include "src/cuda/utils.cuh" | |||||
#include "src/cuda/int_fastdiv.cuh" | #include "src/cuda/int_fastdiv.cuh" | ||||
#include "src/cuda/query_blocksize.cuh" | #include "src/cuda/query_blocksize.cuh" | ||||
#include "src/cuda/utils.cuh" | |||||
/* | /* | ||||
* please note that all arithmetics on GPU are 32-bit for best performance; this | * please note that all arithmetics on GPU are 32-bit for best performance; this | ||||
@@ -649,6 +649,102 @@ struct OpCallerUniform<Op, 3, PVis> { | |||||
} | } | ||||
}; | }; | ||||
//! specialization for arity == 4 | |||||
template <class Op, class PVis> | |||||
struct OpCallerUniform<Op, 4, PVis> { | |||||
Op op; | |||||
PVis par[4]; | |||||
static const uint32_t packed_size = PVis::packed_size; | |||||
devfunc void thread_init(uint32_t idx) { | |||||
idx = idx * packed_size; | |||||
par[0].thread_init(idx); | |||||
par[1].thread_init(idx); | |||||
par[2].thread_init(idx); | |||||
par[3].thread_init(idx); | |||||
} | |||||
devfunc void on(uint32_t idx) { | |||||
idx = idx * packed_size; | |||||
op(idx, par[0].at(idx), par[1].at(idx), par[2].at(idx), par[3].at(idx)); | |||||
} | |||||
devfunc void on(uint32_t idx, uint32_t remain) { | |||||
idx = idx * packed_size; | |||||
if (remain >= packed_size) { | |||||
op(idx, par[0].at(idx), par[1].at(idx), par[2].at(idx), | |||||
par[3].at(idx)); | |||||
} else { | |||||
auto ptr0 = par[0].ptr(); | |||||
auto ptr1 = par[1].ptr(); | |||||
auto ptr2 = par[2].ptr(); | |||||
auto ptr3 = par[3].ptr(); | |||||
for (int i = 0; i < remain; i++) { | |||||
op(idx + i, ptr0[par[0].offset(idx + i)], | |||||
ptr1[par[1].offset(idx + i)], ptr2[par[2].offset(idx + i)], | |||||
ptr3[par[3].offset(idx + i)]); | |||||
} | |||||
} | |||||
} | |||||
devfunc void next() { | |||||
par[0].next(); | |||||
par[1].next(); | |||||
par[2].next(); | |||||
par[3].next(); | |||||
} | |||||
}; | |||||
//! specialization for arity == 5 | |||||
template <class Op, class PVis> | |||||
struct OpCallerUniform<Op, 5, PVis> { | |||||
Op op; | |||||
PVis par[5]; | |||||
static const uint32_t packed_size = PVis::packed_size; | |||||
devfunc void thread_init(uint32_t idx) { | |||||
idx = idx * packed_size; | |||||
par[0].thread_init(idx); | |||||
par[1].thread_init(idx); | |||||
par[2].thread_init(idx); | |||||
par[3].thread_init(idx); | |||||
par[4].thread_init(idx); | |||||
} | |||||
devfunc void on(uint32_t idx) { | |||||
idx = idx * packed_size; | |||||
op(idx, par[0].at(idx), par[1].at(idx), par[2].at(idx), par[3].at(idx), | |||||
par[4].at(idx)); | |||||
} | |||||
devfunc void on(uint32_t idx, uint32_t remain) { | |||||
idx = idx * packed_size; | |||||
if (remain >= packed_size) { | |||||
op(idx, par[0].at(idx), par[1].at(idx), par[2].at(idx), | |||||
par[3].at(idx), par[4].at(idx)); | |||||
} else { | |||||
auto ptr0 = par[0].ptr(); | |||||
auto ptr1 = par[1].ptr(); | |||||
auto ptr2 = par[2].ptr(); | |||||
auto ptr3 = par[3].ptr(); | |||||
auto ptr4 = par[4].ptr(); | |||||
for (int i = 0; i < remain; i++) { | |||||
op(idx + i, ptr0[par[0].offset(idx + i)], | |||||
ptr1[par[1].offset(idx + i)], ptr2[par[2].offset(idx + i)], | |||||
ptr3[par[3].offset(idx + i)], ptr4[par[4].offset(idx + i)]); | |||||
} | |||||
} | |||||
} | |||||
devfunc void next() { | |||||
par[0].next(); | |||||
par[1].next(); | |||||
par[2].next(); | |||||
par[3].next(); | |||||
par[4].next(); | |||||
} | |||||
}; | |||||
/*! | /*! | ||||
* \brief call binary (i.e. arity == 2) operator with different param | * \brief call binary (i.e. arity == 2) operator with different param | ||||
* visitors | * visitors | ||||
@@ -0,0 +1,30 @@ | |||||
/** | |||||
* \file dnn/src/cuda/fake_quant/kern.cu | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "./kern.cuh" | |||||
namespace megdnn { | |||||
namespace cuda { | |||||
#define cb(_dtype) \ | |||||
INST_RUN_ELEMWISE(FakeQuantKernOp<DTypeTrait<_dtype>::ctype>, \ | |||||
DTypeTrait<_dtype>::ctype, 2); \ | |||||
INST_RUN_ELEMWISE(FakeQuantBwdKernOp<DTypeTrait<_dtype>::ctype>, \ | |||||
DTypeTrait<_dtype>::ctype, 2); \ | |||||
INST_RUN_ELEMWISE(FakeQuantKernOpNonContig<DTypeTrait<_dtype>::ctype>, \ | |||||
DTypeTrait<_dtype>::ctype, 4); \ | |||||
INST_RUN_ELEMWISE(FakeQuantBwdKernOpNonContig<DTypeTrait<_dtype>::ctype>, \ | |||||
DTypeTrait<_dtype>::ctype, 5); | |||||
cb(megdnn::dtype::Float32) | |||||
} // namespace cuda | |||||
} // namespace megdnn |
@@ -0,0 +1,106 @@ | |||||
/** | |||||
* \file dnn/src/cuda/elemwise_helper.cuh | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#pragma once | |||||
#include "src/cuda/elemwise_helper.cuh" | |||||
#include "src/cuda/utils.cuh" | |||||
#if MEGDNN_CC_HOST | |||||
#include "megdnn/oprs.h" | |||||
#endif | |||||
namespace megdnn { | |||||
namespace cuda { | |||||
template <typename ctype> | |||||
struct FakeQuantKernOp { | |||||
ctype* input; | |||||
ctype* output; | |||||
ctype qmin, qmax; | |||||
__device__ void operator()(uint32_t idx, ctype scale, ctype zero_point) { | |||||
ctype x = round(input[idx] / scale) + zero_point; | |||||
x = fmaxf(fminf(x, qmax), qmin); | |||||
output[idx] = (x - zero_point) * scale; | |||||
} | |||||
#if MEGDNN_CC_HOST | |||||
FakeQuantKernOp(const TensorND& input, const TensorND& output, | |||||
const FakeQuant::Param& param) | |||||
: input{input.ptr<ctype>()}, | |||||
output{output.ptr<ctype>()}, | |||||
qmin(param.qmin), | |||||
qmax(param.qmax) {} | |||||
#endif | |||||
}; | |||||
template <typename ctype> | |||||
struct FakeQuantBwdKernOp { | |||||
ctype* diff; | |||||
ctype* input; | |||||
ctype* grad; | |||||
ctype qmin, qmax; | |||||
__device__ void operator()(uint32_t idx, ctype scale, ctype zero_point) { | |||||
ctype x = round(input[idx] / scale) + zero_point; | |||||
grad[idx] = x <= qmax && x >= qmin ? diff[idx] : 0.0; | |||||
} | |||||
#if MEGDNN_CC_HOST | |||||
FakeQuantBwdKernOp(const TensorND& diff, const TensorND& input, | |||||
const TensorND& grad, const FakeQuant::Param& param) | |||||
: diff{diff.ptr<ctype>()}, | |||||
input{input.ptr<ctype>()}, | |||||
grad{grad.ptr<ctype>()}, | |||||
qmin(param.qmin), | |||||
qmax(param.qmax) {} | |||||
#endif | |||||
}; | |||||
template <typename ctype> | |||||
struct FakeQuantKernOpNonContig { | |||||
ctype qmin; | |||||
ctype qmax; | |||||
__device__ void operator()(uint32_t, ctype& output, ctype input, | |||||
ctype scale, ctype zero_point) { | |||||
ctype x = round(input / scale) + zero_point; | |||||
x = fmaxf(fminf(x, qmax), qmin); | |||||
output = (x - zero_point) * scale; | |||||
} | |||||
#if MEGDNN_CC_HOST | |||||
FakeQuantKernOpNonContig(const FakeQuant::Param& param) | |||||
: qmin(param.qmin), qmax(param.qmax) {} | |||||
#endif | |||||
}; | |||||
template <typename ctype> | |||||
struct FakeQuantBwdKernOpNonContig { | |||||
ctype qmin; | |||||
ctype qmax; | |||||
__device__ void operator()(uint32_t, ctype& grad, ctype diff, ctype input, | |||||
ctype scale, ctype zero_point) { | |||||
ctype x = round(input / scale) + zero_point; | |||||
grad = x <= qmax && x >= qmin ? diff : 0.0; | |||||
} | |||||
#if MEGDNN_CC_HOST | |||||
FakeQuantBwdKernOpNonContig(const FakeQuant::Param& param) | |||||
: qmin(param.qmin), qmax(param.qmax) {} | |||||
#endif | |||||
}; | |||||
} // namespace cuda | |||||
} // namespace megdnn |
@@ -0,0 +1,134 @@ | |||||
/** | |||||
* \file dnn/src/cuda/fake_quant/opr_impl.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "./opr_impl.h" | |||||
#include "./kern.cuh" | |||||
#include "src/common/utils.h" | |||||
namespace megdnn { | |||||
namespace cuda { | |||||
void FakeQuantForwardImpl::exec(_megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out output, | |||||
_megdnn_workspace workspace) { | |||||
check_exec(input.layout, scale.layout, zero_point.layout, output.layout, | |||||
workspace.size); | |||||
if (!input.layout.is_contiguous() || !output.layout.is_contiguous()) { | |||||
return exec_noncontig(input, scale, zero_point, output); | |||||
} | |||||
ElemwiseOpParamN<2> ele_param; | |||||
ele_param[0] = scale; | |||||
ele_param[0].layout = ele_param[0].layout.broadcast(input.layout); | |||||
ele_param[1] = zero_point; | |||||
ele_param[1].layout = ele_param[1].layout.broadcast(input.layout); | |||||
ele_param.init_from_given_tensor(); | |||||
auto stream = cuda_stream(handle()); | |||||
#define cb(DType) \ | |||||
if (input.layout.dtype == DType()) { \ | |||||
using T = typename DTypeTrait<DType>::ctype; \ | |||||
run_elemwise<FakeQuantKernOp<T>, T, 2>(ele_param, stream, \ | |||||
{input, output, m_param}); \ | |||||
return; \ | |||||
} | |||||
cb(megdnn::dtype::Float32) | |||||
#undef cb | |||||
} | |||||
void FakeQuantForwardImpl::exec_noncontig(_megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out output) { | |||||
ElemwiseOpParamN<4> ele_param; | |||||
ele_param[0] = output; | |||||
ele_param[1] = input; | |||||
ele_param[2] = scale; | |||||
ele_param[2].layout = ele_param[2].layout.broadcast(input.layout); | |||||
ele_param[3] = zero_point; | |||||
ele_param[3].layout = ele_param[3].layout.broadcast(input.layout); | |||||
ele_param.init_from_given_tensor(); | |||||
auto stream = cuda_stream(handle()); | |||||
#define cb(DType) \ | |||||
if (input.layout.dtype == DType()) { \ | |||||
using T = typename DTypeTrait<DType>::ctype; \ | |||||
run_elemwise<FakeQuantKernOpNonContig<T>, T, 4>(ele_param, stream, \ | |||||
{m_param}); \ | |||||
return; \ | |||||
} | |||||
cb(megdnn::dtype::Float32) | |||||
#undef cb | |||||
} | |||||
void FakeQuantBackwardImpl::exec(_megdnn_tensor_in diff, | |||||
_megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out grad, | |||||
_megdnn_workspace workspace) { | |||||
check_exec(diff.layout, input.layout, scale.layout, zero_point.layout, | |||||
grad.layout, workspace.size); | |||||
if (!input.layout.is_contiguous() || !diff.layout.is_contiguous() || | |||||
!grad.layout.is_contiguous()) { | |||||
return exec_noncontig(diff, input, scale, zero_point, grad); | |||||
} | |||||
ElemwiseOpParamN<2> ele_param; | |||||
ele_param[0] = scale; | |||||
ele_param[0].layout = ele_param[0].layout.broadcast(input.layout); | |||||
ele_param[1] = zero_point; | |||||
ele_param[1].layout = ele_param[1].layout.broadcast(input.layout); | |||||
ele_param.init_from_given_tensor(); | |||||
auto m_param = param(); | |||||
auto stream = cuda_stream(handle()); | |||||
#define cb(DType) \ | |||||
if (grad.layout.dtype == DType()) { \ | |||||
using T = typename DTypeTrait<DType>::ctype; \ | |||||
run_elemwise<FakeQuantBwdKernOp<T>, T, 2>( \ | |||||
ele_param, stream, {diff, input, grad, m_param}); \ | |||||
return; \ | |||||
} | |||||
cb(megdnn::dtype::Float32) | |||||
#undef cb | |||||
} | |||||
void FakeQuantBackwardImpl::exec_noncontig(_megdnn_tensor_in diff, | |||||
_megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out grad) { | |||||
ElemwiseOpParamN<5> ele_param; | |||||
ele_param[0] = grad; | |||||
ele_param[1] = diff; | |||||
ele_param[2] = input; | |||||
ele_param[3] = scale; | |||||
ele_param[3].layout = ele_param[3].layout.broadcast(input.layout); | |||||
ele_param[4] = zero_point; | |||||
ele_param[4].layout = ele_param[4].layout.broadcast(input.layout); | |||||
ele_param.init_from_given_tensor(); | |||||
auto m_param = param(); | |||||
auto stream = cuda_stream(handle()); | |||||
#define cb(DType) \ | |||||
if (grad.layout.dtype == DType()) { \ | |||||
using T = typename DTypeTrait<DType>::ctype; \ | |||||
run_elemwise<FakeQuantBwdKernOpNonContig<T>, T, 5>(ele_param, stream, \ | |||||
{m_param}); \ | |||||
return; \ | |||||
} | |||||
cb(megdnn::dtype::Float32) | |||||
#undef cb | |||||
} | |||||
} // namespace cuda | |||||
} // namespace megdnn |
@@ -0,0 +1,55 @@ | |||||
/** | |||||
* \file dnn/src/cuda/fake_quant/opr_impl.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#pragma once | |||||
#include "megdnn/oprs.h" | |||||
#include "src/cuda/utils.h" | |||||
namespace megdnn { | |||||
namespace cuda { | |||||
class FakeQuantForwardImpl : public FakeQuantForward { | |||||
public: | |||||
using FakeQuantForward::FakeQuantForward; | |||||
void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, _megdnn_tensor_out output, | |||||
_megdnn_workspace workspace) override; | |||||
size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | |||||
const TensorLayout&, | |||||
const TensorLayout&) override { | |||||
return 0; | |||||
} | |||||
private: | |||||
void exec_noncontig(_megdnn_tensor_in input, _megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out output); | |||||
}; | |||||
class FakeQuantBackwardImpl : public FakeQuantBackward { | |||||
public: | |||||
using FakeQuantBackward::FakeQuantBackward; | |||||
void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, _megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||||
size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | |||||
const TensorLayout&, const TensorLayout&, | |||||
const TensorLayout&) override { | |||||
return 0; | |||||
} | |||||
private: | |||||
void exec_noncontig(_megdnn_tensor_in diff, _megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, _megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out grad); | |||||
}; | |||||
} // namespace cuda | |||||
} // namespace megdnn |
@@ -77,6 +77,7 @@ | |||||
#include "src/cuda/roi_align/opr_impl.h" | #include "src/cuda/roi_align/opr_impl.h" | ||||
#include "src/cuda/batch_conv_bias/opr_impl.h" | #include "src/cuda/batch_conv_bias/opr_impl.h" | ||||
#include "src/cuda/remap/opr_impl.h" | #include "src/cuda/remap/opr_impl.h" | ||||
#include "src/cuda/fake_quant/opr_impl.h" | |||||
namespace megdnn { | namespace megdnn { | ||||
namespace cuda { | namespace cuda { | ||||
@@ -0,0 +1,118 @@ | |||||
/** | |||||
* \file dnn/src/naive/fakequant/opr_impl.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/naive/fake_quant/opr_impl.h" | |||||
#include <cmath> | |||||
#include <iostream> | |||||
#include "megdnn/tensor_iter.h" | |||||
#include "src/common/elemwise_helper.cuh" | |||||
#include "src/common/utils.h" | |||||
#include "src/naive/handle.h" | |||||
namespace { | |||||
using namespace megdnn; | |||||
template <typename T> | |||||
void forward_impl(const ElemwiseOpParamN<4> src, float qmin, float qmax) { | |||||
auto inp = tensor_iter_valonly<T>(src[0]).begin(); | |||||
auto out = tensor_iter_valonly<T>(src[1]).begin(); | |||||
auto scale = tensor_iter_valonly<T>(src[2]).begin(); | |||||
auto zero_point = tensor_iter_valonly<T>(src[3]).begin(); | |||||
size_t total = src[0].layout.total_nr_elems(); | |||||
for (size_t i = 0; i < total; ++i) { | |||||
T x = round(*inp / (*scale)) + *zero_point; | |||||
x = x <= qmin ? qmin : x; | |||||
x = x >= qmax ? qmax : x; | |||||
*out = (x - *zero_point) * *scale; | |||||
++inp; | |||||
++out; | |||||
++scale; | |||||
++zero_point; | |||||
} | |||||
} | |||||
template <typename T> | |||||
void backward_impl(const ElemwiseOpParamN<5> src, float qmin, float qmax) { | |||||
auto diff = tensor_iter_valonly<T>(src[0]).begin(); | |||||
auto input = tensor_iter_valonly<T>(src[1]).begin(); | |||||
auto scale = tensor_iter_valonly<T>(src[2]).begin(); | |||||
auto zero_point = tensor_iter_valonly<T>(src[3]).begin(); | |||||
auto grad = tensor_iter_valonly<T>(src[4]).begin(); | |||||
size_t total = src[0].layout.total_nr_elems(); | |||||
for (size_t i = 0; i < total; ++i) { | |||||
T x = round(*input / (*scale)) + *zero_point; | |||||
*grad = (x >= qmin && x <= qmax) ? *diff : 0.0; | |||||
++diff; | |||||
++input; | |||||
++scale; | |||||
++zero_point; | |||||
++grad; | |||||
} | |||||
} | |||||
} // namespace | |||||
namespace megdnn { | |||||
namespace naive { | |||||
void FakeQuantForwardImpl::exec(_megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out output, | |||||
_megdnn_workspace workspace) { | |||||
check_exec(input.layout, scale.layout, zero_point.layout, output.layout, | |||||
workspace.size); | |||||
ElemwiseOpParamN<4> src; | |||||
src[0] = input; | |||||
src[1] = output; | |||||
src[2] = scale; | |||||
src[2].layout = src[2].layout.broadcast(input.layout); | |||||
src[3] = zero_point; | |||||
src[3].layout = src[3].layout.broadcast(input.layout); | |||||
#define cb(DType) \ | |||||
if (input.layout.dtype == DType()) { \ | |||||
using T = typename DTypeTrait<DType>::ctype; \ | |||||
MEGDNN_DISPATCH_CPU_KERN_OPR( \ | |||||
forward_impl<T>(src, param().qmin, param().qmax)); \ | |||||
return; \ | |||||
} | |||||
cb(dtype::Float32) | |||||
#undef cb | |||||
} | |||||
void FakeQuantBackwardImpl::exec(_megdnn_tensor_in diff, | |||||
_megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out grad, | |||||
_megdnn_workspace workspace) { | |||||
check_exec(diff.layout, input.layout, scale.layout, zero_point.layout, | |||||
grad.layout, workspace.size); | |||||
ElemwiseOpParamN<5> src; | |||||
src[0] = diff; | |||||
src[1] = input; | |||||
src[2] = scale; | |||||
src[2].layout = src[2].layout.broadcast(input.layout); | |||||
src[3] = zero_point; | |||||
src[3].layout = src[3].layout.broadcast(input.layout); | |||||
src[4] = grad; | |||||
#define cb(DType) \ | |||||
if (diff.layout.dtype == DType() && grad.layout.dtype == DType() && \ | |||||
input.layout.dtype == DType()) { \ | |||||
using T = typename DTypeTrait<DType>::ctype; \ | |||||
MEGDNN_DISPATCH_CPU_KERN_OPR( \ | |||||
backward_impl<T>(src, param().qmin, param().qmax)); \ | |||||
return; \ | |||||
} | |||||
cb(dtype::Float32) | |||||
#undef cb | |||||
} | |||||
} // namespace naive | |||||
} // namespace megdnn |
@@ -0,0 +1,45 @@ | |||||
/** | |||||
* \file dnn/src/naive/fakequant/opr_impl.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#pragma once | |||||
#include "megdnn/oprs.h" | |||||
namespace megdnn { | |||||
namespace naive { | |||||
class FakeQuantForwardImpl : public FakeQuantForward { | |||||
public: | |||||
using FakeQuantForward::FakeQuantForward; | |||||
void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale, | |||||
_megdnn_tensor_in zero_point, _megdnn_tensor_out output, | |||||
_megdnn_workspace workspace) override; | |||||
size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | |||||
const TensorLayout&, | |||||
const TensorLayout&) override { | |||||
return 0; | |||||
} | |||||
}; | |||||
class FakeQuantBackwardImpl : public FakeQuantBackward { | |||||
public: | |||||
using FakeQuantBackward::FakeQuantBackward; | |||||
void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input, | |||||
_megdnn_tensor_in scale, _megdnn_tensor_in zero_point, | |||||
_megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||||
size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | |||||
const TensorLayout&, const TensorLayout&, | |||||
const TensorLayout&) override { | |||||
return 0; | |||||
} | |||||
}; | |||||
} // namespace naive | |||||
} // namespace megdnn |
@@ -79,6 +79,8 @@ | |||||
#include "src/naive/warp_affine/opr_impl.h" | #include "src/naive/warp_affine/opr_impl.h" | ||||
#include "src/naive/warp_perspective/opr_impl.h" | #include "src/naive/warp_perspective/opr_impl.h" | ||||
#include "src/naive/winograd_filter_preprocess/opr_impl.h" | #include "src/naive/winograd_filter_preprocess/opr_impl.h" | ||||
#include "src/naive/remap/opr_impl.h" | |||||
#include "src/naive/fake_quant/opr_impl.h" | |||||
static size_t g_image2d_pitch_alignment = 1; | static size_t g_image2d_pitch_alignment = 1; | ||||
@@ -0,0 +1,60 @@ | |||||
/** | |||||
* \file dnn/test/common/fake_quant.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#pragma once | |||||
#include "megdnn/basic_types.h" | |||||
#include "megdnn/opr_param_defs.h" | |||||
namespace megdnn { | |||||
namespace test { | |||||
namespace fake_quant { | |||||
struct TestArg { | |||||
param::FakeQuant param; | |||||
TensorShape ishape; | |||||
TensorShape scale_shape; | |||||
TensorShape zeropoint_shape; | |||||
TestArg(param::FakeQuant param, TensorShape ishape, TensorShape scale_shape, | |||||
TensorShape zeropoint_shape) | |||||
: param(param), | |||||
ishape(ishape), | |||||
scale_shape(scale_shape), | |||||
zeropoint_shape(zeropoint_shape) {} | |||||
}; | |||||
inline std::vector<TestArg> get_args() { | |||||
std::vector<TestArg> args; | |||||
param::FakeQuant cur_param; | |||||
cur_param.qmin = -128; | |||||
cur_param.qmax = 128; | |||||
for (size_t i = 10; i < 40; i += 2) { | |||||
args.emplace_back(cur_param, TensorShape{10, 64, i, i}, TensorShape{1}, | |||||
TensorShape{1}); | |||||
} | |||||
for (size_t m : {1, 10}) | |||||
for (size_t n : {1, 10}) | |||||
for (size_t j : {1, 10}) | |||||
for (size_t k : {1, 10}) { | |||||
args.emplace_back(cur_param, TensorShape{10, 64, 10, 10}, | |||||
TensorShape{10, 64, m, n}, | |||||
TensorShape{10, 64, j, k}); | |||||
} | |||||
return args; | |||||
} | |||||
} // namespace fake_quant | |||||
} // namespace test | |||||
} // namespace megdnn |
@@ -111,6 +111,8 @@ DEF(Remap, 3, true, true); | |||||
DEF(RemapBackwardData, 3, true, false); | DEF(RemapBackwardData, 3, true, false); | ||||
DEF(RemapBackwardMat, 4, true, false); | DEF(RemapBackwardMat, 4, true, false); | ||||
DEF(DctChannelSelectForward, 4, true, true); | DEF(DctChannelSelectForward, 4, true, true); | ||||
DEF(FakeQuantForward, 4, true, true); | |||||
DEF(FakeQuantBackward, 5, true, false); | |||||
} // namespace test | } // namespace test | ||||
} // namespace megdnn | } // namespace megdnn | ||||
@@ -0,0 +1,99 @@ | |||||
/** | |||||
* \file dnn/test/cuda/fake_quant.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "test/common/fake_quant.h" | |||||
#include "megdnn/oprs.h" | |||||
#include "test/common/checker.h" | |||||
#include "test/cuda/fixture.h" | |||||
namespace megdnn { | |||||
namespace test { | |||||
using namespace fake_quant; | |||||
TEST_F(CUDA, FAKE_QUANT) { | |||||
std::vector<TestArg> args = get_args(); | |||||
auto dtype = dtype::Float32(); | |||||
std::unique_ptr<RNG> rng; | |||||
for (auto&& arg : args) { | |||||
auto param = arg.param; | |||||
auto ishape = arg.ishape; | |||||
auto scale_shape = arg.scale_shape; | |||||
auto zeropoint_shape = arg.zeropoint_shape; | |||||
Checker<FakeQuantForward> checker(handle_cuda()); | |||||
checker.set_param(param) | |||||
.set_dtype(0, dtype) | |||||
.set_dtype(1, dtype) | |||||
.set_dtype(2, dtype) | |||||
.set_dtype(3, dtype) | |||||
.execs(TensorShapeArray{ishape, scale_shape, zeropoint_shape, | |||||
ishape}); | |||||
} | |||||
// test noncontiguous layout | |||||
for (auto&& arg : args) { | |||||
auto param = arg.param; | |||||
auto ishape = arg.ishape; | |||||
auto scale_shape = arg.scale_shape; | |||||
auto zeropoint_shape = arg.zeropoint_shape; | |||||
Checker<FakeQuantForward> checker(handle_cuda()); | |||||
TensorLayout ilayout( | |||||
ishape, | |||||
{(long int)(ishape[1] * ishape[2] * ishape[3] * 2), | |||||
(long int)(ishape[2] * ishape[3]), (long int)ishape[3], 1}, | |||||
dtype::Float32()); | |||||
checker.set_param(param).execl({ilayout, | |||||
{scale_shape, dtype::Float32()}, | |||||
{zeropoint_shape, dtype::Float32()}, | |||||
ilayout}); | |||||
} | |||||
} | |||||
TEST_F(CUDA, FAKE_QUANT_BACKWARD) { | |||||
std::vector<TestArg> args = get_args(); | |||||
auto dtype = dtype::Float32(); | |||||
for (auto&& arg : args) { | |||||
auto param = arg.param; | |||||
auto ishape = arg.ishape; | |||||
auto scale_shape = arg.scale_shape; | |||||
auto zeropoint_shape = arg.zeropoint_shape; | |||||
Checker<FakeQuantBackward> checker(handle_cuda()); | |||||
checker.set_param(param) | |||||
.set_dtype(0, dtype) | |||||
.set_dtype(1, dtype) | |||||
.set_dtype(2, dtype) | |||||
.set_dtype(3, dtype) | |||||
.set_dtype(4, dtype) | |||||
.execs(TensorShapeArray{ishape, ishape, scale_shape, | |||||
zeropoint_shape, ishape}); | |||||
} | |||||
// test noncontiguous layout | |||||
for (auto&& arg : args) { | |||||
auto param = arg.param; | |||||
auto ishape = arg.ishape; | |||||
auto scale_shape = arg.scale_shape; | |||||
auto zeropoint_shape = arg.zeropoint_shape; | |||||
Checker<FakeQuantBackward> checker(handle_cuda()); | |||||
TensorLayout ilayout( | |||||
ishape, | |||||
{(long int)(ishape[1] * ishape[2] * ishape[3] * 2), | |||||
(long int)(ishape[2] * ishape[3]), (long int)ishape[3], 1}, | |||||
dtype::Float32()); | |||||
checker.set_param(param).execl({ilayout, | |||||
ilayout, | |||||
{scale_shape, dtype::Float32()}, | |||||
{zeropoint_shape, dtype::Float32()}, | |||||
ilayout}); | |||||
} | |||||
} | |||||
} // namespace test | |||||
} // namespace megdnn |