@@ -2005,6 +2005,50 @@ protected:
            size_t workspace_in_bytes);
};

class DropoutBase : public OperatorBase {
    DEF_OPR_IMPL_CTOR(DropoutBase, OperatorBase);
    DEF_OPR_PARAM(Dropout);
};

class DropoutForward : public DropoutBase {
    DEF_OPR_IMPL(DropoutForward, DropoutBase, 1, 2);

public:
    void deduce_layout(const TensorLayout& inp, TensorLayout& oup, TensorLayout& mask);
    virtual void exec(
            _megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask,
            _megdnn_workspace workspace) = 0;
    virtual size_t get_workspace_in_bytes(
            const TensorLayout& inp, const TensorLayout& oup,
            const TensorLayout& mask) = 0;
    virtual size_t get_mask_size_in_bytes(const TensorLayout& inp) = 0;

protected:
    void check_exec(
            const TensorLayout& inp, const TensorLayout& oup, const TensorLayout& mask,
            size_t workspace_in_bytes);
};
using Dropout = DropoutForward;

class DropoutBackward : public DropoutBase {
    DEF_OPR_IMPL(DropoutBackward, DropoutBase, 2, 1);

public:
    void deduce_layout(
            const TensorLayout& doup, const TensorLayout& mask, TensorLayout& dinp);
    virtual void exec(
            _megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp,
            _megdnn_workspace workspace) = 0;
    virtual size_t get_workspace_in_bytes(
            const TensorLayout& doup, const TensorLayout& mask,
            const TensorLayout& dinp) = 0;

protected:
    void check_exec(
            const TensorLayout& doup, const TensorLayout& mask,
            const TensorLayout& dinp, size_t workspace_in_bytes);
};
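/*!
 * Usage sketch (for orientation only; it mirrors how the megdnn tests drive
 * these operators, and all names come from the declarations above):
 *     auto fwd = handle->create_operator<DropoutForward>();
 *     fwd->param().drop_prob = 0.5f;
 *     TensorLayout mask{{fwd->get_mask_size_in_bytes(inp)}, dtype::Byte()};
 *     fwd->exec(inp_nd, oup_nd, mask_nd, workspace);  // oup = inp * mask / (1 - p)
 */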
} // namespace megdnn

#include "megdnn/internal/opr_header_epilogue.h"
@@ -1218,4 +1218,9 @@ PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'),
 .add_fields('float32', 'eps', '1e-5f')
 .add_fields('uint64', 'normalized_dim', '1')
 .add_fields('uint64', 'normalized_size', '1')
 )

(pdef('Dropout')
 .add_fields('float32', 'drop_prob', '0')
 .add_fields('uint64', 'seed', '0')
 )
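# drop_prob is the probability that an element is zeroed; seed feeds the backend
# RNG (the imperative layer keeps it in sync with the global RNG handle, see the
# OpMeth<Dropout> specialization later in this change).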
@@ -0,0 +1,74 @@
/**
 * \file dnn/src/common/dropout.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include <time.h>
#include "megdnn/oprs.h"
#include "src/common/utils.h"

namespace megdnn {

void DropoutForward::deduce_layout(
        const TensorLayout& inp, TensorLayout& oup, TensorLayout& mask) {
    oup = inp;
    size_t mask_size = get_mask_size_in_bytes(inp);
    mask = TensorLayout(TensorShape({mask_size}), dtype::Byte());
}

void DropoutForward::check_exec(
        const TensorLayout& inp, const TensorLayout& oup, const TensorLayout& mask,
        size_t workspace_in_bytes) {
    auto errmsg = [&]() {
        return megdnn_layout_msg(inp) + ", " + megdnn_layout_msg(oup) + ", " +
               megdnn_layout_msg(mask);
    };
    MEGDNN_MARK_USED_VAR(errmsg);

    megdnn_assert_contiguous(inp);
    megdnn_assert_contiguous(oup);
    megdnn_assert_contiguous(mask);
    megdnn_assert(inp.eq_layout(oup), "%s", errmsg().c_str());
    megdnn_assert(inp.dtype.category() == DTypeCategory::FLOAT);

    auto required_workspace_in_bytes = get_workspace_in_bytes(inp, oup, mask);
    megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);

    auto required_mask_size_in_bytes = get_mask_size_in_bytes(inp);
    megdnn_assert(mask.total_nr_elems() >= required_mask_size_in_bytes);
    megdnn_assert(mask.dtype == dtype::Byte());
}

void DropoutBackward::deduce_layout(
        const TensorLayout& doup, const TensorLayout&, TensorLayout& dinp) {
    dinp = doup;
}

void DropoutBackward::check_exec(
        const TensorLayout& doup, const TensorLayout& mask, const TensorLayout& dinp,
        size_t workspace_in_bytes) {
    auto errmsg = [&]() {
        return megdnn_layout_msg(doup) + ", " + megdnn_layout_msg(mask) + ", " +
               megdnn_layout_msg(dinp);
    };
    MEGDNN_MARK_USED_VAR(errmsg);

    megdnn_assert_contiguous(doup);
    megdnn_assert_contiguous(mask);
    megdnn_assert_contiguous(dinp);
    megdnn_assert(doup.eq_layout(dinp), "%s", errmsg().c_str());

    auto required_workspace_in_bytes = get_workspace_in_bytes(doup, mask, dinp);
    megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
    megdnn_assert(doup.dtype.category() == DTypeCategory::FLOAT);
    megdnn_assert(mask.dtype == dtype::Byte());
    megdnn_assert(mask.ndim == 1);
}

} // namespace megdnn
@@ -211,8 +211,9 @@ private:
    cb(PaddingForward) \
    cb(PaddingBackward) \
    cb(LayerNormForward) \
    cb(LayerNormBackward)
    cb(LayerNormBackward) \
    cb(DropoutForward) \
    cb(DropoutBackward)
// clang-format on

/*!
@@ -137,6 +137,8 @@ DEF(LSQBackward, 7, true, false);
DEF(Fill, 1, true, false);
DEF(LayerNormForward, 6, true, true);
DEF(LayerNormBackward, 8, true, true);
DEF(DropoutForward, 3, true, true);
DEF(DropoutBackward, 3, true, true);
} // namespace megdnn

// vim: syntax=cpp.doxygen
@@ -0,0 +1,118 @@
/**
 * \file dnn/src/cuda/dropout/opr_impl.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "src/cuda/dropout/opr_impl.h"

namespace megdnn {
namespace cuda {

using Param = megdnn::Dropout::Param;

struct DropoutTensorDesc : public TensorDesc {
public:
    DropoutTensorDesc(const TensorLayout& layout) : TensorDesc() {
        set_dropout_desc(layout);
    }
    void set_dropout_desc(const TensorLayout& layout) {
        cudnnDataType_t cudnn_dtype;
        switch (layout.dtype.enumv()) {
            case DTypeEnum::Float32:
                cudnn_dtype = CUDNN_DATA_FLOAT;
                break;
            case DTypeEnum::Float16:
                cudnn_dtype = CUDNN_DATA_HALF;
                break;
            default:
                megdnn_throw("dtype must be float16/float32");
        }
        // flatten the whole tensor into a 1x1x1xN NCHW descriptor
        cudnn_check(cudnnSetTensor4dDescriptor(
                desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, 1, 1,
                layout.total_nr_elems()));
    }
};

size_t DropoutForwardImpl::get_mask_size_in_bytes(const TensorLayout& inp) {
    size_t reserve_space_size_in_bytes = 0;
    DropoutTensorDesc ddesc(inp);
    cudnn_check(
            cudnnDropoutGetReserveSpaceSize(ddesc.desc, &reserve_space_size_in_bytes));
    return reserve_space_size_in_bytes;
}

void DropoutForwardImpl::exec(
        _megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask,
        _megdnn_workspace workspace) {
    check_exec(inp.layout, oup.layout, mask.layout, workspace.size);
    uint64_t seed = param().seed;
    float drop_prob = param().drop_prob;

    if (!dropout_status.initialized()) {
        dropout_status.set(cudnn_handle(this->handle()), seed, drop_prob);
    }
    if (dropout_status.drop_prob != drop_prob) {
        dropout_status.drop_prob = drop_prob;
        dropout_status.restore_desc(cudnn_handle(this->handle()));
    }
    megdnn_assert(dropout_status.seed == seed);

    DropoutTensorDesc inp_desc(inp.layout), oup_desc(oup.layout);
    auto&& op_desc = dropout_status.desc;

    cudnn_check(cudnnDropoutForward(
            cudnn_handle(this->handle()), op_desc.desc, inp_desc.desc, inp.raw_ptr(),
            oup_desc.desc, oup.raw_ptr(), mask.raw_ptr(),
            mask.layout.total_nr_elems()));
}

void DropoutBackwardImpl::exec(
        _megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp,
        _megdnn_workspace workspace) {
    check_exec(doup.layout, mask.layout, dinp.layout, workspace.size);
#if CUDNN_VERSION >= 7000
    size_t status_size_in_bytes = 0;
    cudnn_check(cudnnDropoutGetStatesSize(
            cudnn_handle(this->handle()), &status_size_in_bytes));
    DropoutTensorDesc doup_desc(doup.layout), dinp_desc(dinp.layout);
    op_desc.restore(
            cudnn_handle(this->handle()), param().drop_prob, nullptr,
            status_size_in_bytes, 0);
    cudnn_check(cudnnDropoutBackward(
            cudnn_handle(this->handle()), op_desc.desc, doup_desc.desc, doup.raw_ptr(),
            dinp_desc.desc, dinp.raw_ptr(), mask.raw_ptr(),
            mask.layout.total_nr_elems()));
#else
    uint64_t seed = param().seed;
    float drop_prob = param().drop_prob;
    if (!dropout_status.initialized()) {
        dropout_status.set(cudnn_handle(this->handle()), seed, drop_prob);
    }
    if (dropout_status.drop_prob != drop_prob) {
        dropout_status.drop_prob = drop_prob;
        dropout_status.restore_desc(cudnn_handle(this->handle()));
    }
    auto&& op_desc = dropout_status.desc;
    DropoutTensorDesc doup_desc(doup.layout), dinp_desc(dinp.layout);
    cudnn_check(cudnnDropoutBackward(
            cudnn_handle(this->handle()), op_desc.desc, doup_desc.desc, doup.raw_ptr(),
            dinp_desc.desc, dinp.raw_ptr(), mask.raw_ptr(),
            mask.layout.total_nr_elems()));
#endif
}

} // namespace cuda
} // namespace megdnn

// vim: syntax=cpp.doxygen
@@ -0,0 +1,116 @@
/**
 * \file dnn/src/cuda/dropout/opr_impl.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#pragma once
#include "megdnn/oprs.h"
#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/utils.h"

namespace megdnn {
namespace cuda {

class DropoutDesc {
public:
    DropoutDesc() { cudnn_check(cudnnCreateDropoutDescriptor(&desc)); }
    ~DropoutDesc() { cudnn_check(cudnnDestroyDropoutDescriptor(desc)); }
    void set(
            cudnnHandle_t handle, void* status, size_t states_size_in_bytes,
            uint64_t seed, float drop_prob) {
        cudnn_check(cudnnSetDropoutDescriptor(
                desc, handle, drop_prob, status, states_size_in_bytes, seed));
    }
    void restore(
            cudnnHandle_t handle, float drop_prob, void* status,
            size_t states_size_in_bytes, uint64_t seed) {
#if CUDNN_VERSION >= 7000
        cudnn_check(cudnnRestoreDropoutDescriptor(
                desc, handle, drop_prob, status, states_size_in_bytes, 0));
#else
        // cudnnRestoreDropoutDescriptor is not available before cuDNN 7.0,
        // so we set the dropout descriptor again instead of restoring it
        cudnn_check(cudnnSetDropoutDescriptor(
                desc, handle, drop_prob, status, states_size_in_bytes, seed));
#endif
    }
    cudnnDropoutDescriptor_t desc;
};

class DropoutStatus {
    void* status;
    uint64_t status_size;
    uint64_t seed;
    float drop_prob;
    DropoutDesc desc;

public:
    DropoutStatus() {
        status = nullptr;
        status_size = 0;
    }
    ~DropoutStatus() {
        if (status != nullptr)
            cuda_check(cudaFree(status));
    }
    void set(cudnnHandle_t handle, uint64_t seed, float drop_prob) {
        this->seed = seed;
        this->drop_prob = drop_prob;
        cudnn_check(cudnnDropoutGetStatesSize(handle, &status_size));
        cuda_check(cudaMalloc(&status, status_size));
        desc.set(handle, status, status_size, seed, drop_prob);
    }
    void restore_desc(cudnnHandle_t handle) {
        desc.restore(handle, drop_prob, status, status_size, seed);
    }
    bool initialized() { return status != nullptr; }
    friend class DropoutForwardImpl;
    friend class DropoutBackwardImpl;
};
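// Rough sketch of the cuDNN call sequence that DropoutStatus wraps (the actual
// calls live in opr_impl.cpp):
//     size_t states_size;
//     cudnnDropoutGetStatesSize(handle, &states_size);   // size of the RNG states
//     cudaMalloc(&states, states_size);                  // persistent state buffer
//     cudnnSetDropoutDescriptor(desc, handle, drop_prob, states, states_size, seed);
//     cudnnDropoutForward(handle, desc, x_desc, x, y_desc, y, mask, mask_size);
// The states buffer must outlive every dropout call made with the descriptor,
// which is why DropoutStatus owns and frees it.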
// similar to the RNG operators, the dropout operator also keeps a status
class DropoutForwardImpl final : public DropoutForward {
    DropoutStatus dropout_status;

public:
    using DropoutForward::DropoutForward;
    void exec(
            _megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask,
            _megdnn_workspace workspace) override;
    size_t get_mask_size_in_bytes(const TensorLayout& inp) override;
    size_t get_workspace_in_bytes(
            const TensorLayout&, const TensorLayout&, const TensorLayout&) override {
        return 0;
    }
};

class DropoutBackwardImpl final : public DropoutBackward {
#if CUDNN_VERSION >= 7000
    DropoutDesc op_desc;
#else
    // cudnnRestoreDropoutDescriptor is not available before cuDNN 7.0, so we
    // have to keep the dropout status and set the dropout descriptor instead
    // of restoring it
    DropoutStatus dropout_status;
#endif

public:
    using DropoutBackward::DropoutBackward;
    void exec(
            _megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp,
            _megdnn_workspace workspace) override;
    size_t get_workspace_in_bytes(
            const TensorLayout&, const TensorLayout&, const TensorLayout&) override {
        return 0;
    }
};

} // namespace cuda
} // namespace megdnn

// vim: syntax=cpp.doxygen
@@ -34,6 +34,7 @@
#include "src/cuda/deformable_conv/opr_impl.h"
#include "src/cuda/deformable_ps_roi_pooling/opr_impl.h"
#include "src/cuda/dot/opr_impl.h"
#include "src/cuda/dropout/opr_impl.h"
#include "src/cuda/elemwise/opr_impl.h"
#include "src/cuda/elemwise_multi_type/opr_impl.h"
#include "src/cuda/eye/opr_impl.h"
@@ -0,0 +1,110 @@
/**
 * \file dnn/src/naive/dropout/opr_impl.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "src/naive/dropout/opr_impl.h"
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include "src/common/utils.h"
#include "src/naive/handle.h"

using namespace megdnn;
using namespace naive;
using namespace std;

namespace {

using Param = megdnn::Dropout::Param;
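// get_random_number() maps a 64-bit random integer to a float in (0, 1]: it
// assembles an IEEE-754 single with exponent 0x7F (value 1.0) and the top 23
// bits of x as the mantissa, i.e. a value in [1, 2), and returns 2 - f.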
dt_float32 get_random_number(uint64_t x) {
    union {
        uint32_t i;
        dt_float32 f;
    } u;
    u.i = (0x7F << 23) | (x >> 41);
    return 2 - u.f;
}

template <typename T>
void forward(
        T* inp, T* oup, void* raw_reserved, size_t len, Xoroshiro128plus& rng,
        float drop_prob) {
    uint8_t* reserved = reinterpret_cast<uint8_t*>(raw_reserved);
    // kept elements are scaled by 1 / (1 - p) so the expectation of the output
    // matches the input
    float scale = 1.0f / (1.0f - drop_prob);
    for (size_t i = 0; i < len; ++i) {
        float rn = get_random_number(rng());
        // mask byte: 0 means dropped, 1 means kept
        reserved[i] = rn < drop_prob ? 0 : 1;
        oup[i] = static_cast<T>(reserved[i] ? static_cast<float>(inp[i]) * scale : 0.f);
    }
}

template <typename T>
void backward(T* doup, T* dinp, void* raw_reserved, size_t len, float drop_prob) {
    uint8_t* reserved = reinterpret_cast<uint8_t*>(raw_reserved);
    float scale = 1.0f / (1.0f - drop_prob);
    for (size_t i = 0; i < len; ++i) {
        dinp[i] =
                static_cast<T>(reserved[i] ? static_cast<float>(doup[i]) * scale : 0.f);
    }
}

} // namespace
namespace megdnn {
namespace naive {

size_t DropoutForwardImpl::get_mask_size_in_bytes(const TensorLayout& inp) {
    return inp.total_nr_elems();
}

void DropoutForwardImpl::exec(
        _megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask,
        _megdnn_workspace workspace) {
    check_exec(inp.layout, oup.layout, mask.layout, workspace.size);
    size_t length = inp.layout.total_nr_elems();
    uint64_t seed = param().seed;
    m_rng.ensure_seed(seed);
#define cb(DType)                                                           \
    if (inp.layout.dtype == DType()) {                                      \
        using T = typename DTypeTrait<DType>::ctype;                        \
        MEGDNN_DISPATCH_CPU_KERN_OPR(forward<T>(                            \
                inp.ptr<T>(), oup.ptr<T>(), mask.raw_ptr(), length, m_rng,  \
                param().drop_prob));                                        \
        return;                                                             \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
    megdnn_throw("bad dtype");
}

void DropoutBackwardImpl::exec(
        _megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp,
        _megdnn_workspace workspace) {
    check_exec(doup.layout, mask.layout, dinp.layout, workspace.size);
    size_t length = doup.layout.total_nr_elems();
#define cb(DType)                                                           \
    if (doup.layout.dtype == DType()) {                                     \
        using T = typename DTypeTrait<DType>::ctype;                        \
        MEGDNN_DISPATCH_CPU_KERN_OPR(backward<T>(                           \
                doup.ptr<T>(), dinp.ptr<T>(), mask.raw_ptr(), length,       \
                param().drop_prob));                                        \
        return;                                                             \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
    megdnn_throw("bad dtype");
}

} // namespace naive
} // namespace megdnn

// vim: syntax=cpp.doxygen
@@ -0,0 +1,49 @@
/**
 * \file dnn/src/naive/dropout/opr_impl.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#pragma once
#include "megdnn/oprs.h"
#include "src/naive/rng/opr_impl.h"

namespace megdnn {
namespace naive {

class DropoutForwardImpl final : public DropoutForward {
    Xoroshiro128plus m_rng;

public:
    using DropoutForward::DropoutForward;
    void exec(
            _megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask,
            _megdnn_workspace workspace) override;
    size_t get_mask_size_in_bytes(const TensorLayout& inp) override;
    size_t get_workspace_in_bytes(
            const TensorLayout&, const TensorLayout&, const TensorLayout&) override {
        return 0;
    }
};

class DropoutBackwardImpl final : public DropoutBackward {
public:
    using DropoutBackward::DropoutBackward;
    void exec(
            _megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp,
            _megdnn_workspace workspace) override;
    size_t get_workspace_in_bytes(
            const TensorLayout&, const TensorLayout&, const TensorLayout&) override {
        return 0;
    }
};

} // namespace naive
} // namespace megdnn

// vim: syntax=cpp.doxygen
@@ -36,6 +36,7 @@
#include "src/naive/deformable_conv/opr_impl.h"
#include "src/naive/deformable_ps_roi_pooling/opr_impl.h"
#include "src/naive/dot/opr_impl.h"
#include "src/naive/dropout/opr_impl.h"
#include "src/naive/elemwise/opr_impl.h"
#include "src/naive/elemwise_multi_type/opr_impl.h"
#include "src/naive/eye/opr_impl.h"
@@ -193,6 +193,70 @@ void run_shuffle(Handle* handle, bool bwd_flag) {
    run({6, 3});
}

template <typename T>
void run_dropout(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto run = [&](TensorShape shape, float drop_prob) {
        auto fwd = handle->create_operator<DropoutForward>();
        auto bwd = handle->create_operator<DropoutBackward>();
        fwd->param().drop_prob = drop_prob;
        bwd->param().drop_prob = drop_prob;
        double scale = 1.0 / (1.0 - drop_prob);

        TensorLayout inp_lay{shape, T()};
        TensorLayout oup_lay{shape, T()};
        TensorLayout mask_lay{{fwd->get_mask_size_in_bytes(inp_lay)}, dtype::Byte()};
        TensorLayout doup_lay{shape, T()};
        TensorLayout dinp_lay{shape, T()};
        TensorLayout fwd_ws_lay{
                {fwd->get_workspace_in_bytes(inp_lay, oup_lay, mask_lay)},
                dtype::Byte()};
        TensorLayout bwd_ws_lay{
                {bwd->get_workspace_in_bytes(doup_lay, mask_lay, dinp_lay)},
                dtype::Byte()};

        SyncedTensor<ctype> inp(handle, inp_lay);
        SyncedTensor<ctype> oup(handle, oup_lay);
        SyncedTensor<DTypeTrait<dt_byte>::ctype> mask(handle, mask_lay);
        SyncedTensor<ctype> doup(handle, doup_lay);
        SyncedTensor<ctype> dinp(handle, dinp_lay);
        SyncedTensor<DTypeTrait<dt_byte>::ctype> fwd_ws(handle, fwd_ws_lay);
        SyncedTensor<DTypeTrait<dt_byte>::ctype> bwd_ws(handle, bwd_ws_lay);

        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            inp.ptr_mutable_host()[i] = 1;
            doup.ptr_mutable_host()[i] = 1;
        }

        fwd->exec(
                inp.tensornd_dev(), oup.tensornd_dev(), mask.tensornd_dev(),
                {fwd_ws.ptr_mutable_dev(), fwd_ws.layout().total_nr_elems()});

        size_t droped_cnt = 0;
        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            ASSERT_TRUE(
                    oup.ptr_host()[i] == 0 ||
                    oup.ptr_host()[i] == static_cast<ctype>(scale));
            if (oup.ptr_host()[i] == 0) {
                droped_cnt++;
            }
        }
        float real_drop = droped_cnt * 1.0 / inp.layout().total_nr_elems();
        ASSERT_LT(abs(drop_prob - real_drop), 1e-2);

#if CUDNN_VERSION >= 7000
        bwd->exec(
                doup.tensornd_dev(), mask.tensornd_dev(), dinp.tensornd_dev(),
                {bwd_ws.ptr_mutable_dev(), bwd_ws.layout().total_nr_elems()});

        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            ASSERT_TRUE(oup.ptr_host()[i] == dinp.ptr_host()[i]);
        }
#endif
    };
    run({32, 32, 32, 32}, 0.2);
    run({100000}, 0.3);
}

} // anonymous namespace

TEST_F(CUDA, UNIFORM_RNG_F32) {

@@ -290,6 +354,14 @@ TEST_F(CUDA, SHUFFLE_RNG_BWD_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), true);
}

TEST_F(CUDA, DROPOUT_F32) {
    run_dropout<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, DROPOUT_F16) {
    run_dropout<dtype::Float16>(handle_cuda());
}

} // namespace test
} // namespace megdnn
@@ -231,6 +231,67 @@ void run_shuffle(Handle* handle, bool bwd_flag) {
    run({10});
    run({6, 3});
}

template <typename T>
void run_dropout(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto run = [&](TensorShape shape, float drop_prob) {
        auto fwd = handle->create_operator<DropoutForward>();
        auto bwd = handle->create_operator<DropoutBackward>();
        fwd->param().drop_prob = drop_prob;
        bwd->param().drop_prob = drop_prob;
        double scale = 1.0 / (1.0 - drop_prob);

        TensorLayout inp_lay{shape, T()};
        TensorLayout oup_lay{shape, T()};
        TensorLayout mask_lay{{fwd->get_mask_size_in_bytes(inp_lay)}, dtype::Byte()};
        TensorLayout doup_lay{shape, T()};
        TensorLayout dinp_lay{shape, T()};
        TensorLayout fwd_ws_lay{
                {fwd->get_workspace_in_bytes(inp_lay, oup_lay, mask_lay)},
                dtype::Byte()};
        TensorLayout bwd_ws_lay{
                {bwd->get_workspace_in_bytes(doup_lay, mask_lay, dinp_lay)},
                dtype::Byte()};

        Tensor<ctype> inp(handle, inp_lay);
        Tensor<ctype> oup(handle, oup_lay);
        Tensor<DTypeTrait<dt_byte>::ctype> mask(handle, mask_lay);
        Tensor<ctype> doup(handle, doup_lay);
        Tensor<ctype> dinp(handle, dinp_lay);
        Tensor<DTypeTrait<dt_byte>::ctype> fwd_ws(handle, fwd_ws_lay);
        Tensor<DTypeTrait<dt_byte>::ctype> bwd_ws(handle, bwd_ws_lay);

        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            inp.ptr()[i] = 1;
            doup.ptr()[i] = 1;
        }

        fwd->exec(
                inp.tensornd(), oup.tensornd(), mask.tensornd(),
                {fwd_ws.ptr(), fwd_ws.layout().total_nr_elems()});

        size_t droped_cnt = 0;
        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            ASSERT_TRUE(oup.ptr()[i] == 0 || oup.ptr()[i] == static_cast<ctype>(scale));
            if (oup.ptr()[i] == 0) {
                droped_cnt++;
            }
        }
        float real_drop = droped_cnt * 1.0 / inp.layout().total_nr_elems();
        ASSERT_LT(abs(drop_prob - real_drop), 1e-2);

        bwd->exec(
                doup.tensornd(), mask.tensornd(), dinp.tensornd(),
                {bwd_ws.ptr(), bwd_ws.layout().total_nr_elems()});

        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            ASSERT_TRUE(oup.ptr()[i] == dinp.ptr()[i]);
        }
    };
    run({32, 32, 32, 32}, 0.2);
    run({100000}, 0.3);
}

} // namespace

TEST_F(NAIVE, UNIFORM_RNG_F32) {

@@ -309,6 +370,14 @@ TEST_F(NAIVE, SHUFFLE_RNG_BWD_F16) {
    run_shuffle<dtype::Float16>(handle(), true);
}

TEST_F(NAIVE, DROPOUT_F32) {
    run_dropout<dtype::Float32>(handle());
}

TEST_F(NAIVE, DROPOUT_F16) {
    run_dropout<dtype::Float16>(handle());
}

} // namespace test
} // namespace megdnn
@@ -13,10 +13,12 @@ from typing import NamedTuple, Optional, Sequence, Tuple, Union
from ..core import _config
from ..core._imperative_rt.core2 import apply, dtype_promotion
from ..core._imperative_rt.ops import SubgraphBuilder as _SubgraphBuilder
from ..core._imperative_rt.ops import get_global_rng_seed as _get_global_rng_seed
from ..core.ops import builtin
from ..core.ops.builtin import (
    BatchNorm,
    Dimshuffle,
    Dropout,
    Elemwise,
    GetVarShape,
    Identity,

@@ -39,7 +41,6 @@ from ..core.tensor.utils import (
from ..device import get_default_device
from ..distributed import WORLD, is_distributed
from ..jit import exclude_from_trace
from ..random import uniform
from ..tensor import Tensor
from ..utils.deprecation import deprecated_func
from ..utils.tuple_function import _pair, _pair_nonzero, _triple, _triple_nonzero
@@ -1503,12 +1504,9 @@ def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor:
        return inp

    # model in training mode, e.g. model.train()
    rv = uniform(size=inp.shape)
    mask = rv > drop_prob
    ret = inp * mask.astype(inp.dtype)
    ret *= 1 / (1 - drop_prob)
    return ret
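    # The builtin Dropout op below (as declared on the megdnn side) produces two
    # outputs: the dropped-out tensor and the byte mask consumed by DropoutBackward,
    # so only the first output is returned to the caller.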
    op = Dropout(drop_prob=drop_prob, seed=_get_global_rng_seed(), handle=0)
    outputs = apply(op, inp)
    return outputs[0]


def one_hot(inp: Tensor, num_classes: int) -> Tensor:
@@ -567,7 +567,15 @@ void init_ops(py::module m) {
                rng::delete_handle(handle);
            },
            py::call_guard<py::gil_scoped_release>());
    m.def("set_global_rng_seed", &rng::set_global_rng_seed);
    m.def("set_global_rng_seed", [](uint64_t seed) -> void {
        mgb_assert(
                python::interpreter_for_py->check_available(),
                "set global random seed failed since imperative interpreter has been "
                "destroyed");
        python::interpreter_for_py->sync();
        mgb::CompNode::sync_all();
        rng::set_global_rng_seed(seed);
    });
    m.def("get_global_rng_seed", &rng::get_global_rng_seed);
    m.def("get_rng_handle_compnode", &rng::get_rng_handle_compnode);
@@ -59,14 +59,47 @@ def test_where():

def test_dropout():
    # test training mode
    data = tensor(np.ones(10000000, dtype=np.float32))
    out = F.nn.dropout(data, 1.0 / 3.0, training=True)
    assert not out.numpy().all()

    # test eval mode
    out = F.nn.dropout(data, 1.0 / 3.0, training=False)
    assert out.numpy().all()
    from megengine.autodiff import GradManager
    from megengine.core._imperative_rt.ops import set_global_rng_seed

    def test_dropout_with_shape(shape, rate):
        data = tensor(np.ones(shape, dtype=np.float32))
        gm = GradManager().attach([data])
        with gm:
            out = F.nn.dropout(data, rate, training=True)
            gm.backward(out, tensor(np.ones(shape, dtype=np.float32)))
        assert not out.numpy().all()
        np.testing.assert_allclose(out.numpy(), data.grad.numpy(), 1e-7, 1e-7)

    def test_multiple_dropout(shape, rate):
        data = tensor(np.ones(shape, dtype=np.float32))
        gm = GradManager().attach([data])
        with gm:
            out1 = F.nn.dropout(data, rate, training=True)
            out2 = F.nn.dropout(out1, rate, training=True)
            out3 = F.nn.dropout(out2, rate, training=True)
            gm.backward(out3, tensor(np.ones(shape, dtype=np.float32)))
        np.testing.assert_allclose(out3.numpy(), data.grad.numpy(), 1e-7, 1e-7)

    def test_dropout_seed(shape, rate):
        data = tensor(np.random.randn(*shape), dtype="float32")
        set_global_rng_seed(111)
        out1 = F.nn.dropout(data, rate, training=True)
        out2 = F.nn.dropout(data, rate, training=True)
        assert not (out1.numpy() == out2.numpy()).all()

        set_global_rng_seed(111)
        out3 = F.nn.dropout(data, rate, training=True)
        assert (out1.numpy() == out3.numpy()).all()

        set_global_rng_seed(222)
        out4 = F.nn.dropout(data, rate, training=True)
        assert not (out1.numpy() == out4.numpy()).all()

    test_dropout_with_shape([13, 17, 63, 21], 0.4)
    test_dropout_with_shape([16, 32, 64], 0.3)
    test_multiple_dropout([1024], 0.2)
    test_dropout_seed([16, 32], 0.2)


def test_matinv():
@@ -282,6 +282,21 @@ struct OpMeth<ShuffleRNG> {
    }
};

template <>
struct OpMeth<Dropout> {
    using DnnOp = megdnn::Dropout;
    using Param = DnnOp::Param;
    using OpNode = mgb::opr::Dropout;
    static Param make_param(const Dropout& opdef) {
        auto handle_seed = RNGDnnOpManager::get_seed(opdef.handle);
        mgb_assert(
                handle_seed == opdef.seed,
                "inconsistent dropout seed: dropout op: %lu handle: %lu", handle_seed,
                opdef.seed);
        return {opdef.drop_prob, handle_seed};
    }
};

template <bool>
struct _InferLayout;

@@ -482,6 +497,26 @@ SmallVector<LogicalTensorDesc> infer_output_attrs<ShuffleRNG>(
    return dests;
}

template <>
SmallVector<LogicalTensorDesc> infer_output_attrs<Dropout>(
        const OpDef& op, const SmallVector<TensorPtr>& inputs) {
    SmallVector<LogicalTensorDesc> dests(2);
    auto&& cn = inputs[0]->comp_node();

    dests[0].comp_node = cn;
    dests[0].layout = TensorLayout(inputs[0]->layout());
    dests[0].layout.dtype = inputs[0]->layout().dtype;

    auto get_mask_size = [&]() -> size_t {
        auto dnn_handle = MegDNNHandle::get(CompNodeEnv::from_comp_node(cn)).handle();
        return dnn_handle->create_operator<megdnn::Dropout>()->get_mask_size_in_bytes(
                inputs[0]->layout());
    };
    dests[1].comp_node = cn;
    dests[1].layout = TensorLayout(TensorShape({get_mask_size()}), dtype::Byte());
    return dests;
}

template <typename Op>
std::tuple<SmallVector<MemoryDesc>, SmallVector<MemoryDesc>> infer_output_mem_desc(
        const OpDef& def, const SmallVector<TensorPtr>& inputs_tensors,

@@ -559,6 +594,25 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible<
    return {dests, true};
}

template <>
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible<Dropout>(
        const OpDef& op, const SmallVector<LogicalTensorDesc>& inputs) {
    SmallVector<LogicalTensorDesc> dests(2);
    auto cn = inputs[0].comp_node;

    dests[0].comp_node = cn;
    dests[0].layout = TensorLayout(inputs[0].layout);
    dests[0].layout.dtype = inputs[0].layout.dtype;

    auto get_mask_size = [&]() -> size_t {
        auto dnn_handle = MegDNNHandle::get(CompNodeEnv::from_comp_node(cn)).handle();
        return dnn_handle->create_operator<megdnn::Dropout>()->get_mask_size_in_bytes(
                inputs[0].layout);
    };
    dests[1].comp_node = cn;
    dests[1].layout = TensorLayout(TensorShape({get_mask_size()}), dtype::Byte());
    return {dests, true};
}

} // anonymous namespace

Handle new_handle(CompNode comp_node, uint64_t seed) {

@@ -599,6 +653,7 @@ REG_RNG_OP(PermutationRNG, SymbolVar)
REG_RNG_OP(PoissonRNG, SymbolVar)
REG_RNG_OP(BetaRNG, SymbolVar)
REG_RNG_OP(ShuffleRNG, SymbolVarArray)
REG_RNG_OP(Dropout, SymbolVarArray)
#undef REG_RNG_OP

} // namespace mgb::imperative::rng
@@ -433,4 +433,19 @@ def LRN: MgbHashableOp<"LRN", [LRNParam]>;

def LayerNorm: MgbHashableOp<"LayerNorm", [LayerNormParam]>;

def Dropout: MgbHashableOp<"Dropout", [DropoutParam]> {
    let extraArguments = (ins
        MgbSizeTAddr:$handle
    );
    let hashFunction = [{
        return mgb::hash_pair_combine(
            mgb::hash($_self.dyn_typeinfo()),
            mgb::hash_pair_combine(
                mgb::hash($_self.drop_prob),
                mgb::hash($_self.handle))
        );
    }];
    let cmpFunction = [{return $0.handle == $1.handle && $0.drop_prob == $1.drop_prob;}];
}

#endif // MGB_OPS
@@ -201,6 +201,8 @@ template class RNGOprBase<::megdnn::BetaRNG>;
template class RNGOprBase<::megdnn::PoissonRNG>;
template class RNGOprBase<::megdnn::ShuffleRNGForward>;
template class RNGOprBase<::megdnn::ShuffleRNGBackward>;
template class RNGOprBase<::megdnn::DropoutForward>;
template class RNGOprBase<::megdnn::DropoutBackward>;
#if MGB_ENABLE_GRAD
IMPL(GaussianRNG);
IMPL(UniformRNG);

@@ -300,4 +302,134 @@ MGB_IMPL_OPR_GRAD(ShuffleRNGForward) {
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ShuffleRNGBackward);
MEGDNN_OPR_INIT3(ShuffleRNGBackward, "shuffle_rng_bwd", 2, true)
/* ================= DropoutForward ================= */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(DropoutForward);

DropoutForward::DropoutForward(
        VarNode* inp, const Param& param, const OperatorNodeConfig& config)
        : Super({inp->owner_graph(), config, "dropout", {inp}}, param) {
    add_input({inp});
    add_output(None)->dtype(inp->dtype()).add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
    add_output(None)->dtype(dtype::Byte()).add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
    cg::add_workspace_output(this);
    add_equivalence_component<ScalarHash<void*>>(this);
}

SymbolVarArray DropoutForward::make(
        SymbolVar inp, const Param& param, const OperatorNodeConfig& config) {
    auto node = inp.node()->owner_graph()->insert_opr(
            std::make_unique<DropoutForward>(inp.node(), param, config));
    mgb_assert(node->output().size() == 3);
    return {node->output(0), node->output(1)};
}

void DropoutForward::init_output_static_infer_desc() {
    using namespace cg::static_infer;
    auto&& mgr = owner_graph()->static_infer_manager();

    mgr.register_shape_infer(output(0), ShapeInferDesc::make_identity(input(0)));

    auto infer_mask = [this](TensorShape& dest, const InpVal& iv) {
        ensure_megdnn_opr();
        dest.ndim = 1;
        dest.shape[0] = m_dnn_opr->get_mask_size_in_bytes(
                {iv.val[0].shape(), input(0)->dtype()});
        return true;
    };
    mgr.register_shape_infer(
            output(1), {SourceType::DEP, {{input(0), DepType::SHAPE}}, infer_mask});

    auto infer_wk = [this](TensorShape& dest, const InpVal& inp) {
        ensure_megdnn_opr();
        dest.ndim = 1;
        dest.shape[0] = m_dnn_opr->get_workspace_in_bytes(
                {inp.val[0].shape(), input(0)->dtype()},
                {output(0)->shape(), output(0)->dtype()},
                {output(1)->shape(), output(1)->dtype()});
        return true;
    };
    mgr.register_shape_infer(
            output(2), {SourceType::DEP, {{input(0), DepType::SHAPE}}, infer_wk});
}

void DropoutForward::add_input_layout_constraint() {
    input(0)->add_layout_constraint_contiguous();
};

void DropoutForward::scn_do_execute() {
    auto&& ret = output(0);
    if (ret->layout().is_empty()) {
        mgb_assert(ret->dev_tensor().empty());
        return;
    }
    m_dnn_opr->exec(
            input(0)->dev_tensor().as_megdnn(), output(0)->dev_tensor().as_megdnn(),
            output(1)->dev_tensor().as_megdnn(),
            get_megdnn_workspace_from_var(output(2)));
}

cg::OperatorNodeBase::NodeProp* DropoutForward::do_make_node_prop() const {
    auto prop = Super::do_make_node_prop();
    prop->add_flag(NodeProp::Flag::IMPURE_FUNC);
    for (auto i : input()) {
        prop->add_dep_type_existing_var(i, NodeProp::DepType::VALUE_ALLOW_EMPTY);
    }
    return prop;
}

#if MGB_ENABLE_GRAD
MGB_IMPL_OPR_GRAD(DropoutForward) {
    SymbolVar grad = DropoutBackward::make(out_grad[0], opr.output(1), opr.param());
    VarNodeArray ret;
    ret.push_back(grad.node());
    return ret;
}
#endif
/* ==================== DropoutBackward ==================== */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(DropoutBackward);

DropoutBackward::DropoutBackward(
        VarNode* doup, VarNode* mask, const Param& param,
        const OperatorNodeConfig& config)
        : Super({doup->owner_graph(), config, "dropout_backward", {doup, mask}}, 0,
                true) {
    init_megdnn_opr(*this, param);
    add_input({doup, mask});
}

SymbolVar DropoutBackward::make(
        SymbolVar doup, SymbolVar mask, const Param& param,
        const OperatorNodeConfig& config) {
    return doup.insert_single_output_opr<DropoutBackward>(
            doup.node(), mask.node(), param, config);
}

void DropoutBackward::init_output_static_infer_desc() {
    using namespace cg::static_infer;
    auto&& mgr = owner_graph()->static_infer_manager();
    mgr.register_shape_infer(output(0), ShapeInferDesc::make_identity(input(0)));
    this->init_output_static_infer_desc_workspace(false);
}

void DropoutBackward::init_output_dtype() {
    output(0)->dtype(input(0)->dtype());
}

size_t DropoutBackward::get_workspace_size_bytes(
        const TensorShapeArray& input_shapes,
        const TensorShapeArray& output_shapes) const {
    return megdnn_opr()->get_workspace_in_bytes(
            {input_shapes[0], input(0)->dtype(), input(0)->format()},
            {input_shapes[1], input(1)->dtype(), input(1)->format()},
            {output_shapes[0], output(0)->dtype(), output(0)->format()});
}

void DropoutBackward::scn_do_execute() {
    megdnn_opr()->exec(
            input(0)->dev_tensor().as_megdnn(), input(1)->dev_tensor().as_megdnn(),
            output(0)->dev_tensor().as_megdnn(), {});
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
@@ -29,6 +29,19 @@ struct OprMaker<opr::ShuffleRNG, 1> {
        return out[0].node()->owner_opr();
    }
};

// OprMaker in MGB_SEREG_OPR only supports oprs with a single output
template <>
struct OprMaker<opr::DropoutForward, 1> {
    using Param = opr::DropoutForward::Param;
    static cg::OperatorNodeBase* make(
            const Param& param, const cg::VarNodeArray& i, ComputingGraph& graph,
            const OperatorNodeConfig& config) {
        MGB_MARK_USED_VAR(graph);
        return opr::DropoutForward::make(i[0], param, config)[0].node()->owner_opr();
    }
};

} // namespace serialization

namespace opr {

@@ -43,6 +56,8 @@ MGB_SEREG_OPR(PermutationRNG, 1);
MGB_SEREG_OPR(BetaRNG, 2);
MGB_SEREG_OPR(ShuffleRNG, 1);
MGB_SEREG_OPR(ShuffleRNGBackward, 3);
MGB_SEREG_OPR(Dropout, 1);
MGB_SEREG_OPR(DropoutBackward, 2);
} // namespace opr
} // namespace mgb
@@ -87,6 +87,7 @@ _DEFINE_RNG_OPR_WITH_INPUT_CLASS(PoissonRNG)
#undef _OUTPUTS
#define _OUTPUTS SymbolVarArray
_DEFINE_RNG_OPR_WITH_INPUT_CLASS(ShuffleRNGForward)
_DEFINE_RNG_OPR_WITH_INPUT_CLASS(DropoutForward)
#undef _OUTPUTS
#undef _INPUTS

@@ -108,6 +109,8 @@ using PermutationRNG = intl::PermutationRNG;
using PoissonRNG = intl::PoissonRNG;
using BetaRNG = intl::BetaRNG;
using ShuffleRNG = intl::ShuffleRNGForward;
using Dropout = intl::DropoutForward;
using DropoutForward = intl::DropoutForward;

MGB_DEFINE_OPR_CLASS_WITH_EXPORT(
        ShuffleRNGBackward, intl::MegDNNOprWrapperBwd<megdnn::ShuffleRNGBackward>) // {

@@ -121,6 +124,26 @@ public:
            const Param& param = {}, const OperatorNodeConfig& config = {});
};

MGB_DEFINE_OPR_CLASS_WITH_EXPORT(
        DropoutBackward, intl::MegDNNOprWrapperBwd<megdnn::DropoutBackward>) // {
public:
    MGE_WIN_DECLSPEC_FUC DropoutBackward(
            VarNode* doup, VarNode* mask, const Param& param,
            const OperatorNodeConfig& config);

    MGE_WIN_DECLSPEC_FUC static SymbolVar make(
            SymbolVar doup, SymbolVar mask, const Param& param = {},
            const OperatorNodeConfig& config = {});

private:
    void init_output_static_infer_desc() override;
    void init_output_dtype() override;
    size_t get_workspace_size_bytes(
            const TensorShapeArray& input_shapes,
            const TensorShapeArray& output_shapes) const override;
    void scn_do_execute() override;
};

} // namespace opr
} // namespace mgb
@@ -446,4 +446,42 @@ TEST(TestOprRand, PermutationReprod) {
    });
}

TEST(TestOprRand, Dropout) {
    auto run = [&](TensorShape shape, uint64_t seed, float drop_prob) {
        using Param = megdnn::DropoutBase::Param;
        Param param(drop_prob, seed);
        float scale = 1.0 / (1.0 - drop_prob);

        std::shared_ptr<HostTensorND> inp_host(
                new HostTensorND{CompNode::load("xpux"), shape, dtype::Float32()});
        for (size_t i = 0; i < shape.total_nr_elems(); ++i) {
            inp_host->ptr<dt_float32>()[i] = 1.0f;
        }

        auto graph = ComputingGraph::make();
        auto inp_sym = opr::Host2DeviceCopy::make(*graph, inp_host);
        auto outs = opr::DropoutForward::make(inp_sym, param);

        HostTensorND oup_host, mask_host, ws_host;
        auto func = graph->compile(
                {make_callback_copy(outs[0], oup_host),
                 make_callback_copy(outs[1], mask_host)});
        func->execute();

        size_t droped_cnt = 0;
        for (size_t i = 0; i < shape.total_nr_elems(); ++i) {
            ASSERT_TRUE(
                    oup_host.ptr<dt_float32>()[i] == 0 ||
                    oup_host.ptr<dt_float32>()[i] == scale);
            if (oup_host.ptr<dt_float32>()[i] == 0) {
                droped_cnt++;
            }
        }
        float real_drop = droped_cnt * 1.0 / shape.total_nr_elems();
        ASSERT_LT(abs(drop_prob - real_drop), 1e-2);
    };
    run({100000}, 0, 0.2);
    run({64, 32, 16, 16}, 1, 0.4);
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
@@ -117,6 +117,7 @@ union OperatorParam {
    param.ShuffleRNG = 83,
    param.CheckNonFinite = 84,
    param.LayerNorm = 85,
    param.Dropout = 86,
}

table Operator {