
feat(dnn/check_non_finite): add batch check_non_finite

GitOrigin-RevId: e108133282
Branch: release-1.7
Megvii Engine Team, 3 years ago
Commit: 11d75fecb5
20 changed files with 366 additions and 122 deletions
 1. dnn/include/megdnn/oprs/general.h (+7, −6)
 2. dnn/src/common/check_non_finite.cpp (+6, −7)
 3. dnn/src/common/reduce_helper.h (+20, −6)
 4. dnn/src/common/reduce_helper_device.h (+19, −6)
 5. dnn/src/cuda/check_non_finite/kern.cu (+2, −1)
 6. dnn/src/cuda/check_non_finite/opr_impl.cpp (+70, −9)
 7. dnn/src/cuda/check_non_finite/opr_impl.h (+4, −2)
 8. dnn/src/naive/check_non_finite/opr_impl.cpp (+23, −26)
 9. dnn/src/naive/check_non_finite/opr_impl.h (+7, −3)
10. dnn/test/common/opr_proxy.h (+21, −0)
11. dnn/test/cuda/check_non_finite.cpp (+6, −3)
12. dnn/test/naive/check_non_finite.cpp (+9, −4)
13. imperative/python/megengine/amp/grad_scaler.py (+10, −9)
14. imperative/python/megengine/functional/math.py (+3, −3)
15. imperative/python/test/unit/amp/test_grad_scaler.py (+18, −13)
16. imperative/python/test/unit/functional/test_math.py (+7, −6)
17. imperative/src/impl/ops/misc.cpp (+45, −3)
18. src/opr/impl/misc.cpp (+69, −13)
19. src/opr/impl/misc.sereg.h (+5, −1)
20. src/opr/include/megbrain/opr/misc.h (+15, −1)

dnn/include/megdnn/oprs/general.h (+7, −6)

@@ -1345,22 +1345,23 @@ protected:
  */
 class CheckNonFinite : public OperatorBase {
     DEF_OPR_PARAM(Empty);
-    DEF_OPR_IMPL(CheckNonFinite, OperatorBase, 1, 1);
+    DEF_OPR_IMPL(CheckNonFinite, OperatorBase, -1, 1);
+    size_t m_size = 0;

 public:
     virtual size_t get_workspace_in_bytes(
-            const TensorLayout& src, const TensorLayout& dst) = 0;
+            const TensorNDArray& srcs, const TensorLayout& dst) = 0;

-    void deduce_layout(const TensorLayout& src, TensorLayout& dst);
+    void deduce_layout(const TensorLayoutArray& srcs, TensorLayout& dst);

     virtual void exec(
-            _megdnn_tensor_in src, _megdnn_tensor_out dst,
+            _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
             _megdnn_workspace workspace) = 0;

 protected:
     void check_exec(
-            const TensorLayout& src, const TensorLayout& dst,
-            size_t workspace_in_bytes);
+            const TensorNDArray& srcs, const TensorND& dst, size_t workspace_in_bytes);
+    virtual size_t _get_workspace_in_bytes() = 0;
 };

 /*!
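
A quick way to read the new contract: the operator now takes a whole array of tensors and reduces them to a single int32 flag. Below is a minimal standalone sketch of those semantics only (std::vector stands in for TensorNDArray; this is illustration, not the MegDNN API itself):

    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <limits>
    #include <vector>

    // OR together the "is non-finite" flag of every element of every input.
    int32_t check_non_finite(const std::vector<std::vector<float>>& srcs) {
        int32_t found = 0;
        for (const auto& src : srcs)
            for (float v : src)
                found |= !std::isfinite(v);  // true for both inf and nan
        return found;
    }

    int main() {
        std::vector<std::vector<float>> ok{{1.f, 2.f}, {3.f}};
        std::vector<std::vector<float>> bad{
                {1.f}, {std::numeric_limits<float>::quiet_NaN()}};
        std::cout << check_non_finite(ok) << "\n";   // prints 0
        std::cout << check_non_finite(bad) << "\n";  // prints 1
    }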


dnn/src/common/check_non_finite.cpp (+6, −7)

@@ -15,16 +15,15 @@
 namespace megdnn {

 void CheckNonFinite::check_exec(
-        const TensorLayout& src, const TensorLayout& dst, size_t workspace_in_bytes) {
-    megdnn_assert_contiguous(src);
-    megdnn_assert_contiguous(dst);
-    megdnn_assert(src.ndim == 1);
-    megdnn_assert(src.dtype == dtype::Float32());
-    auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst);
+        const TensorNDArray& srcs, const TensorND& dst, size_t workspace_in_bytes) {
+    megdnn_assert_contiguous(dst.layout);
+    megdnn_assert(srcs.size() > 0);
+    megdnn_assert(srcs.begin()->layout.dtype == dtype::Float32());
+    auto required_workspace_in_bytes = _get_workspace_in_bytes();
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
 }

-void CheckNonFinite::deduce_layout(const TensorLayout&, TensorLayout& dst) {
+void CheckNonFinite::deduce_layout(const TensorLayoutArray&, TensorLayout& dst) {
     dst.shape[0] = 1;
     dst.ndim = 1;
     dst.dtype = dtype::Int32();


dnn/src/common/reduce_helper.h (+20, −6)

@@ -156,21 +156,35 @@ struct MaxOp<src_ctype, dst_ctype, dt_float32> {
             : INIT(wtype(DTypeTrait<wtype>::min())), src(src), dst(dst), B(B) {}
 };

-template <typename src_ctype, typename dst_ctype, typename wtype_>
+template <typename src_ctype, typename index_ctype, typename dst_ctype, typename wtype_>
 struct CheckNonFiniteOp {
     typedef wtype_ wtype;
     const wtype INIT;

-    RefPtr src;
+    RefPtr* srcs;
+    RefPtr srcs_total_nr_elems;
     RefPtr dst;
     const size_t B;

-    wtype read(uint32_t idx) { return !std::isfinite(src.ptr<src_ctype>()[idx]); }
+    wtype read(uint32_t idx) {
+        size_t x = idx / B;
+        size_t y = idx % B;
+        if (y < srcs_total_nr_elems.ptr<index_ctype>()[x]) {
+            RefPtr src = srcs[x];
+            return !std::isfinite(src.ptr<src_ctype>()[y]);
+        }
+        return 0;
+    }
     void write(uint32_t idx, wtype val) { dst.ptr<dst_ctype>()[idx] = val; }
     static wtype apply(wtype lhs, wtype rhs) { return lhs | rhs; }
     MEGDNN_HOST MEGDNN_DEVICE
-    CheckNonFiniteOp(const RefPtr& src, const RefPtr& dst, size_t B)
-            : INIT(wtype(0)), src(src), dst(dst), B(B) {}
+    CheckNonFiniteOp(
+            RefPtr* srcs, const RefPtr& srcs_total_nr_elems, const RefPtr& dst,
+            size_t B)
+            : INIT(wtype(0)),
+              srcs(srcs),
+              srcs_total_nr_elems(srcs_total_nr_elems),
+              dst(dst),
+              B(B) {}
 };

 void get_ABC(const TensorShape& shape, size_t& A, size_t& B, size_t& C, size_t axis);
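
The interesting part of the new op is the index arithmetic in read(): the reduction runs over m_size * B virtual elements, where B is the fixed chunk length, and each flat index decomposes into chunk x = idx / B and offset y = idx % B; offsets past a chunk's real length (the padding of a short tail chunk) contribute 0 to the OR-reduction. A standalone sketch of that mapping, with plain vectors standing in for the RefPtr tables:

    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        const size_t B = 4;  // stands in for the fixed chunk length
        // two chunks: a full one and a short tail holding a NaN
        std::vector<std::vector<float>> chunks{{1.f, 2.f, 3.f, 4.f}, {5.f, NAN}};
        std::vector<size_t> chunk_len{4, 2};  // srcs_total_nr_elems analogue

        int32_t acc = 0;
        for (uint32_t idx = 0; idx < chunks.size() * B; ++idx) {
            size_t x = idx / B, y = idx % B;  // chunk index, offset in chunk
            // out-of-range offsets read as 0, exactly like the padded tail
            int32_t v = (y < chunk_len[x]) ? !std::isfinite(chunks[x][y]) : 0;
            acc |= v;  // apply(lhs, rhs) == lhs | rhs
        }
        std::cout << acc << "\n";  // prints 1: the NaN in chunk 1 was seen
    }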


dnn/src/common/reduce_helper_device.h (+19, −6)

@@ -185,28 +185,41 @@ struct MaxOp<src_ctype, dst_ctype, dt_float32> {
             : INIT(wtype(DTypeTrait<wtype>::min())), src(src), dst(dst), B(B) {}
 };

-template <typename src_ctype, typename dst_ctype, typename wtype_>
+template <typename src_ctype, typename index_ctype, typename dst_ctype, typename wtype_>
 struct CheckNonFiniteOp {
     typedef wtype_ wtype;
     const wtype INIT;

-    src_ctype* src;
+    src_ctype** srcs;
+    index_ctype* srcs_total_nr_elems;
     dst_ctype* dst;
     const size_t B;

     MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) {
+        size_t x = idx / B;
+        size_t y = idx % B;
+        if (y < srcs_total_nr_elems[x]) {
 #if defined(__CUDA_ARCH__)
-        return !isfinite(src[idx]);
+            wtype val = isfinite(srcs[x][y]);
 #else
-        return !std::isfinite(src[idx]);
+            wtype val = std::isfinite(srcs[x][y]);
 #endif
+            return !val;
+        }
+        return 0;
     }
     MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; }
     static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) {
         return lhs | rhs;
     }
-    MEGDNN_HOST MEGDNN_DEVICE CheckNonFiniteOp(src_ctype* src, dst_ctype* dst, size_t B)
-            : INIT(wtype(0)), src(src), dst(dst), B(B) {}
+    MEGDNN_HOST MEGDNN_DEVICE CheckNonFiniteOp(
+            src_ctype** srcs, index_ctype* srcs_total_nr_elems, dst_ctype* dst,
+            size_t B)
+            : INIT(wtype(0)),
+              srcs(srcs),
+              srcs_total_nr_elems(srcs_total_nr_elems),
+              dst(dst),
+              B(B) {}
 };

 } // namespace device_reduce


dnn/src/cuda/check_non_finite/kern.cu (+2, −1)

@@ -19,7 +19,8 @@ namespace cuda {
 #define COMMA ,

 INST_REDUCE(
-        device_reduce::CheckNonFiniteOp<dt_float32 COMMA dt_int32 COMMA dt_int32>,
+        device_reduce::CheckNonFiniteOp<
+                dt_float32 COMMA size_t COMMA dt_int32 COMMA dt_int32>,
         false);

 #undef COMMA


dnn/src/cuda/check_non_finite/opr_impl.cpp (+70, −9)

@@ -21,22 +21,83 @@ namespace megdnn {
 namespace cuda {

 using device_reduce::CheckNonFiniteOp;
+#define total_nr_elems_max 2048
+size_t CheckNonFiniteImpl::_get_workspace_in_bytes() {
+    // Call the _get_workspace_in_bytes to reduce the loop fetch workspace bytes
+    typedef CheckNonFiniteOp<dt_float32, size_t, dt_int32, dt_int32> Op;
+    megdnn_assert(m_size > 0);
+    WorkspaceBundle bundle(
+            nullptr, {
+                             sizeof(dt_float32*) * m_size,
+                             sizeof(size_t) * m_size,
+                     });
+    return get_reduce_workspace_in_bytes<Op>(1, m_size * total_nr_elems_max, 1) +
+           bundle.total_size_in_bytes();
+}

 size_t CheckNonFiniteImpl::get_workspace_in_bytes(
-        const TensorLayout& src, const TensorLayout& dst) {
-    typedef CheckNonFiniteOp<dt_float32, dt_int32, dt_int32> Op;
-    return get_reduce_workspace_in_bytes<Op>(1, src.total_nr_elems(), 1);
+        const TensorNDArray& srcs, const TensorLayout&) {
+    m_size = 0;
+    for (const auto& src : srcs) {
+        m_size += DIVUP(src.layout.total_nr_elems(), total_nr_elems_max);
+    }
+    return _get_workspace_in_bytes();
 }

 void CheckNonFiniteImpl::exec(
-        _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) {
-    check_exec(src.layout, dst.layout, workspace.size);
-    typedef CheckNonFiniteOp<dt_float32, dt_int32, dt_int32> Op;
+        _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
+        _megdnn_workspace workspace) {
+    check_exec(srcs, dst, workspace.size);
+    typedef CheckNonFiniteOp<dt_float32, size_t, dt_int32, dt_int32> Op;
     auto stream = cuda_stream(this->handle());
-    auto B = src.layout.total_nr_elems();
+    SmallVector<size_t> workspace_sizes{
+            sizeof(dt_float32*) * m_size,
+            sizeof(size_t) * m_size,
+    };
+    WorkspaceBundle workspace_cpu(nullptr, workspace_sizes),
+            workspace_gpu(nullptr, workspace_sizes);
+    auto total_workspace_size = workspace_cpu.total_size_in_bytes();
+    void* workspace_cpu_raw = malloc(total_workspace_size);
+    megdnn_assert_internal(workspace_cpu_raw);
+    void* workspace_gpu_raw = workspace.raw_ptr;
+    workspace_cpu = WorkspaceBundle(workspace_cpu_raw, workspace_sizes);
+    workspace_gpu = WorkspaceBundle(workspace_gpu_raw, workspace_sizes);
+
+    auto srcs_cpu = static_cast<dt_float32**>(workspace_cpu.get(0));
+    auto srcs_gpu = static_cast<dt_float32**>(workspace_gpu.get(0));
+    auto srcs_total_nr_elems_cpu = static_cast<size_t*>(workspace_cpu.get(1));
+    auto srcs_total_nr_elems_gpu = static_cast<size_t*>(workspace_gpu.get(1));
+
+    // srcs
+    // cut the tensor to a fixed length of total_nr_elems_max
+    size_t i = 0;
+    for (const auto& src : srcs) {
+        size_t src_nr_elems = src.layout.total_nr_elems();
+        size_t nr_elems = DIVUP(src_nr_elems, total_nr_elems_max);
+        for (size_t j = 0; j < nr_elems; ++j, ++i) {
+            srcs_cpu[i] = src.ptr<dt_float32>() + j * total_nr_elems_max;
+            if (j + 1 == nr_elems && src_nr_elems % total_nr_elems_max) {
+                srcs_total_nr_elems_cpu[i] = src_nr_elems % total_nr_elems_max;
+            } else {
+                srcs_total_nr_elems_cpu[i] = total_nr_elems_max;
+            }
+        }
+    }
+    for (size_t i = 0; i < workspace_cpu.nr_workspace(); ++i) {
+        cuda_check(cudaMemcpyAsync(
+                workspace_gpu.get(i), workspace_cpu.get(i), workspace_cpu.get_size(i),
+                cudaMemcpyHostToDevice, stream));
+    }
+    cuda_check(cudaStreamAddCallback(
+            stream, callback_free, static_cast<void*>(workspace_cpu_raw), 0));

     return run_reduce<Op, false>(
-            workspace.ptr<dt_int32>(), 1, B, 1, stream,
-            Op(src.ptr<dt_float32>(), dst.ptr<dt_int32>(), B));
+            static_cast<dt_int32*>(
+                    (void*)((char*)workspace_gpu_raw +
+                            workspace_gpu.total_size_in_bytes())),
+            1, m_size * total_nr_elems_max, 1, stream,
+            Op(srcs_gpu, srcs_total_nr_elems_gpu, dst.ptr<dt_int32>(),
+               total_nr_elems_max));
 }

 } // namespace cuda
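
The host-side setup above is mostly chunking bookkeeping: every input is cut into slices of at most total_nr_elems_max elements, and each slice records a base pointer and its real length, with only the last slice of a tensor shortened to the remainder. A standalone sketch of just that arithmetic (plain offsets stand in for the device pointers written into srcs_cpu):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    #define DIVUP(a, b) (((a) + (b)-1) / (b))

    int main() {
        const size_t total_nr_elems_max = 2048;
        std::vector<size_t> tensor_sizes{4096, 5000};  // two input tensors

        std::vector<size_t> offsets, lengths;
        for (size_t n : tensor_sizes) {
            size_t nr_chunks = DIVUP(n, total_nr_elems_max);
            for (size_t j = 0; j < nr_chunks; ++j) {
                offsets.push_back(j * total_nr_elems_max);
                bool tail = (j + 1 == nr_chunks) && (n % total_nr_elems_max);
                lengths.push_back(
                        tail ? n % total_nr_elems_max : total_nr_elems_max);
            }
        }
        for (size_t i = 0; i < offsets.size(); ++i)
            std::cout << "chunk " << i << ": offset " << offsets[i]
                      << ", len " << lengths[i] << "\n";
        // 4096 -> two full chunks; 5000 -> two full chunks plus a 904-element tail
    }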


dnn/src/cuda/check_non_finite/opr_impl.h (+4, −2)

@@ -18,16 +18,18 @@ namespace megdnn {
 namespace cuda {

 class CheckNonFiniteImpl final : public CheckNonFinite {
+    size_t _get_workspace_in_bytes() override;
+
 public:
     using CheckNonFinite::CheckNonFinite;

     size_t get_workspace_in_bytes(
-            const TensorLayout& src, const TensorLayout& dst) override;
+            const TensorNDArray& srcs, const TensorLayout& dst) override;

     bool is_thread_safe() const override { return true; }

     void exec(
-            _megdnn_tensor_in src, _megdnn_tensor_out dst,
+            _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
             _megdnn_workspace workspace) override;
 };




dnn/src/naive/check_non_finite/opr_impl.cpp (+23, −26)

@@ -17,21 +17,25 @@
 namespace {
 using namespace megdnn;

-#define src_ctype dt_float32
-#define wtype dt_int32
-
-void reduce_fwd(const src_ctype* sptr, wtype* dptr, size_t size) {
-    std::function<wtype(size_t, size_t)> func;
-    func = [&](size_t l, size_t r) -> wtype {
-        if (l + 1 < r) {
-            size_t mid = l + (r - l) / 2;
-            return func(l, mid) | func(mid, r);
-        } else {
-            return static_cast<wtype>(!std::isfinite(sptr[l]));
-        }
-    };
-
-    dptr[0] = func(0, size);
+#define wtype dt_int32
+
+void reduce_fwd(const TensorNDArray& srcs, wtype* dptr) {
+    dptr[0] = 0;
+    for (auto src : srcs) {
+        auto sptr = src.ptr<dt_float32>();
+        size_t size = src.layout.total_nr_elems();
+        std::function<wtype(wtype, wtype)> func;
+        func = [&](wtype l, wtype r) -> wtype {
+            if (l + 1 < r) {
+                wtype mid = l + (r - l) / 2;
+                return func(l, mid) | func(mid, r);
+            } else {
+                auto val = std::isfinite(sptr[l]);
+                return static_cast<wtype>(!val);
+            }
+        };
+        dptr[0] |= func(0, size);
+    }
 }

 } // namespace
@@ -39,20 +43,13 @@ void reduce_fwd(const src_ctype* sptr, wtype* dptr, size_t size) {
 namespace megdnn {
 namespace naive {

-size_t CheckNonFiniteImpl::get_workspace_in_bytes(
-        const TensorLayout&, const TensorLayout&) {
-    return 0;
-}
-
 void CheckNonFiniteImpl::exec(
-        _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) {
-    check_exec(src.layout, dst.layout, workspace.size);
+        _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
+        _megdnn_workspace workspace) {
+    check_exec(srcs, dst, workspace.size);

     auto handle = static_cast<HandleImpl*>(this->handle());
-    MEGDNN_DISPATCH_CPU_KERN(
-            handle, reduce_fwd(
-                            src.ptr<dt_float32>(), dst.ptr<dt_int32>(),
-                            src.layout.total_nr_elems()));
+    MEGDNN_DISPATCH_CPU_KERN(handle, reduce_fwd(srcs, dst.ptr<dt_int32>()));
 }
 } // namespace naive
 } // namespace megdnn
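
The naive kernel keeps the original pairwise reduction but wraps it in a loop over the batch: the lambda splits [l, r) in half recursively, ORs the per-element !isfinite flags, and each tensor's result is OR-ed into dptr[0]. A standalone sketch over plain vectors:

    #include <cmath>
    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<std::vector<float>> srcs{{1.f, 2.f}, {3.f, INFINITY}};
        int32_t result = 0;
        for (const auto& src : srcs) {
            // pairwise divide-and-conquer OR over [l, r)
            std::function<int32_t(size_t, size_t)> func =
                    [&](size_t l, size_t r) -> int32_t {
                if (l + 1 < r) {
                    size_t mid = l + (r - l) / 2;
                    return func(l, mid) | func(mid, r);
                }
                return static_cast<int32_t>(!std::isfinite(src[l]));
            };
            result |= func(0, src.size());
        }
        std::cout << result << "\n";  // prints 1: the inf in the second tensor
    }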


dnn/src/naive/check_non_finite/opr_impl.h (+7, −3)

@@ -17,16 +17,20 @@ namespace megdnn {
 namespace naive {

 class CheckNonFiniteImpl final : public CheckNonFinite {
+    size_t _get_workspace_in_bytes() override { return 0; }
+
 public:
     using CheckNonFinite::CheckNonFinite;

     bool is_thread_safe() const override { return true; }

-    size_t get_workspace_in_bytes(
-            const TensorLayout& src, const TensorLayout& dst) override;
+    size_t get_workspace_in_bytes(const TensorNDArray&, const TensorLayout&) override {
+        m_size = 0;
+        return _get_workspace_in_bytes();
+    }

     void exec(
-            _megdnn_tensor_in src, _megdnn_tensor_out dst,
+            _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
             _megdnn_workspace workspace) override;
 };




dnn/test/common/opr_proxy.h (+21, −0)

@@ -203,6 +203,27 @@ struct OprProxy<ConcatForward> {
 };

 template <>
+struct OprProxy<CheckNonFinite> {
+    static void deduce_layout(CheckNonFinite* opr, TensorLayoutArray& layouts) {
+        megdnn_assert(layouts.size() >= 2);
+        auto inp = layouts;
+        inp.pop_back();
+        opr->deduce_layout(inp, layouts.back());
+    }
+
+    static void exec(CheckNonFinite* opr, const TensorNDArray& tensors) {
+        megdnn_assert(tensors.size() >= 2);
+        auto inps = tensors;
+        inps.pop_back();
+
+        WorkspaceWrapper W(
+                opr->handle(),
+                opr->get_workspace_in_bytes(inps, tensors.back().layout));
+        opr->exec(inps, tensors.back(), W.workspace());
+    }
+};
+
+template <>
 struct OprProxy<SplitForward> : DeduceLayoutProxy<SplitForward, 0, false> {
     WorkspaceWrapper W;
     void exec(SplitForward* opr, const TensorNDArray& tensors) {


dnn/test/cuda/check_non_finite.cpp (+6, −3)

@@ -22,13 +22,16 @@ TEST_F(CUDA, CHECK_NON_FINITE_BASIC) {
     const auto nan = std::numeric_limits<float>::quiet_NaN();
     UniformFloatWithValueRNG rng(-1.0f, 1.0f, 0.1f, inf);
     checker.set_rng(0, &rng);
-    checker.execs({{512 * 16}, {1}});
+    checker.execs({{512 * 4}, {4}, {1}});
     rng = UniformFloatWithValueRNG(-1.0f, 1.0f, 1.f, inf);
     checker.set_rng(0, &rng);
-    checker.execs({{512 * 16}, {1}});
+    checker.execs({{4}, {512 * 4}, {1}});
     rng = UniformFloatWithValueRNG(-1.0f, 1.0f, 1.f, nan);
     checker.set_rng(0, &rng);
-    checker.execs({{512 * 16}, {1}});
+    checker.execs({{32}, {256}, {1}});
+    rng = UniformFloatWithValueRNG(-1.0f, 1.0f, 0.f, nan);
+    checker.set_rng(0, &rng);
+    checker.execs({{16}, {16}, {2}, {1}});
 }

 } // namespace test


dnn/test/naive/check_non_finite.cpp (+9, −4)

@@ -20,23 +20,28 @@ namespace test {
 TEST_F(NAIVE, CHECK_NON_FINITE_BASIC) {
     Checker<CheckNonFinite> checker(handle(), false);
     checker.exect(
-            Testcase{TensorValue({4}, dtype::Float32(), {1.1, 2.2, 3.3, 4.3}), {}},
-            Testcase{{}, TensorValue({1}, dtype::Int32(), {0})});
+            Testcase{
+                    TensorValue({4}, dtype::Float32(), {1.1, 2.2, 3.3, 4.3}),
+                    TensorValue({4}, dtype::Float32(), {1.1, 2.2, 3.3, 4.3}),
+                    {}},
+            Testcase{{}, {}, TensorValue({1}, dtype::Int32(), {0})});
     checker.exect(
             Testcase{
+                    TensorValue({4}, dtype::Float32(), {1.1, 2.2, 3.3, 4.3}),
                     TensorValue(
                             {4}, dtype::Float32(),
                             {1.1f, 2.2f, 3.3f, std::numeric_limits<float>::infinity()}),
                     {}},
-            Testcase{{}, TensorValue({1}, dtype::Int32(), {1})});
+            Testcase{{}, {}, TensorValue({1}, dtype::Int32(), {1})});
     checker.exect(
             Testcase{
+                    TensorValue({4}, dtype::Float32(), {1.1, 2.2, 3.3, 4.3}),
                     TensorValue(
                             {4}, dtype::Float32(),
                             {1.1f, 2.2f, 3.3f,
                              std::numeric_limits<float>::quiet_NaN()}),
                     {}},
-            Testcase{{}, TensorValue({1}, dtype::Int32(), {1})});
+            Testcase{{}, {}, TensorValue({1}, dtype::Int32(), {1})});
 }

 } // namespace test


imperative/python/megengine/amp/grad_scaler.py (+10, −9)

@@ -128,21 +128,22 @@ class GradScaler:
             grad_tensors: Tensors needed to unscale grads. Should be all tensors
                 that are affected by ``target`` tensor in GradManager's backward.
         """
-        # use float64 for better precision
-        inv_scale = Tensor(1.0 / self.scale_factor)
-        for tensor in grad_tensors:
-            if tensor is None or getattr(tensor, "grad", None) is None:
-                continue
-            # to support tracing, _check_gradients should be applied to every grad.
-            if self._check_gradients(tensor.grad):
-                self._found_non_finite = True
-            tensor.grad *= inv_scale
+        # to support tracing, _check_gradients should be applied to every grad.
+        if self._check_gradients([x.grad for x in grad_tensors]):
+            self._found_non_finite = True

         if self._found_non_finite:
             for tensor in grad_tensors:
                 if tensor is None or getattr(tensor, "grad", None) is None:
                     continue
                 tensor.grad = None
+        else:
+            # use float64 for better precision
+            inv_scale = Tensor(1.0 / self.scale_factor)
+            for tensor in grad_tensors:
+                if tensor is None or getattr(tensor, "grad", None) is None:
+                    continue
+                tensor.grad *= inv_scale
         return self

     def _check_gradients(self, grad):


imperative/python/megengine/functional/math.py (+3, −3)

@@ -9,7 +9,7 @@
 import collections
 import math
 from functools import lru_cache
-from typing import Optional, Sequence, Tuple, Union
+from typing import Iterable, Optional, Sequence, Tuple, Union

 from ..core import _config
 from ..core._imperative_rt.core2 import apply, dtype_promotion
@@ -1183,7 +1183,7 @@ def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor:
     return U, sigma, V


-def _check_non_finite(inp: Tensor) -> Tensor:
+def _check_non_finite(inps: Iterable[Tensor]) -> Tensor:
     r"""Check whether input contains infinite or nan value.

     Args:
@@ -1193,6 +1193,6 @@ def _check_non_finite(inp: Tensor) -> Tensor:
         a int32 scalar tensor, 0 for False and 1 for True.
     """
     op = builtin.CheckNonFinite()
-    (oup,) = apply(op, inp.reshape(-1).astype("float32"))
+    (oup,) = apply(op, *inps)
     oup._setscalar()
     return oup

imperative/python/test/unit/amp/test_grad_scaler.py (+18, −13)

@@ -10,21 +10,26 @@ import numpy as np
 import megengine as mge
 from megengine.amp import GradScaler
 from megengine.autodiff import GradManager
+from megengine.jit import trace


 def test_grad_scaler():
-    gm = GradManager()
-    scaler = GradScaler()
+    def f():
+        gm = GradManager()
+        scaler = GradScaler()

-    x = mge.tensor(1.0)
-    for _ in range(3):
-        with gm:
-            y = x + 1
-            gm.attach(y)
-            loss = y + 1
-            scaler.backward(gm, loss, unscale_grad=False)
-        np.testing.assert_equal(y.grad.numpy(), scaler.scale_factor)
-        scaler.unscale(gm.attached_tensors())
-        np.testing.assert_equal(y.grad.numpy(), 1)
-        # test handle None elements
-        scaler.unscale(gm.attached_tensors())
+        x = mge.tensor(1.0)
+        for _ in range(3):
+            with gm:
+                y = x + 1
+                gm.attach(y)
+                loss = y + 1
+                scaler.backward(gm, loss, unscale_grad=False)
+            np.testing.assert_equal(y.grad.numpy(), scaler.scale_factor)
+            scaler.unscale(gm.attached_tensors())
+            np.testing.assert_equal(y.grad.numpy(), 1)
+            # test handle None elements
+            scaler.unscale(gm.attached_tensors())
+
+    f()
+    trace(f)()

imperative/python/test/unit/functional/test_math.py (+7, −6)

@@ -191,16 +191,17 @@ def test_sum_neg_axis():

 def test_non_finite():
     shape = (32, 3, 32, 32)
-    data = np.random.random(shape).astype(np.float32)
-    rst = F.math._check_non_finite(tensor(data))
+    data1 = np.random.random(shape).astype(np.float32)
+    data2 = np.random.random(shape).astype(np.float32)
+    rst = F.math._check_non_finite([tensor(data1), tensor(data2)])
     np.testing.assert_equal(rst.numpy(), [0])

-    data[0][0][0][0] = float("inf")
-    rst = F.math._check_non_finite(tensor(data))
+    data2[0][0][0][0] = float("inf")
+    rst = F.math._check_non_finite([tensor(data1), tensor(data2)])
     np.testing.assert_equal(rst.numpy(), [1])

-    data[0][0][0][0] = float("nan")
-    rst = F.math._check_non_finite(tensor(data))
+    data2[0][0][0][0] = float("nan")
+    rst = F.math._check_non_finite([tensor(data1), tensor(data2)])
     np.testing.assert_equal(rst.numpy(), [1])






imperative/src/impl/ops/misc.cpp (+45, −3)

@@ -17,14 +17,56 @@ namespace mgb {
 namespace imperative {

 namespace check_non_finite {
-auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
+SymbolVar apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
     auto&& op = def.cast_final_safe<CheckNonFinite>();
-    mgb_assert(inputs.size() == 1);
     OperatorNodeConfig config{op.make_name()};
-    return opr::CheckNonFinite::make(inputs[0], {}, config);
+    return opr::CheckNonFinite::make(inputs, {}, config);
+}
+
+SmallVector<TensorPtr> apply_on_physical_tensor(
+        const OpDef& def, const SmallVector<TensorPtr>& inputs) {
+    size_t size = inputs.size();
+
+    auto dest = Tensor::make(
+            TensorLayout(TensorShape({1}), dtype::Int32()), inputs[0]->comp_node());
+    auto cn = dest->comp_node();
+    auto&& dnn_opr = opr::intl::create_megdnn_opr<megdnn::CheckNonFinite>(cn);
+    size_t wk_size = 0;
+    SmallVector<megdnn::TensorND> srcs(size);
+    for (size_t i = 0; i < size; ++i) {
+        srcs[i] = inputs[i]->dev_tensor().as_megdnn();
+    }
+    wk_size = dnn_opr->get_workspace_in_bytes(srcs, dest->layout());
+    auto wk = Blob::make(cn, wk_size);
+    megdnn::Workspace dnn_wk(wk->storage().get(), wk_size);
+    dnn_opr->exec(srcs, dest->dev_tensor().as_megdnn(), dnn_wk);
+    return {dest};
+}
+
+std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
+        const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
+    SmallVector<LogicalTensorDesc> dests(1);
+    dests[0].comp_node = inputs[0].comp_node;
+    dests[0].layout = TensorLayout(TensorShape({1}), dtype::Int32());
+    return {dests, true};
+}
+SmallVector<LogicalTensorDesc> infer_output_attrs(
+        const OpDef& def, const SmallVector<TensorPtr>& inputs) {
+    SmallVector<LogicalTensorDesc> dests(1);
+    dests[0].comp_node = inputs[0]->comp_node();
+    dests[0].layout = TensorLayout(TensorShape({1}), dtype::Int32());
+    return dests;
+}
+std::tuple<SmallVector<MemoryDesc>, SmallVector<MemoryDesc>> infer_output_mem_desc(
+        const OpDef& def, const SmallVector<TensorPtr>& inputs_tensors,
+        const SmallVector<MemoryDesc>& inputs_mems) {
+    return {{}, {}};
 }
 OP_TRAIT_REG(CheckNonFinite, CheckNonFinite)
         .apply_on_var_node(apply_on_var_node)
+        .apply_on_physical_tensor(apply_on_physical_tensor)
+        .infer_output_attrs_fallible(infer_output_attrs_fallible)
+        .infer_output_mem_desc(infer_output_mem_desc)
         .fallback();
 } // namespace check_non_finite




src/opr/impl/misc.cpp (+69, −13)

@@ -482,18 +482,74 @@ MGB_IMPL_OPR_GRAD(TopK) {
 #endif

 /* ================= CheckNonFinite ================= */
+namespace mgb {
+namespace opr {
+namespace intl {
+template <>
+struct MegDNNOprInitPostCtor<CheckNonFinite> {
+    static void apply(cg::OperatorNodeBase& opr) {
+        opr.output(0)->dtype(dtype::Int32());
+    }
+};
+} // namespace intl
+} // namespace opr
+} // namespace mgb
 MGB_DYN_TYPE_OBJ_FINAL_IMPL(CheckNonFinite);
-MEGDNN_OPR_INIT1(CheckNonFinite, "check_non_finite")
+CheckNonFinite::CheckNonFinite(
+        const VarNodeArrayView& inp, const Param& param,
+        const OperatorNodeConfig& config)
+        : Super(OperatorNodeBaseCtorParam{
+                  inp[0]->owner_graph(), config, "check_non_finite", inp}) {
+    mgb_assert(!inp.empty());
+    for (auto&& i : inp) {
+        add_input({i});
+    }
+    add_output(None)->dtype(dtype::Int32()).add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
+    cg::add_workspace_output(this);
+}
+
+SymbolVar CheckNonFinite::make(
+        const VarNodeArrayView& inp, const Param& param,
+        const OperatorNodeConfig& config) {
+    mgb_assert(!inp.empty());
+    intl::BatchedDTypePromotion dtp{inp};
+    return SymbolVar{inp[0]}.insert_single_output_opr<CheckNonFinite>(
+            dtp.get_vars(), param, config);
+}
+
+void CheckNonFinite::scn_do_execute() {
+    megdnn::TensorNDArray inp_arr(input().size());
+    for (size_t i = 0; i < input().size(); ++i) {
+        inp_arr[i] = input()[i]->dev_tensor().as_megdnn();
+    }
+    megdnn_opr()->exec(
+            inp_arr, output(0)->dev_tensor().as_megdnn(),
+            intl::get_megdnn_workspace_from_var(output(1)));
+}
+
+void CheckNonFinite::init_output_static_infer_desc() {
+    using namespace cg::static_infer;
+
+    auto&& mgr = owner_graph()->static_infer_manager();
+
+    auto infer_oshp = [](TensorShape& dest, const InpVal& iv) {
+        TensorLayout dst;
+        dst.shape[0] = 1;
+        dst.ndim = 1;
+        dst.dtype = dtype::Int32();
+        dst.init_contiguous_stride();
+        dest = dst;
+        return true;
+    };
+    DepVal deps;
+    for (auto i : input())
+        deps.push_back({i, DepType::SHAPE});
+    mgr.register_shape_infer(output(0), {SourceType::DEP, deps, infer_oshp});
+
+    auto infer_wk = [this](TensorShape& dest, const InpVal& inp) {
+        dest.ndim = 1;
+        megdnn::TensorNDArray inp_arr(input().size());
+        for (size_t i = 0; i < input().size(); ++i) {
+            inp_arr[i] = {NULL, {inp.val.at(i).shape(), input(0)->dtype()}};
+        }
+        dest.shape[0] = megdnn_opr()->get_workspace_in_bytes(
+                inp_arr, {output(0)->shape(), output(0)->dtype()});
+        return true;
+    };
+    mgr.register_shape_infer(output(1), {SourceType::DEP, deps, infer_wk});
+}
+
+void CheckNonFinite::add_input_layout_constraint() {
+    for (auto i : input()) {
+        i->add_layout_constraint_contiguous();
+    }
+}
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
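
Note how the workspace output (output(1)) gets its shape: infer_wk builds TensorNDs with null data pointers but real shapes and asks the megdnn operator for its workspace size, so the buffer can be sized before any data exists. A standalone sketch of that shape-only query pattern, with a toy operator standing in for megdnn::CheckNonFinite (the real CUDA implementation also adds the reduce workspace on top of the tables counted here):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Toy stand-in: workspace size depends only on input shapes, so it can
    // be answered from metadata alone, before any tensor is allocated.
    struct ToyCheckNonFinite {
        static constexpr size_t chunk = 2048;  // assumption: total_nr_elems_max
        size_t get_workspace_in_bytes(const std::vector<size_t>& nr_elems) const {
            size_t nr_chunks = 0;
            for (size_t n : nr_elems)
                nr_chunks += (n + chunk - 1) / chunk;
            // per-chunk pointer table + per-chunk length table
            return nr_chunks * (sizeof(float*) + sizeof(size_t));
        }
    };

    int main() {
        ToyCheckNonFinite opr;
        // shape-only query, mirroring the null-pointer TensorNDs in infer_wk
        std::vector<size_t> shapes{32 * 3 * 32 * 32, 4096};
        std::cout << "workspace bytes: "
                  << opr.get_workspace_in_bytes(shapes) << "\n";
    }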

src/opr/impl/misc.sereg.h (+5, −1)

@@ -55,6 +55,10 @@ struct OprMaker<opr::TopK, 2> {
     }
 };

+template <>
+struct OprMaker<opr::CheckNonFinite, 0> : public OprMakerVariadic<opr::CheckNonFinite> {
+};
+
 } // namespace serialization

 namespace opr {
@@ -72,7 +76,7 @@ MGB_SEREG_OPR(CumsumV1, 1);
 #if MGB_CUDA
 MGB_SEREG_OPR(NvOf, 1);
 #endif
-MGB_SEREG_OPR(CheckNonFinite, 1);
+MGB_SEREG_OPR(CheckNonFinite, 0);

 } // namespace opr
 } // namespace mgb


src/opr/include/megbrain/opr/misc.h (+15, −1)

@@ -142,6 +142,8 @@ using CondTakeBase = cg::SingleCNOperatorNode<
         cg::OperatorNodeBase, mixin::MegDNNOprHolderImpl<megdnn::CondTake>>;
 using TopKBase = cg::SingleCNOperatorNode<
         cg::OperatorNodeBase, mixin::MegDNNOprHolderImpl<megdnn::TopK>>;
+using CheckNonFiniteBase = cg::SingleCNOperatorNode<
+        cg::OperatorNodeBase, mixin::MegDNNOprHolderImpl<megdnn::CheckNonFinite>>;
 } // namespace intl

 /*!
@@ -181,7 +183,19 @@ public:
             const OperatorNodeConfig& config = {});
 };

-MGB_DEFINE_MEGDNN_OPR_WRAPPER_FWD1(CheckNonFinite);
+MGB_DEFINE_OPR_CLASS(CheckNonFinite, intl::CheckNonFiniteBase) //{
+    void scn_do_execute() override;
+    void init_output_static_infer_desc() override;
+    void add_input_layout_constraint() override;
+
+public:
+    MGE_WIN_DECLSPEC_FUC CheckNonFinite(
+            const VarNodeArrayView& inp, const Param& param,
+            const OperatorNodeConfig& config);
+    MGE_WIN_DECLSPEC_FUC static SymbolVar make(
+            const VarNodeArrayView& inp, const Param& param = {},
+            const OperatorNodeConfig& config = {});
+};

 } // namespace opr
 } // namespace mgb

