
feat(dnn/check_non_finite): addmul scale to check_non_finite opr

GitOrigin-RevId: c35a219e52
tags/v1.7.2.m1
Megvii Engine Team 3 years ago
parent
commit
2881934cb8
15 changed files with 137 additions and 101 deletions
  1. dnn/include/megdnn/oprs/general.h (+1, -1)
  2. dnn/scripts/opr_param_defs.py (+2, -0)
  3. dnn/src/common/reduce_helper.h (+0, -31)
  4. dnn/src/common/reduce_helper_device.h (+6, -2)
  5. dnn/src/cuda/check_non_finite/opr_impl.cpp (+1, -1)
  6. dnn/src/naive/check_non_finite/opr_impl.cpp (+5, -3)
  7. imperative/python/megengine/amp/grad_scaler.py (+14, -14)
  8. imperative/python/megengine/functional/math.py (+9, -5)
  9. imperative/python/test/unit/functional/test_math.py (+11, -7)
  10. imperative/src/impl/ops/misc.cpp (+30, -12)
  11. src/core/include/megbrain/ir/ops.td (+1, -1)
  12. src/opr/impl/misc.cpp (+35, -13)
  13. src/opr/impl/misc.sereg.h (+10, -1)
  14. src/opr/include/megbrain/opr/misc.h (+11, -10)
  15. src/serialization/impl/schema.fbs (+1, -0)

dnn/include/megdnn/oprs/general.h (+1, -1)

@@ -1344,7 +1344,7 @@ protected:
* \brief check whether input contains inf or nan value.
*/
class CheckNonFinite : public OperatorBase {
DEF_OPR_PARAM(Empty);
DEF_OPR_PARAM(CheckNonFinite);
DEF_OPR_IMPL(CheckNonFinite, OperatorBase, -1, 1);
size_t m_size = 0;



dnn/scripts/opr_param_defs.py (+2, -0)

@@ -1176,6 +1176,8 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o
)
pdef('Fill').add_fields('float32', 'value', '0')

pdef('CheckNonFinite').add_fields('float32', 'scale', '1.0')


PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'),
Doc('REFLECT = 1', 'fedcba|abcdefgh|hgfedcb'),

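The new `scale` field also surfaces on the generated Python builtin op. A minimal sketch of reading it back (the attribute access comes from the generated param bindings, which are not shown in this diff, so treat it as an assumption):

```python
from megengine.core.ops import builtin

# `scale` defaults to 1.0, as declared in the pdef above.
op = builtin.CheckNonFinite()
print(op.scale)   # 1.0

# A custom scale rides on the op and is later copied into param().scale
# by the imperative/graph implementations further down in this commit.
op = builtin.CheckNonFinite(scale=0.5)
print(op.scale)   # 0.5
```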

dnn/src/common/reduce_helper.h (+0, -31)

@@ -156,37 +156,6 @@ struct MaxOp<src_ctype, dst_ctype, dt_float32> {
: INIT(wtype(DTypeTrait<wtype>::min())), src(src), dst(dst), B(B) {}
};

template <typename src_ctype, typename index_ctype, typename dst_ctype, typename wtype_>
struct CheckNonFiniteOp {
typedef wtype_ wtype;
const wtype INIT;

RefPtr* srcs;
RefPtr srcs_total_nr_elems;
RefPtr dst;
const size_t B;

wtype read(uint32_t idx) {
size_t x = idx / B;
size_t y = idx % B;
if (y < srcs_total_nr_elems.ptr<index_ctype>()[x]) {
RefPtr src = srcs[x];
return !std::isfinite(src.ptr<src_ctype>()[y]);
}
return 0;
}
void write(uint32_t idx, wtype val) { dst.ptr<dst_ctype>()[idx] = val; }
static wtype apply(wtype lhs, wtype rhs) { return lhs | rhs; }
CheckNonFiniteOp(
RefPtr* srcs, const RefPtr& srcs_total_nr_elems, const RefPtr& dst,
size_t B)
: INIT(wtype(0)),
srcs(srcs),
srcs_total_nr_elems(srcs_total_nr_elems),
dst(dst),
B(B) {}
};

void get_ABC(const TensorShape& shape, size_t& A, size_t& B, size_t& C, size_t axis);

} // namespace reduce


dnn/src/common/reduce_helper_device.h (+6, -2)

@@ -194,6 +194,7 @@ struct CheckNonFiniteOp {
index_ctype* srcs_total_nr_elems;
dst_ctype* dst;
const size_t B;
const src_ctype scale;

MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) {
size_t x = idx / B;
@@ -204,6 +205,8 @@ struct CheckNonFiniteOp {
#else
wtype val = std::isfinite(srcs[x][y]);
#endif
if (val)
srcs[x][y] *= scale;
return !val;
}
return 0;
@@ -214,12 +217,13 @@ struct CheckNonFiniteOp {
}
MEGDNN_HOST MEGDNN_DEVICE CheckNonFiniteOp(
src_ctype** srcs, index_ctype* srcs_total_nr_elems, dst_ctype* dst,
size_t B)
size_t B, src_ctype scale)
: INIT(wtype(0)),
srcs(srcs),
srcs_total_nr_elems(srcs_total_nr_elems),
dst(dst),
B(B) {}
B(B),
scale(scale) {}
};

} // namespace device_reduce


dnn/src/cuda/check_non_finite/opr_impl.cpp (+1, -1)

@@ -97,7 +97,7 @@ void CheckNonFiniteImpl::exec(
workspace_gpu.total_size_in_bytes())),
1, m_size * total_nr_elems_max, 1, stream,
Op(srcs_gpu, srcs_total_nr_elems_gpu, dst.ptr<dt_int32>(),
total_nr_elems_max));
total_nr_elems_max, param().scale));
}

} // namespace cuda


dnn/src/naive/check_non_finite/opr_impl.cpp (+5, -3)

@@ -19,7 +19,7 @@ using namespace megdnn;

#define wtype dt_int32

void reduce_fwd(const TensorNDArray& srcs, wtype* dptr) {
void reduce_fwd(const TensorNDArray& srcs, wtype* dptr, dt_float32 scale) {
dptr[0] = 0;
for (auto src : srcs) {
auto sptr = src.ptr<dt_float32>();
@@ -31,6 +31,8 @@ void reduce_fwd(const TensorNDArray& srcs, wtype* dptr) {
return func(l, mid) | func(mid, r);
} else {
auto val = std::isfinite(sptr[l]);
if (val)
sptr[l] *= scale;
return static_cast<wtype>(!val);
}
};
@@ -47,9 +49,9 @@ void CheckNonFiniteImpl::exec(
_megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
_megdnn_workspace workspace) {
check_exec(srcs, dst, workspace.size);
float scale = param().scale;
auto handle = static_cast<HandleImpl*>(this->handle());
MEGDNN_DISPATCH_CPU_KERN(handle, reduce_fwd(srcs, dst.ptr<dt_int32>()));
MEGDNN_DISPATCH_CPU_KERN(handle, reduce_fwd(srcs, dst.ptr<dt_int32>(), scale));
}
} // namespace naive
} // namespace megdnn
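
For reference, a NumPy sketch (not part of the change) of what both the naive and CUDA kernels now compute: the reduction still returns 1 if any element is inf/nan, and every finite element is additionally multiplied by `scale` in place.

```python
import numpy as np

def check_non_finite_reference(arrays, scale=1.0):
    """NumPy model of the fused kernel: returns 1 if any element is
    non-finite; finite elements are scaled in place as a side effect."""
    found = 0
    for arr in arrays:
        finite = np.isfinite(arr)
        found |= int(not finite.all())
        arr[finite] *= scale   # only finite values are rescaled
    return found
```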


imperative/python/megengine/amp/grad_scaler.py (+14, -14)

@@ -128,28 +128,28 @@ class GradScaler:
grad_tensors: Tensors needed to unscale grads. Should be all tensors
that are affected by ``target`` tensor in GradManager's backward.
"""
# to support tracing, _check_gradients should be applied to every grad.
if self._check_gradients([x.grad for x in grad_tensors]):
self._found_non_finite = True

if self._found_non_finite:
for tensor in grad_tensors:
if tensor is None or getattr(tensor, "grad", None) is None:
continue
tensor.grad = None
else:
if self.growth_interval == 0:
# use float64 for better precision
inv_scale = Tensor(1.0 / self.scale_factor)
for tensor in grad_tensors:
if tensor is None or getattr(tensor, "grad", None) is None:
continue
tensor.grad *= inv_scale
return self

# to support tracing, _check_gradients should be applied to every grad.
if self._check_gradients(
[x.grad for x in grad_tensors], 1.0 / self.scale_factor
):
self._found_non_finite = True
for tensor in grad_tensors:
if tensor is None or getattr(tensor, "grad", None) is None:
continue
tensor.grad = None
return self

def _check_gradients(self, grad):
if self.growth_interval == 0:
return False
return _check_non_finite(grad)
def _check_gradients(self, grad, scale):
return _check_non_finite(grad, scale)

def update(self, new_scale: float = None):
r"""Update the scale factor according to whether encountered overflow grad.

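With the scale folded into `_check_gradients`, `unscale` no longer needs a second pass over the grads when everything is finite. A minimal sketch of the resulting behaviour (the `_Param` stand-in and the default `scale_factor` of `2 ** 4` are assumptions, not part of this diff):

```python
from megengine import Tensor
from megengine.amp import GradScaler

class _Param:                       # stand-in object exposing a .grad slot
    def __init__(self, grad):
        self.grad = grad

scaler = GradScaler()               # assumed default scale_factor: 2 ** 4
p = _Param(Tensor([16.0, 32.0]))

scaler.unscale([p])
# The fused kernel checks for inf/nan and multiplies the grad by
# 1 / scale_factor in the same pass, so p.grad becomes [1.0, 2.0];
# had a non-finite value been found, the grad would be dropped instead.
print(p.grad)
```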

imperative/python/megengine/functional/math.py (+9, -5)

@@ -1183,7 +1183,7 @@ def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor:
return U, sigma, V


def _check_non_finite(inps: Iterable[Tensor]) -> Tensor:
def _check_non_finite(inps: Iterable[Tensor], scale=1.0) -> Tensor:
r"""Check whether input contains infinite or nan value.

Args:
@@ -1192,7 +1192,11 @@ def _check_non_finite(inps: Iterable[Tensor]) -> Tensor:
Returns:
a int32 scalar tensor, 0 for False and 1 for True.
"""
op = builtin.CheckNonFinite()
(oup,) = apply(op, *inps)
oup._setscalar()
return oup
op = builtin.CheckNonFinite(scale=scale)
oups = apply(op, *inps)
out = oups[-1]
for i in range(len(inps)):
inps[i]._reset(oups[i])

out._setscalar()
return out

imperative/python/test/unit/functional/test_math.py (+11, -7)

@@ -191,17 +191,21 @@ def test_sum_neg_axis():

def test_non_finite():
shape = (32, 3, 32, 32)
data1 = np.random.random(shape).astype(np.float32)
data2 = np.random.random(shape).astype(np.float32)
rst = F.math._check_non_finite([tensor(data1), tensor(data2)])
data = []
for i in range(2):
data.append(np.random.random(shape).astype(np.float32))
tensorList = [tensor(x) for x in data]
rst = F.math._check_non_finite(tensorList, 0.7)
np.testing.assert_equal(rst.numpy(), [0])
for i in range(len(tensorList)):
np.testing.assert_allclose(tensorList[i].numpy() / 0.7, data[i], rtol=1e-6)

data2[0][0][0][0] = float("inf")
rst = F.math._check_non_finite([tensor(data1), tensor(data2)])
data[1][0][0][0][0] = float("inf")
rst = F.math._check_non_finite([tensor(x) for x in data], 0.7)
np.testing.assert_equal(rst.numpy(), [1])

data2[0][0][0][0] = float("nan")
rst = F.math._check_non_finite([tensor(data1), tensor(data2)])
data[1][0][0][0][0] = float("nan")
rst = F.math._check_non_finite([tensor(x) for x in data], 0.7)
np.testing.assert_equal(rst.numpy(), [1])




imperative/src/impl/ops/misc.cpp (+30, -12)

@@ -17,44 +17,62 @@ namespace mgb {
namespace imperative {

namespace check_non_finite {
SymbolVar apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
SymbolVarArray apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
auto&& op = def.cast_final_safe<CheckNonFinite>();
OperatorNodeConfig config{op.make_name()};
return opr::CheckNonFinite::make(inputs, {}, config);
return opr::CheckNonFinite::make(inputs, op.param(), config);
}

SmallVector<TensorPtr> apply_on_physical_tensor(
const OpDef& def, const SmallVector<TensorPtr>& inputs) {
size_t size = inputs.size();

auto dest = Tensor::make(
auto&& op = def.cast_final_safe<CheckNonFinite>();
SmallVector<TensorPtr> outputs(size + 1);
outputs[size] = Tensor::make(
TensorLayout(TensorShape({1}), dtype::Int32()), inputs[0]->comp_node());

auto dest = outputs[size];
auto cn = dest->comp_node();
auto&& dnn_opr = opr::intl::create_megdnn_opr<megdnn::CheckNonFinite>(cn);
size_t wk_size = 0;
SmallVector<megdnn::TensorND> srcs(size);
// copy an outputs to the dnn for inplace
for (size_t i = 0; i < size; ++i) {
srcs[i] = inputs[i]->dev_tensor().as_megdnn();
outputs[i] = Tensor::make(inputs[i]->layout(), inputs[0]->comp_node());
outputs[i]->dev_tensor().copy_from_fixlayout(inputs[i]->dev_tensor());
srcs[i] = outputs[i]->dev_tensor().as_megdnn();
}
megdnn::CheckNonFinite::Param param({op.scale});
dnn_opr->param() = param;
wk_size = dnn_opr->get_workspace_in_bytes(srcs, dest->layout());
auto wk = Blob::make(cn, wk_size);
megdnn::Workspace dnn_wk(wk->storage().get(), wk_size);
dnn_opr->exec(srcs, dest->dev_tensor().as_megdnn(), dnn_wk);
return {dest};
return outputs;
}

std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
SmallVector<LogicalTensorDesc> dests(1);
dests[0].comp_node = inputs[0].comp_node;
dests[0].layout = TensorLayout(TensorShape({1}), dtype::Int32());
size_t size = inputs.size();
SmallVector<LogicalTensorDesc> dests(size + 1);
for (size_t i = 0; i < size; ++i) {
dests[i].comp_node = inputs[i].comp_node;
dests[i].layout = inputs[i].layout;
}
dests[size].comp_node = inputs[0].comp_node;
dests[size].layout = TensorLayout(TensorShape({1}), dtype::Int32());
return {dests, true};
}
SmallVector<LogicalTensorDesc> infer_output_attrs(
const OpDef& def, const SmallVector<TensorPtr>& inputs) {
SmallVector<LogicalTensorDesc> dests(1);
dests[0].comp_node = inputs[0]->comp_node();
dests[0].layout = TensorLayout(TensorShape({1}), dtype::Int32());
size_t size = inputs.size();
SmallVector<LogicalTensorDesc> dests(size + 1);
for (size_t i = 0; i < size; ++i) {
dests[i].comp_node = inputs[i]->comp_node();
dests[i].layout = inputs[i]->layout();
}
dests[size].comp_node = inputs[0]->comp_node();
dests[size].layout = TensorLayout(TensorShape({1}), dtype::Int32());
return dests;
}
std::tuple<SmallVector<MemoryDesc>, SmallVector<MemoryDesc>> infer_output_mem_desc(

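At the imperative level the op now yields one output per input (the scaled copy) plus a trailing int32 flag, which is what `_check_non_finite` above relies on. A small sketch of that convention (import paths follow the ones used around `functional/math.py`; treat them as assumptions):

```python
from megengine import tensor
from megengine.core._imperative_rt.core2 import apply
from megengine.core.ops import builtin

a = tensor([1.0, 2.0])
b = tensor([float("inf"), 3.0])

outs = apply(builtin.CheckNonFinite(scale=0.5), a, b)
scaled_a, scaled_b, flag = outs
# scaled_a / scaled_b are copies of the inputs with finite values multiplied
# by 0.5; flag is an int32 scalar tensor, 1 here because `b` contains inf.
```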

src/core/include/megbrain/ir/ops.td (+1, -1)

@@ -397,7 +397,7 @@ def MagicMindRuntime: MgbHashableOp<"MagicMindRuntime"> {

def CvtColor: MgbHashableOp<"CvtColor", [CvtColorParam]>;

def CheckNonFinite: MgbHashableOp<"CheckNonFinite", [EmptyParam]>;
def CheckNonFinite: MgbHashableOp<"CheckNonFinite", [CheckNonFiniteParam]>;

def FastpathCopy: MgbHashableOp<"FastpathCopy">;



src/opr/impl/misc.cpp (+35, -13)

@@ -487,39 +487,60 @@ CheckNonFinite::CheckNonFinite(
const VarNodeArrayView& inp, const Param& param,
const OperatorNodeConfig& config)
: Super(OperatorNodeBaseCtorParam{
inp[0]->owner_graph(), config, "check_non_finite", inp}) {
inp[0]->owner_graph(), config, "check_non_finite", inp}),
m_scale(param.scale) {
mgb_assert(!inp.empty());

for (auto&& i : inp) {
add_input({i});
add_output(None)
->dtype(dtype::Float32())
.add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
}
add_output(None)->dtype(dtype::Int32()).add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
cg::add_workspace_output(this);
}

SymbolVar CheckNonFinite::make(
SymbolVarArray CheckNonFinite::make(
const VarNodeArrayView& inp, const Param& param,
const OperatorNodeConfig& config) {
mgb_assert(!inp.empty());
intl::BatchedDTypePromotion dtp{inp};
return SymbolVar{inp[0]}.insert_single_output_opr<CheckNonFinite>(
dtp.get_vars(), param, config);
auto outputs =
inp[0]->owner_graph()
->insert_opr(std::make_unique<CheckNonFinite>(inp, param, config))
->output();
mgb_assert(outputs.size() == inp.size() + 2);
SymbolVarArray ret(outputs.size() - 1);
for (size_t i = 0; i < ret.size(); ++i)
ret[i] = outputs[i];
return ret;
}

void CheckNonFinite::scn_do_execute() {
megdnn::TensorNDArray inp_arr(input().size());
for (size_t i = 0; i < input().size(); ++i) {
inp_arr[i] = input()[i]->dev_tensor().as_megdnn();
size_t size = input().size();
megdnn::TensorNDArray oup_arr(size);
// copy an outputs to the dnn for inplace
for (size_t i = 0; i < size; ++i) {
oup_arr[i] = output(i)
->dev_tensor()
.copy_from_fixlayout(input(i)->dev_tensor())
.as_megdnn();
}
megdnn_opr()->param().scale = m_scale;
megdnn_opr()->exec(
inp_arr, output(0)->dev_tensor().as_megdnn(),
intl::get_megdnn_workspace_from_var(output(1)));
oup_arr, output(size)->dev_tensor().as_megdnn(),
intl::get_megdnn_workspace_from_var(output(size + 1)));
}

void CheckNonFinite::init_output_static_infer_desc() {
using namespace cg::static_infer;

auto&& mgr = owner_graph()->static_infer_manager();

size_t size = input().size();
for (size_t i = 0; i < size; ++i) {
mgr.register_shape_infer(output(i), ShapeInferDesc::make_identity(input(i)));
}
auto infer_oshp = [](TensorShape& dest, const InpVal& iv) {
TensorLayout dst;
dst.shape[0] = 1;
@@ -532,7 +553,7 @@ void CheckNonFinite::init_output_static_infer_desc() {
DepVal deps;
for (auto i : input())
deps.push_back({i, DepType::SHAPE});
mgr.register_shape_infer(output(0), {SourceType::DEP, deps, infer_oshp});
mgr.register_shape_infer(output(size), {SourceType::DEP, deps, infer_oshp});

auto infer_wk = [this](TensorShape& dest, const InpVal& inp) {
dest.ndim = 1;
@@ -541,10 +562,11 @@ void CheckNonFinite::init_output_static_infer_desc() {
inp_arr[i] = {NULL, {inp.val.at(i).shape(), input(0)->dtype()}};
}
dest.shape[0] = megdnn_opr()->get_workspace_in_bytes(
inp_arr, {output(0)->shape(), output(0)->dtype()});
inp_arr, {output(input().size() + 1)->shape(),
output(input().size() + 1)->dtype()});
return true;
};
mgr.register_shape_infer(output(1), {SourceType::DEP, deps, infer_wk});
mgr.register_shape_infer(output(size + 1), {SourceType::DEP, deps, infer_wk});
}

void CheckNonFinite::add_input_layout_constraint() {


src/opr/impl/misc.sereg.h (+10, -1)

@@ -56,7 +56,16 @@ struct OprMaker<opr::TopK, 2> {
};

template <>
struct OprMaker<opr::CheckNonFinite, 0> : public OprMakerVariadic<opr::CheckNonFinite> {
struct OprMaker<opr::CheckNonFinite, 0> {
using Opr = opr::CheckNonFinite;
using Param = Opr::Param;
static cg::OperatorNodeBase* make(
const Param& param, const cg::VarNodeArray& inputs, ComputingGraph& graph,
const OperatorNodeConfig& config) {
MGB_MARK_USED_VAR(graph);
auto out = Opr::make(inputs, param, config);
return out[0].node()->owner_opr();
}
};

} // namespace serialization


src/opr/include/megbrain/opr/misc.h (+11, -10)

@@ -183,18 +183,19 @@ public:
const OperatorNodeConfig& config = {});
};

MGB_DEFINE_OPR_CLASS(CheckNonFinite, intl::CheckNonFiniteBase) //{
void scn_do_execute() override;
void init_output_static_infer_desc() override;
void add_input_layout_constraint() override;
MGB_DEFINE_OPR_CLASS(CheckNonFinite, intl::CheckNonFiniteBase) // {
void scn_do_execute() override;
void init_output_static_infer_desc() override;
void add_input_layout_constraint() override;
float m_scale = 1;

public:
MGE_WIN_DECLSPEC_FUC CheckNonFinite(
const VarNodeArrayView& inp, const Param& param,
const OperatorNodeConfig& config);
MGE_WIN_DECLSPEC_FUC static SymbolVar make(
const VarNodeArrayView& inp, const Param& param = {},
const OperatorNodeConfig& config = {});
MGE_WIN_DECLSPEC_FUC CheckNonFinite(
const VarNodeArrayView& inp, const Param& param,
const OperatorNodeConfig& config);
MGE_WIN_DECLSPEC_FUC static SymbolVarArray make(
const VarNodeArrayView& inp, const Param& param = {},
const OperatorNodeConfig& config = {});
};

} // namespace opr


src/serialization/impl/schema.fbs (+1, -0)

@@ -115,6 +115,7 @@ union OperatorParam {
param.SlidingWindowTranspose = 81,
param.Padding = 82,
param.ShuffleRNG = 83,
param.CheckNonFinite = 84,
}

table Operator {

