GitOrigin-RevId: 52ddd805b4
release-1.10
@@ -175,13 +175,13 @@ struct MaxOp<src_ctype, dst_ctype, dt_float32> {
             : INIT(wtype(DTypeTrait<wtype>::min())), src(src), dst(dst), B(B) {}
 };

-template <typename src_ctype, typename index_ctype, typename dst_ctype, typename wtype_>
+template <typename src_ctype, typename dst_ctype, typename wtype_>
 struct CheckNonFiniteOp {
     typedef wtype_ wtype;
     const wtype INIT;

     src_ctype** srcs;
-    index_ctype* srcs_total_nr_elems;
+    size_t* srcs_total_nr_elems;
     dst_ctype* dst;
     const size_t B;
     const src_ctype scale;
@@ -206,7 +206,7 @@ struct CheckNonFiniteOp {
         return lhs | rhs;
     }
     MEGDNN_HOST MEGDNN_DEVICE CheckNonFiniteOp(
-            src_ctype** srcs, index_ctype* srcs_total_nr_elems, dst_ctype* dst,
+            src_ctype** srcs, size_t* srcs_total_nr_elems, dst_ctype* dst,
             size_t B, src_ctype scale)
             : INIT(wtype(0)),
               srcs(srcs),
@@ -8,10 +8,10 @@ namespace cuda {
 #define COMMA ,

-#define cb(_dtype)                                                      \
-    INST_REDUCE(                                                        \
-            device_reduce::CheckNonFiniteOp<                            \
-                    _dtype COMMA size_t COMMA dt_int32 COMMA dt_int32>, \
+#define cb(_dtype)                                                      \
+    INST_REDUCE(                                                        \
+            device_reduce::CheckNonFiniteOp<                            \
+                    _dtype COMMA dt_float32 COMMA dt_int32 COMMA dt_int32>, \
             false);

 cb(dt_float32);
@@ -10,11 +10,11 @@ namespace megdnn {
 namespace cuda {

 using device_reduce::CheckNonFiniteOp;
-#define total_nr_elems_max 2048
+#define total_nr_elems_max 8192
 template <typename T>
 size_t CheckNonFiniteImpl::_get_workspace_in_bytes() {
     // Call the _get_workspace_in_bytes to reduce the loop fetch workspace bytes
-    typedef CheckNonFiniteOp<T, size_t, dt_int32, dt_int32> Op;
+    typedef CheckNonFiniteOp<T, dt_float32, dt_int32, dt_int32> Op;
     megdnn_assert(m_size > 0);
     WorkspaceBundle bundle(
             nullptr, {
@@ -59,7 +59,7 @@ void CheckNonFiniteImpl::_exec(
         _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
         _megdnn_workspace workspace) {
     check_exec(srcs, dst, workspace.size);
-    typedef CheckNonFiniteOp<T, size_t, dt_int32, dt_int32> Op;
+    typedef CheckNonFiniteOp<T, dt_float32, dt_int32, dt_int32> Op;
     auto stream = cuda_stream(this->handle());
     SmallVector<size_t> workspace_sizes{
             sizeof(T*) * m_size,
@@ -102,7 +102,7 @@ void CheckNonFiniteImpl::_exec(
     cuda_check(cudaStreamAddCallback(
             stream, callback_free, static_cast<void*>(workspace_cpu_raw), 0));
-    return run_reduce<Op, false>(
+    run_reduce<Op, false>(
             static_cast<dt_int32*>(
                     (void*)((char*)workspace_gpu_raw +
                             workspace_gpu.total_size_in_bytes())),
@@ -141,8 +141,10 @@ class GradScaler:
                 tensor.grad = None
         return self

-    def _check_gradients(self, grad, scale):
-        return _check_non_finite(grad, scale)
+    def _check_gradients(self, grads, scale):
+        if len(grads) == 0:
+            return False
+        return _check_non_finite(grads, scale)

     def update(self, new_scale: float = None):
         r"""Update the scale factor according to whether encountered overflow grad.
@@ -691,11 +691,13 @@ def _check_non_finite(inps: Iterable[Tensor], scale=1.0) -> Tensor:
     r"""Check whether input contains infinite or nan value.

     Args:
-        inp: a tensor to be checked.
+        inps: tensors to be checked.

     Returns:
         a int32 scalar tensor, 0 for False and 1 for True.
     """
+    if isinstance(inps, Tensor):
+        inps = [inps]
     op = builtin.CheckNonFinite(scale=scale)
     oups = apply(op, *inps)
     out = oups[-1]
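Note (not part of the diff): a short usage sketch of the updated `_check_non_finite`. The new isinstance branch means a bare Tensor is accepted as well as an iterable of tensors; the import path below follows the one used by grad_scaler.py and is an assumption.

    # Illustrative usage; assumes megengine is installed and the private helper
    # is importable from megengine.functional.math as in grad_scaler.py.
    import numpy as np
    import megengine as mge
    from megengine.functional.math import _check_non_finite

    good = mge.tensor(np.ones((2, 3), dtype="float32"))
    bad = mge.tensor(np.array([1.0, np.inf], dtype="float32"))

    print(_check_non_finite([good, bad], scale=1.0))  # int32 scalar tensor, 1: non-finite found
    print(_check_non_finite(good, scale=1.0))         # single Tensor is wrapped into a list, 0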
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest

 import megengine as mge
 from megengine.amp import GradScaler
@@ -6,23 +7,46 @@ from megengine.autodiff import GradManager
 from megengine.jit import trace


-def test_grad_scaler():
-    def f():
-        gm = GradManager()
-        scaler = GradScaler()
-
-        x = mge.tensor(1.0)
-        for _ in range(3):
-            with gm:
-                y = x + 1
-                gm.attach(y)
-                loss = y + 1
-                scaler.backward(gm, loss, unscale_grad=False)
-            np.testing.assert_equal(y.grad.numpy(), scaler.scale_factor)
-            scaler.unscale(gm.attached_tensors())
-            np.testing.assert_equal(y.grad.numpy(), 1)
-        # test handle None elements
-        scaler.unscale(gm.attached_tensors())
-
-    f()
-    trace(f)()
+@pytest.mark.parametrize(
+    "is_trace", [False, True],
+)
+def test_grad_scaler(is_trace):
+    gm = GradManager()
+    scaler = GradScaler()
+
+    def f(idx, data, calc):
+        x = mge.tensor(data, no_cache=True)
+        y = mge.tensor(data, no_cache=True)
+
+        if is_trace:
+            calc = trace(calc)
+
+        gm.attach([x, y])
+        with gm:
+            loss = calc(x, y)
+            scaler.backward(gm, loss, unscale_grad=False)
+        np.testing.assert_equal(x.grad.numpy(), 2 * scaler.scale_factor)
+        scaler.unscale(filter(lambda t: t.grad is not None, gm.attached_tensors()))
+        # scaler.unscale(gm.attached_tensors())
+        np.testing.assert_equal(x.grad.numpy(), 2)
+
+    def double_variables(x, y):
+        z = x + 2 * y
+        loss = 2 * z + 1
+        return loss
+
+    def single_variable(x, y):
+        z = x + 1
+        loss = 2 * z + 1
+        return loss
+
+    # need grad being unique storage or not inplace modifying grad
+    def double_variables_with_same_grad(x, y):
+        z = x + y
+        loss = 2 * z + 1
+        return loss
+
+    for data in [np.random.random((1, 2, 3, 4)), 1.0]:
+        for calc in [double_variables, single_variable, double_variables_with_same_grad]:
+            for idx in range(3):
+                f(idx, data, calc)
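For context (not part of the diff), a minimal end-to-end sketch of the GradScaler API the test exercises, this time using the default unscale_grad=True path; the tiny parameter and input below are invented for illustration.

    # Hypothetical usage; the parameter w and input x are made up for this sketch.
    import numpy as np
    import megengine as mge
    import megengine.functional as F
    from megengine.amp import GradScaler
    from megengine.autodiff import GradManager

    w = mge.Parameter(np.ones((3,), dtype="float32"))
    gm = GradManager().attach([w])
    scaler = GradScaler()

    x = mge.tensor(np.arange(3, dtype="float32"))
    with gm:
        loss = F.sum(w * x)
        # By default this scales the loss, backpropagates, unscales the
        # gradients and updates the scale factor in one call.
        scaler.backward(gm, loss)
    print(w.grad)  # equals x once the gradients have been unscaled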