GitOrigin-RevId: 826788113a
release-1.10
@@ -557,7 +557,14 @@ void init_ops(py::module m) {
     m.def(
             "delete_rng_handle",
             [](size_t handle) {
+                if (mgb::imperative::python::interpreter_for_py->check_available()) {
+                    mgb::imperative::python::interpreter_for_py->sync();
+                }
                 mgb::CompNode::sync_all();
+                mgb::CompNode::foreach ([](mgb::CompNode cn) {
+                    auto err = cn.check_async_error();
+                    mgb_assert(!err, "%s", err->what());
+                });
                 py_task_q.wait_all_task_finish();
                 rng::delete_handle(handle);
             },
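
The added lines turn handle deletion into a synchronization point: the Python interpreter queue is drained when available, every comp node is synchronized, and any pending async kernel error is raised before the handle is released. From the Python side, the scenario this guards looks roughly like the sketch below (an illustration only; it assumes that dropping a megengine.random.RNG releases its native handle through this binding):

from megengine.random import RNG

# Assumption: RNG teardown reaches delete_rng_handle, which now syncs
# and checks async errors before freeing the native handle.
rng = RNG(seed=1024, device="xpux")
x = rng.uniform(size=(8, 8))  # enqueued asynchronously
del rng                       # safe even while the kernel may be in flight
print(x.numpy().mean())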
@@ -11,13 +11,17 @@ import sys

 import pytest

-import megengine.functional
-import megengine.module
-from megengine import Parameter
-from megengine.core._imperative_rt.core2 import sync
+from megengine.core import _config as config
+from megengine.core import _trace_option as trace_option
+from megengine.core import get_option
+from megengine.core._imperative_rt.core2 import (
+    _get_amp_dtype_autocast,
+    _get_amp_high_prec_dtype,
+    _get_amp_low_prec_dtype,
+    _get_convert_inputs,
+)
+from megengine.core.tensor import amp
 from megengine.device import get_device_count
-from megengine.jit import trace as _trace
-from megengine.module import Linear, Module

 sys.path.append(os.path.join(os.path.dirname(__file__), "helpers"))
@@ -41,3 +45,58 @@ def skip_distributed(request):
                 platform.system()
             )
         )
+
+
+@pytest.fixture(autouse=True)
+def run_around_tests():
+    env_vars1 = {
+        "symbolic_shape": trace_option.use_symbolic_shape(),
+        "async_level": get_option("async_level"),
+        "enable_drop": get_option("enable_drop"),
+        "max_recompute_time": get_option("max_recompute_time"),
+        "catch_worker_execption": get_option("catch_worker_execption"),
+        "enable_host_compute": get_option("enable_host_compute"),
+        # "record_computing_path": get_option("record_computing_path"),
+        "disable_memory_forwarding": get_option("disable_memory_forwarding"),
+        "enable_dtr_auto_drop": get_option("enable_dtr_auto_drop"),
+        "enable_dtr_sqrt_sampling": get_option("enable_dtr_sqrt_sampling"),
+        "dtr_eviction_threshold": get_option("dtr_eviction_threshold"),
+        "dtr_evictee_minimum_size": get_option("dtr_evictee_minimum_size"),
+        "benchmark_kernel": config.benchmark_kernel,
+        "deterministic_kernel": config.deterministic_kernel,
+        "compute_mode": config._compute_mode,
+        "conv_format": config._conv_format,
+        "amp_enabled": amp.enabled,
+        "convert_inputs": _get_convert_inputs(),
+        "amp_dtype_autocast": _get_amp_dtype_autocast(),
+        "amp_high_prec_dtype": _get_amp_high_prec_dtype(),
+        "amp_low_prec_dtype": _get_amp_low_prec_dtype(),
+    }
+    yield
+    env_vars2 = {
+        "symbolic_shape": trace_option.use_symbolic_shape(),
+        "async_level": get_option("async_level"),
+        "enable_drop": get_option("enable_drop"),
+        "max_recompute_time": get_option("max_recompute_time"),
+        "catch_worker_execption": get_option("catch_worker_execption"),
+        "enable_host_compute": get_option("enable_host_compute"),
+        # "record_computing_path": get_option("record_computing_path"),
+        "disable_memory_forwarding": get_option("disable_memory_forwarding"),
+        "enable_dtr_auto_drop": get_option("enable_dtr_auto_drop"),
+        "enable_dtr_sqrt_sampling": get_option("enable_dtr_sqrt_sampling"),
+        "dtr_eviction_threshold": get_option("dtr_eviction_threshold"),
+        "dtr_evictee_minimum_size": get_option("dtr_evictee_minimum_size"),
+        "benchmark_kernel": config.benchmark_kernel,
+        "deterministic_kernel": config.deterministic_kernel,
+        "compute_mode": config._compute_mode,
+        "conv_format": config._conv_format,
+        "amp_enabled": amp.enabled,
+        "convert_inputs": _get_convert_inputs(),
+        "amp_dtype_autocast": _get_amp_dtype_autocast(),
+        "amp_high_prec_dtype": _get_amp_high_prec_dtype(),
+        "amp_low_prec_dtype": _get_amp_low_prec_dtype(),
+    }
+    for key in env_vars1:
+        assert (
+            env_vars1[key] == env_vars2[key]
+        ), "{} has been changed after test".format(key)
@@ -37,7 +37,7 @@ if [[ "$TEST_PLAT" =~ "local" ]]; then
     PY_IGNORE_IMPORTMISMATCH=1 python3 -m pytest -s -v $test_dirs -m 'not isolated_distributed'
     if [[ "$TEST_PLAT" =~ "cuda" ]]; then
         echo "test GPU pytest now"
-        PY_IGNORE_IMPORTMISMATCH=1 python3 -m pytest -s -v $test_dirs -m 'isolated_distributed'
+        PY_IGNORE_IMPORTMISMATCH=1 python3 -m pytest -s -v $test_dirs -m 'isolated_distributed' --ignore=./integration/test_dtr.py
     fi
 else
     cd $(dirname "${BASH_SOURCE[0]}")/..
@@ -39,8 +39,6 @@ from megengine.random import uniform
     get_device_count("xpu") <= 2, reason="xpu counts need > 2",
 )
 def test_gaussian_op():
-    # FIXME: remove this sync
-    mge.core.set_option("async_level", 0)
     set_global_seed(1024)
     shape = (
         8,
@@ -516,4 +514,3 @@ def test_rng_empty_tensor(is_symbolic):
         np.testing.assert_equal(out.numpy().shape, (0,))
         if is_symbolic is None:
             break
-    mge.core.set_option("async_level", 2)
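
Both removals appear to be superseded by the delete_rng_handle change above: the runtime now synchronizes and checks async errors itself, so tests no longer pin "async_level" around RNG usage, and the autouse run_around_tests fixture would flag any test that leaks such an option change. A test that still needs a different level should restore it itself, e.g. (a sketch, not from this change):

import megengine as mge

def test_requires_sync_execution():
    # save, change, and always restore so run_around_tests stays green
    level = mge.core.get_option("async_level")
    mge.core.set_option("async_level", 0)
    try:
        pass  # body that must run synchronously
    finally:
        mge.core.set_option("async_level", level)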
@@ -227,6 +227,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     TensorLayout dst_layout = TensorLayout({layout_a[0], layout_b[1]}, dst_dtype);
     dst_layout.init_contiguous_stride();

+    if (matmul.transposeA)
+        std::swap(layout_a.shape[0], layout_a.shape[1]);
+    if (matmul.transposeB)
+        std::swap(layout_b.shape[0], layout_b.shape[1]);
+
     DeviceTensorND out =
             BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout);
     size_t sz = setup_algo<megdnn::MatrixMul>(
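
When transposeA or transposeB is set, that operand participates as its transpose, so its two dims trade roles and the layouts handed to algorithm selection (setup_algo below) must reflect the swap; otherwise the kernel is sized for the wrong shapes. The shape rule in numpy terms (illustration only):

import numpy as np

# transposeA=True: the stored operand has shape (k, m) but contributes
# m rows to the (m, n) result, so dims 0 and 1 trade places.
m, k, n = 2, 3, 4
a_stored = np.ones((k, m), dtype="float32")  # stored transposed
b = np.ones((k, n), dtype="float32")
out = a_stored.T @ b
assert out.shape == (m, n)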
@@ -80,13 +80,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             op_def.policy(), false);

     megdnn::Workspace dnn_wk;
-    if (wk_size != 0) {
-        auto wk = Blob::make(cn, wk_size);
-        dnn_wk.raw_ptr = wk->storage().get();
-        dnn_wk.size = wk_size;
+    if (wk_size) {
+        TensorLayout w_layout({wk_size}, dtype::Byte());
+        dnn_wk = caller.create_workspace(w_layout);
     }

-    dnn_opr->exec(inp_tensornd, out_devtensor.as_megdnn(), {});
+    dnn_opr->exec(inp_tensornd, out_devtensor.as_megdnn(), dnn_wk);

     return {Tensor::make(out_devtensor)};
 }
@@ -174,10 +174,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(

     megdnn::Workspace dnn_wk;
     auto wk_size = dnn_op.op->get_workspace_in_bytes(src, layout);
-    if (wk_size != 0) {
-        auto wk = Blob::make(comp_node, wk_size);
-        dnn_wk.raw_ptr = wk->storage().get();
-        dnn_wk.size = wk_size;
+    if (wk_size) {
+        TensorLayout w_layout({wk_size}, dtype::Byte());
+        dnn_wk = dnn_op.create_workspace(w_layout);
     }

     DeviceTensorND out =
@@ -14,7 +14,7 @@
 #include "megbrain_build_config.h"

 #define MGE_MAJOR 1
-#define MGE_MINOR 8
+#define MGE_MINOR 9999
 #define MGE_PATCH 0

 // for rc version, could be like "rc1", "rc2", etc