GitOrigin-RevId: 7f6629ae1f
tags/v1.0.0-rc1
@@ -697,8 +697,10 @@ endif() | |||
if(MGE_WITH_PYTHON_MODULE) | |||
if(MGE_BUILD_IMPERATIVE_RT) | |||
add_subdirectory(imperative) | |||
message("-- Enable imperative python wrapper runtime") | |||
else() | |||
add_subdirectory(python_module) | |||
message("-- Enable legacy python wrapper runtime") | |||
endif() | |||
endif() | |||
@@ -342,7 +342,11 @@ template <typename T> | |||
struct SafeMultiplies; | |||
template <typename T> | |||
#if __cplusplus >= 201703L | |||
struct _SafeMultipliesImplUnsigned { | |||
#else | |||
struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> { | |||
#endif | |||
static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8; | |||
static size_t clz(unsigned x) { | |||
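Background note for this hunk: std::binary_function was deprecated in C++11 and removed in C++17, which is why the base class is only kept for pre-C++17 builds. A minimal sketch of the member typedefs that base class used to supply, in case any caller still relies on them (an assumption; nothing in this diff shows such a caller):
```
// Sketch only: the typedefs std::binary_function<T, T, T> provided.
// If nothing queries them, dropping the base class under C++17
// (as this diff does) is enough.
template <typename T>
struct SafeMultipliesTypes {
    using first_argument_type = T;
    using second_argument_type = T;
    using result_type = T;
};
```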
@@ -70,8 +70,10 @@ if (MEG_WITH_ROCM) | |||
target_link_libraries (megdnn_test ${MGE_ROCM_LIBS}) | |||
endif () | |||
if(APPLE OR ANDROID) | |||
target_link_libraries(megdnn_test dl) | |||
else() | |||
target_link_libraries(megdnn_test dl rt) | |||
if(UNIX) | |||
if(APPLE OR ANDROID) | |||
target_link_libraries(megdnn_test dl) | |||
else() | |||
target_link_libraries(megdnn_test dl rt) | |||
endif() | |||
endif() |
@@ -89,7 +89,7 @@ public: | |||
auto ptr = tensor.ptr<int>(); | |||
for (size_t n = 0; n < size; ++n) { | |||
std::set<int> used; | |||
std::random_shuffle(seq.begin(), seq.end()); | |||
COMPAT_RANDOM(seq.begin(), seq.end()); | |||
for (size_t step = 0; step < stride; ++step) { | |||
megdnn_assert(used.size() < m_size); | |||
ptr[n * stride + step] = seq[step]; | |||
@@ -75,7 +75,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) { | |||
i2f.i = static_cast<uint16_t>(x); | |||
m_sequence.push_back(i2f.f); | |||
} | |||
std::random_shuffle(m_sequence.begin(), m_sequence.end()); | |||
COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); | |||
} | |||
Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | |||
@@ -99,7 +99,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | |||
m_sequence.push_back(i2f.f); | |||
} | |||
std::random_shuffle(m_sequence.begin(), m_sequence.end()); | |||
COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); | |||
} | |||
void Float16PeriodicalRNG::gen(const TensorND& tensor) { | |||
@@ -19,6 +19,16 @@ | |||
namespace megdnn { | |||
namespace test { | |||
#if __cplusplus >= 201703L | |||
#define COMPAT_RANDOM(begin, end) \ | |||
{ \ | |||
std::default_random_engine rng_engine; \ | |||
std::shuffle(begin, end, rng_engine); \ | |||
} | |||
#else | |||
#define COMPAT_RANDOM(begin, end) std::random_shuffle(begin, end); | |||
#endif | |||
class RNG { | |||
protected: | |||
class RNGxorshf; | |||
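For reference, a sketch of what COMPAT_RANDOM expands to on C++17 and later (assumption kept from the macro above: a freshly constructed, default-seeded std::default_random_engine per call):
```
#include <algorithm>
#include <random>
#include <vector>

// Pre-C++17 this was std::random_shuffle(v.begin(), v.end());
// std::random_shuffle was removed in C++17, so the macro switches to
// std::shuffle with an explicit engine.
void shuffle_compat(std::vector<int>& v) {
    std::default_random_engine rng_engine;  // default-seeded, like the macro
    std::shuffle(v.begin(), v.end(), rng_engine);
}
```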
@@ -24,15 +24,16 @@ class ArgmxxRNG final: public RNG { | |||
void gen(const TensorND &tensor) override { | |||
auto offset = tensor.layout.span().low_elem; | |||
auto nr_elems = tensor.layout.span().dist_elem(); | |||
#define cb(DType) \ | |||
if (tensor.layout.dtype == DType()) { \ | |||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||
auto ptr = tensor.ptr<ctype>(); \ | |||
for (size_t i = 0; i < nr_elems; ++i) { \ | |||
ptr[offset+i] = i; \ | |||
} \ | |||
std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ | |||
} | |||
#define cb(DType) \ | |||
if (tensor.layout.dtype == DType()) { \ | |||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||
auto ptr = tensor.ptr<ctype>(); \ | |||
for (size_t i = 0; i < nr_elems; ++i) { \ | |||
ptr[offset + i] = i; \ | |||
} \ | |||
COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ | |||
} | |||
MEGDNN_FOREACH_COMPUTING_DTYPE(cb); | |||
#undef cb | |||
} | |||
@@ -32,7 +32,7 @@ class ArgsortRNG final : public RNG { | |||
} else { | |||
for (int i = 0; i < n; ++i) | |||
ptr[i] = static_cast<T>(i - n / 2); | |||
std::random_shuffle(ptr, ptr + n); | |||
COMPAT_RANDOM(ptr, ptr + n); | |||
} | |||
} | |||
@@ -86,7 +86,7 @@ void run_backward_test(Handle* handle, DType dtype) { | |||
for (size_t j = 0; j < n; ++j) { | |||
ptr[j] = j; | |||
} | |||
std::random_shuffle(ptr, ptr + n); | |||
COMPAT_RANDOM(ptr, ptr + n); | |||
ptr += n; | |||
} | |||
} | |||
@@ -361,9 +361,8 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_7) { | |||
for (size_t r = 0; r < _dim.size(); r++) | |||
permutation[r] = r; | |||
for (int nsample = 0; nsample < 50; nsample++) { | |||
std::random_shuffle(_dim.begin(), _dim.end()); | |||
std::random_shuffle(permutation.begin(), permutation.end()); | |||
COMPAT_RANDOM(_dim.begin(), _dim.end()); | |||
COMPAT_RANDOM(permutation.begin(), permutation.end()); | |||
if (!isTrivial(permutation)) { | |||
run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5], | |||
_dim[6]}, | |||
@@ -451,9 +450,10 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_5) { | |||
printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re); | |||
// printVec(dim); | |||
std::random_shuffle(dim.begin(), dim.end()); | |||
COMPAT_RANDOM(dim.begin(), dim.end()); | |||
while (isTrivial(permutation)) { | |||
std::random_shuffle(permutation.begin(), permutation.end()); | |||
COMPAT_RANDOM(permutation.begin(), permutation.end()); | |||
} | |||
run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()}, | |||
@@ -603,8 +603,9 @@ TEST_F(CUDA, BENCHMARK_LAST_CONTIG_ALIGN_TEST) { | |||
for (size_t r = 0; r < _dim.size(); r++) | |||
permutation[r] = r; | |||
for (int nsample = 0; nsample < 20; nsample++) { | |||
std::random_shuffle(_dim.begin(), _dim.end() - 1); | |||
std::random_shuffle(permutation.begin(), permutation.end() - 1); | |||
COMPAT_RANDOM(_dim.begin(), _dim.end() - 1); | |||
COMPAT_RANDOM(permutation.begin(), permutation.end() - 1); | |||
if (nsample < 5) | |||
_dim[5] = (u.gen_single_val() / 4 + 1) * 4; | |||
@@ -24,7 +24,7 @@ using namespace test; | |||
TEST_F(CUDA, SLEEP) { | |||
auto opr = this->handle_cuda()->create_operator<Sleep>(); | |||
auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>(); | |||
auto run = [&](float time) -> double { | |||
opr->param() = {time}; | |||
@@ -24,16 +24,17 @@ class ArgmxxRNG final: public RNG { | |||
void gen(const TensorND &tensor) override { | |||
auto offset = tensor.layout.span().low_elem; | |||
auto nr_elems = tensor.layout.span().dist_elem(); | |||
#define cb(DType) \ | |||
if (tensor.layout.dtype == DType()) { \ | |||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||
auto ptr = tensor.ptr<ctype>(); \ | |||
for (size_t i = 0; i < nr_elems; ++i) { \ | |||
ptr[offset+i] = i; \ | |||
} \ | |||
std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ | |||
return; \ | |||
} | |||
#define cb(DType) \ | |||
if (tensor.layout.dtype == DType()) { \ | |||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||
auto ptr = tensor.ptr<ctype>(); \ | |||
for (size_t i = 0; i < nr_elems; ++i) { \ | |||
ptr[offset + i] = i; \ | |||
} \ | |||
COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ | |||
return; \ | |||
} | |||
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); | |||
#undef cb | |||
megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s", | |||
@@ -76,7 +76,11 @@ add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT}) | |||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) | |||
pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) | |||
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||
if (APPLE OR MSVC OR WIN32) | |||
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn) | |||
else() | |||
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||
endif() | |||
if (MGE_WITH_DISTRIBUTED) | |||
message("Imperative configured to link megray") | |||
target_link_libraries(${MODULE_NAME} PRIVATE megray) | |||
@@ -91,6 +95,10 @@ set_target_properties(${MODULE_NAME} PROPERTIES | |||
SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} | |||
LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core | |||
) | |||
if (APPLE OR MSVC OR WIN32) | |||
message("-- overwriting SUFFIX at macos and windows before config by set_target_properties") | |||
pybind11_extension(${MODULE_NAME}) | |||
endif() | |||
add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) | |||
if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) | |||
@@ -8,6 +8,67 @@ | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
import os | |||
import sys | |||
import platform | |||
import ctypes | |||
if sys.platform == "win32": | |||
lib_path = os.path.join(os.path.dirname(__file__), "core/lib") | |||
dll_paths = list(filter(os.path.exists, [lib_path,])) | |||
assert len(dll_paths) > 0 | |||
kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True) | |||
has_load_library_attr = hasattr(kernel32, "AddDllDirectory") | |||
old_error_mode = kernel32.SetErrorMode(0x0001) | |||
kernel32.LoadLibraryW.restype = ctypes.c_void_p | |||
if has_load_library_attr: | |||
kernel32.AddDllDirectory.restype = ctypes.c_void_p | |||
kernel32.LoadLibraryExW.restype = ctypes.c_void_p | |||
for dll_path in dll_paths: | |||
if sys.version_info >= (3, 8): | |||
os.add_dll_directory(dll_path) | |||
elif has_load_library_attr: | |||
res = kernel32.AddDllDirectory(dll_path) | |||
if res is None: | |||
err = ctypes.WinError(ctypes.get_last_error()) | |||
err.strerror += ' Error adding "{}" to the DLL search PATH.'.format( | |||
dll_path | |||
) | |||
raise err | |||
else: | |||
print("WARN: python or OS env have some issue, may load DLL failed!!!") | |||
import glob | |||
dlls = glob.glob(os.path.join(lib_path, "*.dll")) | |||
path_patched = False | |||
for dll in dlls: | |||
is_loaded = False | |||
if has_load_library_attr: | |||
res = kernel32.LoadLibraryExW(dll, None, 0x00001100) | |||
last_error = ctypes.get_last_error() | |||
if res is None and last_error != 126: | |||
err = ctypes.WinError(last_error) | |||
err.strerror += ' Error loading "{}" or one of its dependencies.'.format( | |||
dll | |||
) | |||
raise err | |||
elif res is not None: | |||
is_loaded = True | |||
if not is_loaded: | |||
if not path_patched: | |||
os.environ["PATH"] = ";".join(dll_paths + [os.environ["PATH"]]) | |||
path_patched = True | |||
res = kernel32.LoadLibraryW(dll) | |||
if res is None: | |||
err = ctypes.WinError(ctypes.get_last_error()) | |||
err.strerror += ' Error loading "{}" or one of its dependencies.'.format( | |||
dll | |||
) | |||
raise err | |||
kernel32.SetErrorMode(old_error_mode) | |||
from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func | |||
from .device import * | |||
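A minimal sketch of the Python >= 3.8 branch used above (the lib_path derivation mirrors the code in this diff; error handling is omitted):
```
import os
import sys

# On Windows, Python 3.8+ no longer uses PATH to resolve the dependencies of
# extension modules; os.add_dll_directory() is the supported replacement.
if sys.platform == "win32" and sys.version_info >= (3, 8):
    lib_path = os.path.join(os.path.dirname(__file__), "core/lib")
    if os.path.exists(lib_path):
        os.add_dll_directory(lib_path)
```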
@@ -6,10 +6,14 @@ | |||
# Unless required by applicable law or agreed to in writing, | |||
# software distributed under the License is distributed on an | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
import resource | |||
import platform | |||
import sys | |||
import threading | |||
# Windows does not provide the resource module
if platform.system() != "Windows": | |||
import resource | |||
class AlternativeRecursionLimit: | |||
r"""A reentrant context manager for setting global recursion limits. | |||
@@ -28,16 +32,24 @@ class AlternativeRecursionLimit: | |||
with self.lock: | |||
if self.count == 0: | |||
self.orig_py_limit = sys.getrecursionlimit() | |||
if platform.system() != "Windows": | |||
( | |||
self.orig_rlim_stack_soft, | |||
self.orig_rlim_stack_hard, | |||
) = resource.getrlimit(resource.RLIMIT_STACK) | |||
resource.setrlimit( | |||
resource.RLIMIT_STACK, | |||
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), | |||
) | |||
# increase recursion limit | |||
sys.setrecursionlimit(self.new_py_limit) | |||
# FIXME: https://bugs.python.org/issue34602, official python3 release builds
# on macOS always hit this issue; not all users install python3 from source
try: | |||
resource.setrlimit( | |||
resource.RLIMIT_STACK, | |||
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), | |||
) | |||
except ValueError as exc: | |||
if platform.system() != "Darwin": | |||
raise exc | |||
# increase recursion limit | |||
sys.setrecursionlimit(self.new_py_limit) | |||
self.count += 1 | |||
def __exit__(self, type, value, traceback): | |||
@@ -45,10 +57,16 @@ class AlternativeRecursionLimit: | |||
self.count -= 1 | |||
if self.count == 0: | |||
sys.setrecursionlimit(self.orig_py_limit) | |||
resource.setrlimit( | |||
resource.RLIMIT_STACK, | |||
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), | |||
) | |||
if platform.system() != "Windows": | |||
try: | |||
resource.setrlimit( | |||
resource.RLIMIT_STACK, | |||
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), | |||
) | |||
except ValueError as exc: | |||
if platform.system() != "Darwin": | |||
raise exc | |||
_max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | |||
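A hedged usage sketch of the context manager defined above, using the module-level instance this diff creates (the wrapper function below is hypothetical):
```
# Entering the context raises sys.setrecursionlimit() and, on non-Windows
# platforms, the RLIMIT_STACK soft limit; exiting restores both.
def run_with_deep_recursion(fn, *args, **kwargs):
    with _max_recursion_limit_context_manager:
        return fn(*args, **kwargs)
```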
@@ -9,6 +9,7 @@ | |||
import os | |||
import re | |||
import pathlib | |||
import platform | |||
from distutils.file_util import copy_file | |||
from setuptools import setup, find_packages, Extension | |||
from setuptools.command.build_ext import build_ext as _build_ext | |||
@@ -29,7 +30,10 @@ class build_ext(_build_ext): | |||
extdir.parent.mkdir(parents=True, exist_ok=True) | |||
modpath = self.get_ext_fullname(ext.name).split('.') | |||
modpath[-1] += '.so' | |||
if platform.system() == 'Windows': | |||
modpath[-1] += '.pyd' | |||
else: | |||
modpath[-1] += '.so' | |||
modpath = str(pathlib.Path(*modpath).resolve()) | |||
copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) | |||
@@ -47,6 +51,14 @@ if local_version: | |||
__version__ = '{}+{}'.format(__version__, local_version) | |||
packages = find_packages(exclude=['test']) | |||
package_data = [ | |||
str(f.relative_to('megengine')) | |||
for f in pathlib.Path('megengine', 'core', 'include').glob('**/*') | |||
] | |||
package_data += [ | |||
str(f.relative_to('megengine')) | |||
for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*') | |||
] | |||
with open('requires.txt') as f: | |||
requires = f.read().splitlines() | |||
@@ -63,6 +75,9 @@ setup_kwargs = dict( | |||
author='Megvii Engine Team', | |||
author_email=email, | |||
packages=packages, | |||
package_data={ | |||
'megengine': package_data, | |||
}, | |||
ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')], | |||
install_requires=requires, | |||
extras_require={ | |||
@@ -9,15 +9,6 @@ | |||
#include "megbrain/utils/mempool.h" | |||
#include "./numpy_dtypes.h" | |||
/* | |||
* demangle typeid, see | |||
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname | |||
*/ | |||
#ifdef __GNUG__ | |||
#include <cstdlib> | |||
#include <memory> | |||
#include <cxxabi.h> | |||
namespace py = pybind11; | |||
PyTaskDipatcher py_task_q = {}; | |||
@@ -34,10 +25,18 @@ py::module rel_import(py::str name, py::module m, int level) { | |||
return import(name, m.attr("__dict__"), py::arg("level")=level); | |||
} | |||
/* | |||
* demangle typeid, see | |||
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname | |||
*/ | |||
#ifdef __GNUG__ | |||
#include <cxxabi.h> | |||
#include <cstdlib> | |||
#include <memory> | |||
namespace { | |||
std::string demangle_typeid(const char* name) { | |||
int status = -4; // some arbitrary value to eliminate the compiler warning | |||
// enable c++11 by passing the flag -std=c++11 to g++ | |||
@@ -48,7 +47,7 @@ std::string demangle_typeid(const char* name) { | |||
return (status==0) ? res.get() : name ; | |||
} | |||
} | |||
} // namespace | |||
#else | |||
namespace { | |||
@@ -1,4 +1,8 @@ | |||
#include "utils.h" | |||
#ifdef WIN32 | |||
#include <stdio.h> | |||
#include <windows.h> | |||
#endif | |||
#include <pybind11/operators.h> | |||
#include <atomic> | |||
@@ -8,6 +8,7 @@ | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
import multiprocessing as mp | |||
import os | |||
import platform | |||
import re | |||
import subprocess | |||
import sys | |||
@@ -196,6 +197,9 @@ def run_test( | |||
@pytest.mark.isolated_distributed | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
def test_dp_correctness(): | |||
model_name = "mnist_model_with_test.mge" | |||
model_path = os.path.join(os.path.dirname(__file__), model_name) | |||
@@ -35,7 +35,7 @@ from megengine.functional.distributed import ( | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_reduce_sum(): | |||
@@ -77,7 +77,7 @@ def test_reduce_sum(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_broadcast(): | |||
@@ -115,7 +115,7 @@ def test_broadcast(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_all_gather(): | |||
@@ -154,7 +154,7 @@ def test_all_gather(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_reduce_scatter_sum(): | |||
@@ -193,7 +193,7 @@ def test_reduce_scatter_sum(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_all_reduce_sum(): | |||
@@ -232,7 +232,7 @@ def test_all_reduce_sum(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_all_reduce_max(): | |||
@@ -271,7 +271,7 @@ def test_all_reduce_max(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_all_reduce_min(): | |||
@@ -310,7 +310,7 @@ def test_all_reduce_min(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_gather(): | |||
@@ -352,7 +352,7 @@ def test_gather(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_scatter(): | |||
@@ -390,7 +390,7 @@ def test_scatter(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_all_to_all(): | |||
@@ -430,7 +430,7 @@ def test_all_to_all(): | |||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
) | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
@pytest.mark.isolated_distributed | |||
def test_io_remote(): | |||
@@ -6,6 +6,7 @@ | |||
# Unless required by applicable law or agreed to in writing, | |||
# software distributed under the License is distributed on an | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
import platform | |||
import weakref | |||
import numpy as np | |||
@@ -51,6 +52,9 @@ def save_to(self, name="grad"): | |||
@pytest.mark.isolated_distributed | |||
@pytest.mark.skipif( | |||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
) | |||
def test_dist_grad(): | |||
world_size = 2 | |||
x_np = np.random.rand(10).astype("float32") | |||
@@ -9,7 +9,17 @@ | |||
#include "megbrain/imperative/profiler.h" | |||
#if defined(_MSC_VER) || defined(WIN32) | |||
#include <windows.h> | |||
#define getpid GetCurrentProcessId | |||
#else | |||
#include <sys/unistd.h> | |||
#endif | |||
#if defined(__APPLE__) || defined(__MACOSX) | |||
#include <unistd.h> | |||
#endif | |||
#include <variant> | |||
#include "megbrain/imperative/ops/opr_attr.h" | |||
@@ -16,6 +16,10 @@ | |||
#include "megbrain/imperative/ops/opr_attr.h" | |||
#include "megbrain/imperative/ops/backward_graph.h" | |||
#if __cplusplus >= 201703L | |||
#include <optional> | |||
#endif | |||
namespace mgb { | |||
namespace imperative { | |||
@@ -38,8 +38,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||
endif() | |||
if(UNIX) | |||
target_link_libraries(imperative_test dl rt) | |||
if(APPLE OR ANDROID) | |||
target_link_libraries(imperative_test dl) | |||
else() | |||
target_link_libraries(imperative_test dl rt) | |||
endif() | |||
endif() | |||
install(TARGETS imperative_test RUNTIME DESTINATION test) |
@@ -81,7 +81,10 @@ else() | |||
target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||
endif() | |||
target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR}) | |||
target_link_libraries(mgb ${PYTHON_LIBRARIES}) | |||
# only Windows needs to link PYTHON_LIBRARIES
if(MSVC OR WIN32) | |||
target_link_libraries(mgb ${PYTHON_LIBRARIES}) | |||
endif() | |||
if (MGE_WITH_DISTRIBUTED) | |||
target_link_libraries(mgb megray) | |||
@@ -30,11 +30,17 @@ | |||
4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env | |||
4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib to system Path env
if you do not do 4d/4e/4f, the CUDA runtime cannot find the dlls
5: install python3 (default 3.8.3) to /c/Users/${USER}/mge_whl_python_env/3.8.3,
put it in the PATH env and run python3 -m pip install numpy (if you want to build with training mode or build a python whl)
6: install swig via its installer GUI (if you want to build with training mode or build a python whl)
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip | |||
b: install swig to /c/Users/${USER}/swigwin-4.0.2 | |||
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2 | |||
``` | |||
### linux host build | |||
``` | |||
1: cmake, which version > 3.14.4 | |||
2: gcc/g++, which version > 6 | |||
2: gcc/g++, which version > 6 (gcc/g++ >= 7 if you need to build training mode)
3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl | |||
4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool: | |||
5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo | |||
@@ -47,6 +53,7 @@ | |||
3: brew install python python3 swig coreutils | |||
4: install at least xcode command line tool: https://developer.apple.com/xcode/ | |||
5: about cuda: we do not support CUDA on macos | |||
6: python3 -m pip install numpy (if you want to build with training mode or build a python whl)
``` | |||
### cross build for arm-android | |||
we now support cross building from windows/linux/macos to arm-android
@@ -9,6 +9,7 @@ function usage() { | |||
echo "-t : Build with training mode, default inference only" | |||
echo "-m : Build with m32 mode(only for windows build), default m64" | |||
echo "-r : remove old build dir before make, default off" | |||
echo "-n : enable new python runtime(valid when training mode with -t, default is legacy runtime)" | |||
echo "-h : show usage" | |||
echo "append other cmake config by export EXTRA_CMAKE_ARGS=..." | |||
echo "example: $0 -d" | |||
@@ -22,9 +23,10 @@ MGE_WINDOWS_BUILD_ARCH=x64 | |||
MGE_WINDOWS_BUILD_MARCH=m64 | |||
MGE_ARCH=x86_64 | |||
REMOVE_OLD_BUILD=false | |||
MGE_BUILD_IMPERATIVE_RT=OFF | |||
echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" | |||
while getopts "rhdctm" arg | |||
while getopts "rhdctmn" arg | |||
do | |||
case $arg in | |||
d) | |||
@@ -48,11 +50,15 @@ do | |||
REMOVE_OLD_BUILD=true | |||
;; | |||
m) | |||
echo "build for m32(only use for windows)" | |||
echo "build for m32(only valid use for windows)" | |||
MGE_WINDOWS_BUILD_ARCH=x86 | |||
MGE_WINDOWS_BUILD_MARCH=m32 | |||
MGE_ARCH=i386 | |||
;; | |||
n) | |||
echo "Enable imperative python wrapper runtime" | |||
MGE_BUILD_IMPERATIVE_RT=ON | |||
;; | |||
?) | |||
echo "unkonw argument" | |||
usage | |||
@@ -101,6 +107,7 @@ function cmake_build() { | |||
cmake \ | |||
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | |||
-DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | |||
-DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ | |||
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | |||
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | |||
${EXTRA_CMAKE_ARGS} \ | |||
@@ -112,7 +119,7 @@ function cmake_build() { | |||
function windows_env_err() { | |||
echo "check windows env failed!!" | |||
echo "please install LLVM/clang-cl/cmake/python at Visual Studio Extensions" | |||
echo "please install env refs for: scripts/cmake-build/BUILD_README.md" | |||
exit -1 | |||
} | |||
@@ -178,6 +185,25 @@ function prepare_env_for_windows_build() { | |||
export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS | |||
export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH | |||
export INCLUDE=$INCLUDE:$CPATH | |||
# the python version will be configured by the whl build script or ci script; we need
# a default version so the build succeeds when host_build.sh is called directly
if [[ -z ${ALREADY_CONFIG_PYTHON_VER} ]] | |||
then | |||
echo "config a default python3" | |||
DFT_PYTHON_BIN=/c/Users/${USER}/mge_whl_python_env/3.8.3 | |||
if [ ! -f "${DFT_PYTHON_BIN}/python3.exe" ]; then | |||
echo "ERR: can not find ${DFT_PYTHON_BIN}/python3.exe , Invalid env" | |||
windows_env_err | |||
else | |||
echo "put python3 to env..." | |||
export PATH=${DFT_PYTHON_BIN}:$PATH | |||
which python3 | |||
fi | |||
fi | |||
echo "export swig pwd to PATH" | |||
export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH | |||
} | |||
WINDOWS_BUILD_TARGET="Ninja all > build.log" | |||
@@ -218,6 +244,7 @@ function cmake_build_windows() { | |||
vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \ | |||
-DMGE_ARCH=$MGE_ARCH \ | |||
-DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | |||
-DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ | |||
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | |||
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | |||
-DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \ | |||
@@ -230,8 +257,18 @@ function cmake_build_windows() { | |||
${WINDOWS_BUILD_TARGET}" | |||
} | |||
if [ ${MGE_BUILD_IMPERATIVE_RT} = "ON" ] && [ ${MGE_INFERENCE_ONLY} = "ON" ]; then | |||
echo "ERR: MGE_BUILD_IMPERATIVE_RT(-n) only valid when enable training mode(-t)" | |||
echo "pls remove -n or add -t" | |||
exit -1 | |||
fi | |||
if [[ $OS =~ "NT" ]]; then | |||
if [ ${MGE_ARCH} = "i386" ] && [ ${MGE_INFERENCE_ONLY} = "OFF" ]; then | |||
echo "ERR: training mode(-t) only support 64 bit mode" | |||
echo "pls remove -t or remove -m" | |||
exit -1 | |||
fi | |||
config_windows_build_target | |||
cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE | |||
else | |||
@@ -53,10 +53,6 @@ | |||
d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip | |||
d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt | |||
d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate | |||
5: install swig from install gui | |||
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip | |||
b: install swig to /c/Users/${USER}/swigwin-4.0.2 | |||
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2 | |||
``` | |||
# how to build | |||
@@ -90,6 +86,11 @@ | |||
``` | |||
ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh | |||
``` | |||
If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg: | |||
``` | |||
ALL_PYTHON=3.5.9 BUILD_IMPERATIVE="ON" ./scripts/whl/macos/macos_build_whl.sh | |||
``` | |||
## build for windows | |||
``` | |||
./scripts/whl/windows/windows_build_whl.sh | |||
@@ -102,5 +103,7 @@ | |||
If you want to build a windows whl with cuda and a specific Python version, e.g.:
``` | |||
WINDOWS_WHL_WITH_CUDA="true" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||
WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||
``` | |||
If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg: | |||
BUILD_IMPERATIVE="ON" WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh |
@@ -65,16 +65,18 @@ function config_python_env() { | |||
fi | |||
echo ${ver} | |||
#config a dir to trick cmake find a null pythonlib | |||
PYTHON_LIBRARY=${PYTHON_DIR}lib/ | |||
if [ "$1" = "3.5.9" ]; then | |||
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m | |||
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.5m.dylib | |||
elif [ "$1" = "3.6.10" ]; then | |||
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m | |||
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.6m.dylib | |||
elif [ "$1" = "3.7.7" ]; then | |||
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m | |||
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.7m.dylib | |||
elif [ "$1" = "3.8.3" ]; then | |||
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8 | |||
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.8.dylib | |||
else | |||
echo "ERR: DO NOT SUPPORT PYTHON VERSION" | |||
echo "now support list: ${FULL_PYTHON_VER}" | |||
@@ -82,6 +84,11 @@ function config_python_env() { | |||
fi | |||
} | |||
if [[ -z ${BUILD_IMPERATIVE} ]] | |||
then | |||
BUILD_IMPERATIVE="OFF" | |||
fi | |||
function do_build() { | |||
for ver in ${ALL_PYTHON} | |||
do | |||
@@ -89,7 +96,7 @@ function do_build() { | |||
config_python_env ${ver} | |||
#check env | |||
if [ ! -d "$PYTHON_LIBRARY" ]; then | |||
if [ ! -f "$PYTHON_LIBRARY" ]; then | |||
echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package" | |||
err_env | |||
fi | |||
@@ -102,14 +109,20 @@ function do_build() { | |||
#append cmake args for config python | |||
export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | |||
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | |||
export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||
#call build and install | |||
#FIXME: cmake does not trigger an update of the python config after
#PYTHON_LIBRARY and PYTHON_INCLUDE_DIR change, so add
#-r to remove the build cache for each new version build, which
#makes the build slower than without -r
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
echo "build whl with IMPERATIVE python rt" | |||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -n -r | |||
else | |||
echo "build whl with legacy python rt" | |||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||
fi | |||
#call setup.py | |||
BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/ | |||
@@ -121,12 +134,47 @@ function do_build() { | |||
fi | |||
mkdir -p staging | |||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
echo "build whl with IMPERATIVE python rt" | |||
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cd ${BUILD_DIR}/staging/megengine/core | |||
rt_file=`ls _imperative_rt.*.so` | |||
echo "rt file is: ${rt_file}" | |||
if [[ -z ${rt_file} ]] | |||
then | |||
echo "ERR: can not find valid rt file" | |||
exit -1 | |||
fi | |||
llvm-strip -s ${rt_file} | |||
mv ${rt_file} _imperative_rt.so | |||
echo "check so valid or not..." | |||
otool_out=`otool -L _imperative_rt.so` | |||
if [[ "${otool_out}" =~ "ython" ]]; then | |||
echo "ERR: invalid _imperative_rt.so which depend on python lib, detail: log" | |||
echo ${otool_out} | |||
exit -1 | |||
else | |||
echo "valid..." | |||
fi | |||
else | |||
echo "build whl with legacy python rt" | |||
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cd ${BUILD_DIR}/staging/megengine/_internal | |||
#FIXME: setting the lib suffix to dylib may be better, BUT we found that distutils.file_util.copy_file
#changes it back to .so on macos even when the suffix is set to dylib; macos also supports .so
echo "check so valid or not..." | |||
llvm-strip -s _mgb.so | |||
otool_out=`otool -L _mgb.so` | |||
if [[ "${otool_out}" =~ "ython" ]]; then | |||
echo "ERR: invalid _mgb.so which depend on python lib, detail: log" | |||
echo ${otool_out} | |||
exit -1 | |||
else | |||
echo "valid..." | |||
fi | |||
fi | |||
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cd ${BUILD_DIR}/staging/megengine/_internal | |||
#FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file | |||
#will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so | |||
llvm-strip -s _mgb.so | |||
cd ${BUILD_DIR}/staging | |||
${PYTHON_DIR}/bin/python3 setup.py bdist_wheel | |||
cd ${BUILD_DIR}/staging/dist/ | |||
@@ -14,8 +14,6 @@ function err_env() { | |||
} | |||
function append_path_env_and_check() { | |||
echo "export swig pwd to PATH" | |||
export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH | |||
echo "export vs2019 install path" | |||
export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise | |||
# for llvm-strip | |||
@@ -62,7 +60,7 @@ function config_python_env() { | |||
if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]] | |||
then | |||
WINDOWS_WHL_WITH_CUDA="false" | |||
WINDOWS_WHL_WITH_CUDA="OFF" | |||
fi | |||
@@ -74,26 +72,46 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6 | |||
CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | |||
CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | |||
CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | |||
function depend_real_copy() { | |||
REAL_DST=$1 | |||
echo "real copy lib to $1" | |||
cp "${TRT_LIB}" ${REAL_DST} | |||
cp "${CUDNN_LIB}" ${REAL_DST} | |||
cp "${CUSOLVER_LIB}" ${REAL_DST} | |||
cp "${CUBLAS_LIB}" ${REAL_DST} | |||
cp "${CURAND_LIB}" ${REAL_DST} | |||
cp "${CUBLASLT_LIB}" ${REAL_DST} | |||
cp "${CUDART_LIB}" ${REAL_DST} | |||
} | |||
function copy_more_dll() { | |||
# for python whl real use | |||
CP_DST=${BUILD_DIR}/staging/megengine/_internal/lib | |||
rm -rf ${CP_DST} | |||
mkdir ${CP_DST} | |||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
echo "config BUILD_IMPERATIVE core lib dir" | |||
CP_WHL_DST=${BUILD_DIR}/staging/megengine/core/lib | |||
else | |||
echo "config legacy python lib dir" | |||
CP_WHL_DST=${BUILD_DIR}/staging/megengine/_internal/lib | |||
fi | |||
rm -rf ${CP_WHL_DST} | |||
mkdir ${CP_WHL_DST} | |||
# workaround for the cpu-only version import failure: use an
# empty.file to trigger setup.py to create the otherwise empty lib dir
echo "empty" > ${CP_WHL_DST}/empty.file | |||
if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then | |||
if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then | |||
echo "copy nvidia lib to whl use...." | |||
cp "${TRT_LIB}" ${CP_DST} | |||
cp "${CUDNN_LIB}" ${CP_DST} | |||
cp "${CUSOLVER_LIB}" ${CP_DST} | |||
cp "${CUBLAS_LIB}" ${CP_DST} | |||
cp "${CURAND_LIB}" ${CP_DST} | |||
cp "${CUBLASLT_LIB}" ${CP_DST} | |||
cp "${CUDART_LIB}" ${CP_DST} | |||
depend_real_copy ${CP_WHL_DST} | |||
fi | |||
} | |||
if [[ -z ${BUILD_IMPERATIVE} ]] | |||
then | |||
BUILD_IMPERATIVE="OFF" | |||
fi | |||
function do_build() { | |||
for ver in ${ALL_PYTHON} | |||
do | |||
@@ -118,21 +136,31 @@ function do_build() { | |||
#force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python | |||
#export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | |||
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | |||
export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||
#call build and install | |||
#FIXME: cmake does not trigger an update of the python config after
#PYTHON_LIBRARY and PYTHON_INCLUDE_DIR change, so add
#-r to remove the build cache for each new version build, which
#makes the build slower than without -r
if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then | |||
BUILD_ARGS=" -t -r" | |||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
echo "build whl with IMPERATIVE python rt" | |||
BUILD_ARGS="${BUILD_ARGS} -n " | |||
else | |||
echo "build whl with legacy python rt" | |||
fi | |||
if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then | |||
echo "build windows whl with cuda" | |||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r -c | |||
BUILD_ARGS="${BUILD_ARGS} -c " | |||
else | |||
echo "build windows whl with cpu only" | |||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||
fi | |||
echo "host_build.sh BUILD_ARGS: ${BUILD_ARGS}" | |||
${SRC_DIR}/scripts/cmake-build/host_build.sh ${BUILD_ARGS} | |||
#call setup.py | |||
BUILD_DIR=${SRC_DIR}/build_dir/host/build/ | |||
cd ${BUILD_DIR} | |||
@@ -143,10 +171,27 @@ function do_build() { | |||
fi | |||
mkdir -p staging | |||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
echo "build whl with IMPERATIVE python rt" | |||
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cd ${BUILD_DIR}/staging/megengine/core | |||
rt_file=`ls _imperative_rt.*.pyd` | |||
echo "rt file is: ${rt_file}" | |||
if [[ -z ${rt_file} ]] | |||
then | |||
echo "ERR: can not find valid rt file" | |||
exit -1 | |||
fi | |||
llvm-strip -s ${rt_file} | |||
mv ${rt_file} _imperative_rt.pyd | |||
else | |||
echo "build whl with legacy python rt" | |||
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cd ${BUILD_DIR}/staging/megengine/_internal | |||
llvm-strip -s _mgb.pyd | |||
fi | |||
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cd ${BUILD_DIR}/staging/megengine/_internal | |||
llvm-strip -s _mgb.pyd | |||
copy_more_dll | |||
cd ${BUILD_DIR}/staging | |||
${PYTHON_DIR}/python3 setup.py bdist_wheel | |||
@@ -175,5 +220,6 @@ function third_party_prepare() { | |||
} | |||
###################### | |||
export ALREADY_CONFIG_PYTHON_VER="yes" | |||
third_party_prepare | |||
do_build |
@@ -33,6 +33,11 @@ class RNGxorshf { | |||
uint64_t s[2]; | |||
public: | |||
#if __cplusplus >= 201703L | |||
typedef uint64_t result_type; | |||
static constexpr uint64_t min() { return 0; } | |||
static constexpr uint64_t max() { return UINT64_MAX; } | |||
#endif | |||
RNGxorshf(uint64_t seed) { | |||
std::mt19937_64 gen(seed); | |||
s[0] = gen(); | |||
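Background note: std::shuffle requires its engine to model UniformRandomBitGenerator (a result_type, static min()/max(), and operator()), which is exactly what the guarded typedefs above add to RNGxorshf for C++17 builds. A self-contained sketch of such a minimal engine (the xorshift constants below are illustrative, not taken from this diff):
```
#include <algorithm>
#include <cstdint>
#include <vector>

// Minimal engine satisfying the UniformRandomBitGenerator requirements that
// std::shuffle checks under C++17.
struct ToyXorshift {
    using result_type = uint64_t;
    static constexpr uint64_t min() { return 0; }
    static constexpr uint64_t max() { return UINT64_MAX; }
    uint64_t state = 88172645463325252ull;
    uint64_t operator()() {
        state ^= state << 13;
        state ^= state >> 7;
        state ^= state << 17;
        return state;
    }
};

void shuffle_demo(std::vector<int>& v) {
    ToyXorshift eng;
    std::shuffle(v.begin(), v.end(), eng);
}
```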
@@ -936,8 +941,12 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() { | |||
} | |||
} | |||
m_cur_records = records; | |||
#if __cplusplus >= 201703L | |||
std::shuffle(perm.begin(), perm.end(), rng); | |||
#else | |||
std::random_shuffle(perm.begin(), perm.end(), | |||
[&](size_t x) { return rng() % x; }); | |||
#endif | |||
for (size_t i = 0; i < length; ++i) { | |||
invoke_search(mutation(mutation(records[i].first))); | |||
invoke_search(crossover(records[i].first, records[perm[i]].first)); | |||
@@ -705,7 +705,12 @@ TEST(TestOprBlas, MatrixInverse) { | |||
} | |||
auto ptr = inp[0]->ptr<float>(); | |||
for (size_t i = 0; i < batch; ++i, ptr += n * n) { | |||
#if __cplusplus >= 201703L | |||
std::default_random_engine rng_engine; | |||
std::shuffle(perm.begin(), perm.end(), rng_engine); | |||
#else | |||
std::random_shuffle(perm.begin(), perm.end()); | |||
#endif | |||
for (size_t j = 0; j < n; ++j) { | |||
ptr[j * n + perm[j]] += 5; | |||
} | |||
@@ -36,7 +36,12 @@ void run_all_gather(const std::vector<size_t>& axis_size, bool& success, | |||
sleep_time.push_back(i * 0.05 + 0.1); | |||
tot_axis_size += axis_size[i]; | |||
} | |||
#if __cplusplus >= 201703L | |||
std::default_random_engine rng_engine; | |||
std::shuffle(sleep_time.begin(), sleep_time.end(), rng_engine); | |||
#else | |||
std::random_shuffle(sleep_time.begin(), sleep_time.end()); | |||
#endif | |||
auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA; | |||
size_t nr_dev = std::min<size_t>( | |||
@@ -18,7 +18,11 @@ endif() | |||
add_executable(megbrain_test ${SOURCES}) | |||
target_link_libraries(megbrain_test gtest) | |||
target_link_libraries(megbrain_test megengine) | |||
if(MSVC OR WIN32) | |||
target_link_libraries(megbrain_test megbrain megdnn) | |||
else() | |||
target_link_libraries(megbrain_test megengine) | |||
endif() | |||
if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||
if(MGE_WITH_CUDA) | |||
target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | |||
@@ -28,10 +32,12 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||
endif() | |||
endif() | |||
if(APPLE OR ANDROID) | |||
target_link_libraries(megbrain_test dl) | |||
else() | |||
target_link_libraries(megbrain_test dl rt) | |||
if(UNIX) | |||
if(APPLE OR ANDROID) | |||
target_link_libraries(megbrain_test dl) | |||
else() | |||
target_link_libraries(megbrain_test dl rt) | |||
endif() | |||
endif() | |||
if (MGE_WITH_DISTRIBUTED) | |||