GitOrigin-RevId: 7f6629ae1f
tags/v1.0.0-rc1
@@ -697,8 +697,10 @@ endif() | |||||
if(MGE_WITH_PYTHON_MODULE) | if(MGE_WITH_PYTHON_MODULE) | ||||
if(MGE_BUILD_IMPERATIVE_RT) | if(MGE_BUILD_IMPERATIVE_RT) | ||||
add_subdirectory(imperative) | add_subdirectory(imperative) | ||||
message("-- Enable imperative python wrapper runtime") | |||||
else() | else() | ||||
add_subdirectory(python_module) | add_subdirectory(python_module) | ||||
message("-- Enable legacy python wrapper runtime") | |||||
endif() | endif() | ||||
endif() | endif() | ||||
@@ -342,7 +342,11 @@ template <typename T> | |||||
struct SafeMultiplies; | struct SafeMultiplies; | ||||
template <typename T> | template <typename T> | ||||
#if __cplusplus >= 201703L | |||||
struct _SafeMultipliesImplUnsigned { | |||||
#else | |||||
struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> { | struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> { | ||||
#endif | |||||
static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8; | static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8; | ||||
static size_t clz(unsigned x) { | static size_t clz(unsigned x) { | ||||
@@ -70,8 +70,10 @@ if (MEG_WITH_ROCM) | |||||
target_link_libraries (megdnn_test ${MGE_ROCM_LIBS}) | target_link_libraries (megdnn_test ${MGE_ROCM_LIBS}) | ||||
endif () | endif () | ||||
if(APPLE OR ANDROID) | |||||
target_link_libraries(megdnn_test dl) | |||||
else() | |||||
target_link_libraries(megdnn_test dl rt) | |||||
if(UNIX) | |||||
if(APPLE OR ANDROID) | |||||
target_link_libraries(megdnn_test dl) | |||||
else() | |||||
target_link_libraries(megdnn_test dl rt) | |||||
endif() | |||||
endif() | endif() |
@@ -89,7 +89,7 @@ public: | |||||
auto ptr = tensor.ptr<int>(); | auto ptr = tensor.ptr<int>(); | ||||
for (size_t n = 0; n < size; ++n) { | for (size_t n = 0; n < size; ++n) { | ||||
std::set<int> used; | std::set<int> used; | ||||
std::random_shuffle(seq.begin(), seq.end()); | |||||
COMPAT_RANDOM(seq.begin(), seq.end()); | |||||
for (size_t step = 0; step < stride; ++step) { | for (size_t step = 0; step < stride; ++step) { | ||||
megdnn_assert(used.size() < m_size); | megdnn_assert(used.size() < m_size); | ||||
ptr[n * stride + step] = seq[step]; | ptr[n * stride + step] = seq[step]; | ||||
@@ -75,7 +75,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) { | |||||
i2f.i = static_cast<uint16_t>(x); | i2f.i = static_cast<uint16_t>(x); | ||||
m_sequence.push_back(i2f.f); | m_sequence.push_back(i2f.f); | ||||
} | } | ||||
std::random_shuffle(m_sequence.begin(), m_sequence.end()); | |||||
COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); | |||||
} | } | ||||
Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | ||||
@@ -99,7 +99,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | |||||
m_sequence.push_back(i2f.f); | m_sequence.push_back(i2f.f); | ||||
} | } | ||||
std::random_shuffle(m_sequence.begin(), m_sequence.end()); | |||||
COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); | |||||
} | } | ||||
void Float16PeriodicalRNG::gen(const TensorND& tensor) { | void Float16PeriodicalRNG::gen(const TensorND& tensor) { | ||||
@@ -19,6 +19,16 @@ | |||||
namespace megdnn { | namespace megdnn { | ||||
namespace test { | namespace test { | ||||
#if __cplusplus >= 201703L | |||||
#define COMPAT_RANDOM(begin, end) \ | |||||
{ \ | |||||
std::default_random_engine rng_engine; \ | |||||
std::shuffle(begin, end, rng_engine); \ | |||||
} | |||||
#else | |||||
#define COMPAT_RANDOM(begin, end) std::random_shuffle(begin, end); | |||||
#endif | |||||
class RNG { | class RNG { | ||||
protected: | protected: | ||||
class RNGxorshf; | class RNGxorshf; | ||||
@@ -24,15 +24,16 @@ class ArgmxxRNG final: public RNG { | |||||
void gen(const TensorND &tensor) override { | void gen(const TensorND &tensor) override { | ||||
auto offset = tensor.layout.span().low_elem; | auto offset = tensor.layout.span().low_elem; | ||||
auto nr_elems = tensor.layout.span().dist_elem(); | auto nr_elems = tensor.layout.span().dist_elem(); | ||||
#define cb(DType) \ | |||||
if (tensor.layout.dtype == DType()) { \ | |||||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
auto ptr = tensor.ptr<ctype>(); \ | |||||
for (size_t i = 0; i < nr_elems; ++i) { \ | |||||
ptr[offset+i] = i; \ | |||||
} \ | |||||
std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ | |||||
} | |||||
#define cb(DType) \ | |||||
if (tensor.layout.dtype == DType()) { \ | |||||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
auto ptr = tensor.ptr<ctype>(); \ | |||||
for (size_t i = 0; i < nr_elems; ++i) { \ | |||||
ptr[offset + i] = i; \ | |||||
} \ | |||||
COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ | |||||
} | |||||
MEGDNN_FOREACH_COMPUTING_DTYPE(cb); | MEGDNN_FOREACH_COMPUTING_DTYPE(cb); | ||||
#undef cb | #undef cb | ||||
} | } | ||||
@@ -32,7 +32,7 @@ class ArgsortRNG final : public RNG { | |||||
} else { | } else { | ||||
for (int i = 0; i < n; ++i) | for (int i = 0; i < n; ++i) | ||||
ptr[i] = static_cast<T>(i - n / 2); | ptr[i] = static_cast<T>(i - n / 2); | ||||
std::random_shuffle(ptr, ptr + n); | |||||
COMPAT_RANDOM(ptr, ptr + n); | |||||
} | } | ||||
} | } | ||||
@@ -86,7 +86,7 @@ void run_backward_test(Handle* handle, DType dtype) { | |||||
for (size_t j = 0; j < n; ++j) { | for (size_t j = 0; j < n; ++j) { | ||||
ptr[j] = j; | ptr[j] = j; | ||||
} | } | ||||
std::random_shuffle(ptr, ptr + n); | |||||
COMPAT_RANDOM(ptr, ptr + n); | |||||
ptr += n; | ptr += n; | ||||
} | } | ||||
} | } | ||||
@@ -361,9 +361,8 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_7) { | |||||
for (size_t r = 0; r < _dim.size(); r++) | for (size_t r = 0; r < _dim.size(); r++) | ||||
permutation[r] = r; | permutation[r] = r; | ||||
for (int nsample = 0; nsample < 50; nsample++) { | for (int nsample = 0; nsample < 50; nsample++) { | ||||
std::random_shuffle(_dim.begin(), _dim.end()); | |||||
std::random_shuffle(permutation.begin(), permutation.end()); | |||||
COMPAT_RANDOM(_dim.begin(), _dim.end()); | |||||
COMPAT_RANDOM(permutation.begin(), permutation.end()); | |||||
if (!isTrivial(permutation)) { | if (!isTrivial(permutation)) { | ||||
run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5], | run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5], | ||||
_dim[6]}, | _dim[6]}, | ||||
@@ -451,9 +450,10 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_5) { | |||||
printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re); | printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re); | ||||
// printVec(dim); | // printVec(dim); | ||||
std::random_shuffle(dim.begin(), dim.end()); | |||||
COMPAT_RANDOM(dim.begin(), dim.end()); | |||||
while (isTrivial(permutation)) { | while (isTrivial(permutation)) { | ||||
std::random_shuffle(permutation.begin(), permutation.end()); | |||||
COMPAT_RANDOM(permutation.begin(), permutation.end()); | |||||
} | } | ||||
run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()}, | run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()}, | ||||
@@ -603,8 +603,9 @@ TEST_F(CUDA, BENCHMARK_LAST_CONTIG_ALIGN_TEST) { | |||||
for (size_t r = 0; r < _dim.size(); r++) | for (size_t r = 0; r < _dim.size(); r++) | ||||
permutation[r] = r; | permutation[r] = r; | ||||
for (int nsample = 0; nsample < 20; nsample++) { | for (int nsample = 0; nsample < 20; nsample++) { | ||||
std::random_shuffle(_dim.begin(), _dim.end() - 1); | |||||
std::random_shuffle(permutation.begin(), permutation.end() - 1); | |||||
COMPAT_RANDOM(_dim.begin(), _dim.end() - 1); | |||||
COMPAT_RANDOM(permutation.begin(), permutation.end() - 1); | |||||
if (nsample < 5) | if (nsample < 5) | ||||
_dim[5] = (u.gen_single_val() / 4 + 1) * 4; | _dim[5] = (u.gen_single_val() / 4 + 1) * 4; | ||||
@@ -24,7 +24,7 @@ using namespace test; | |||||
TEST_F(CUDA, SLEEP) { | TEST_F(CUDA, SLEEP) { | ||||
auto opr = this->handle_cuda()->create_operator<Sleep>(); | |||||
auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>(); | |||||
auto run = [&](float time) -> double { | auto run = [&](float time) -> double { | ||||
opr->param() = {time}; | opr->param() = {time}; | ||||
@@ -24,16 +24,17 @@ class ArgmxxRNG final: public RNG { | |||||
void gen(const TensorND &tensor) override { | void gen(const TensorND &tensor) override { | ||||
auto offset = tensor.layout.span().low_elem; | auto offset = tensor.layout.span().low_elem; | ||||
auto nr_elems = tensor.layout.span().dist_elem(); | auto nr_elems = tensor.layout.span().dist_elem(); | ||||
#define cb(DType) \ | |||||
if (tensor.layout.dtype == DType()) { \ | |||||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
auto ptr = tensor.ptr<ctype>(); \ | |||||
for (size_t i = 0; i < nr_elems; ++i) { \ | |||||
ptr[offset+i] = i; \ | |||||
} \ | |||||
std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ | |||||
return; \ | |||||
} | |||||
#define cb(DType) \ | |||||
if (tensor.layout.dtype == DType()) { \ | |||||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
auto ptr = tensor.ptr<ctype>(); \ | |||||
for (size_t i = 0; i < nr_elems; ++i) { \ | |||||
ptr[offset + i] = i; \ | |||||
} \ | |||||
COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ | |||||
return; \ | |||||
} | |||||
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); | MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); | ||||
#undef cb | #undef cb | ||||
megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s", | megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s", | ||||
@@ -76,7 +76,11 @@ add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT}) | |||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) | add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) | ||||
pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) | pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) | ||||
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||||
if (APPLE OR MSVC OR WIN32) | |||||
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn) | |||||
else() | |||||
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||||
endif() | |||||
if (MGE_WITH_DISTRIBUTED) | if (MGE_WITH_DISTRIBUTED) | ||||
message("Imperative configured to link megray") | message("Imperative configured to link megray") | ||||
target_link_libraries(${MODULE_NAME} PRIVATE megray) | target_link_libraries(${MODULE_NAME} PRIVATE megray) | ||||
@@ -91,6 +95,10 @@ set_target_properties(${MODULE_NAME} PROPERTIES | |||||
SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} | SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} | ||||
LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core | LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core | ||||
) | ) | ||||
if (APPLE OR MSVC OR WIN32) | |||||
message("-- overwriting SUFFIX at macos and windows before config by set_target_properties") | |||||
pybind11_extension(${MODULE_NAME}) | |||||
endif() | |||||
add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) | add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) | ||||
if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) | if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) | ||||
@@ -8,6 +8,67 @@ | |||||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
import os | import os | ||||
import sys | import sys | ||||
import platform | |||||
import ctypes | |||||
if sys.platform == "win32": | |||||
lib_path = os.path.join(os.path.dirname(__file__), "core/lib") | |||||
dll_paths = list(filter(os.path.exists, [lib_path,])) | |||||
assert len(dll_paths) > 0 | |||||
kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True) | |||||
has_load_library_attr = hasattr(kernel32, "AddDllDirectory") | |||||
old_error_mode = kernel32.SetErrorMode(0x0001) | |||||
kernel32.LoadLibraryW.restype = ctypes.c_void_p | |||||
if has_load_library_attr: | |||||
kernel32.AddDllDirectory.restype = ctypes.c_void_p | |||||
kernel32.LoadLibraryExW.restype = ctypes.c_void_p | |||||
for dll_path in dll_paths: | |||||
if sys.version_info >= (3, 8): | |||||
os.add_dll_directory(dll_path) | |||||
elif has_load_library_attr: | |||||
res = kernel32.AddDllDirectory(dll_path) | |||||
if res is None: | |||||
err = ctypes.WinError(ctypes.get_last_error()) | |||||
err.strerror += ' Error adding "{}" to the DLL search PATH.'.format( | |||||
dll_path | |||||
) | |||||
raise err | |||||
else: | |||||
print("WARN: python or OS env have some issue, may load DLL failed!!!") | |||||
import glob | |||||
dlls = glob.glob(os.path.join(lib_path, "*.dll")) | |||||
path_patched = False | |||||
for dll in dlls: | |||||
is_loaded = False | |||||
if has_load_library_attr: | |||||
res = kernel32.LoadLibraryExW(dll, None, 0x00001100) | |||||
last_error = ctypes.get_last_error() | |||||
if res is None and last_error != 126: | |||||
err = ctypes.WinError(last_error) | |||||
err.strerror += ' Error loading "{}" or one of its dependencies.'.format( | |||||
dll | |||||
) | |||||
raise err | |||||
elif res is not None: | |||||
is_loaded = True | |||||
if not is_loaded: | |||||
if not path_patched: | |||||
os.environ["PATH"] = ";".join(dll_paths + [os.environ["PATH"]]) | |||||
path_patched = True | |||||
res = kernel32.LoadLibraryW(dll) | |||||
if res is None: | |||||
err = ctypes.WinError(ctypes.get_last_error()) | |||||
err.strerror += ' Error loading "{}" or one of its dependencies.'.format( | |||||
dll | |||||
) | |||||
raise err | |||||
kernel32.SetErrorMode(old_error_mode) | |||||
from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func | from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func | ||||
from .device import * | from .device import * | ||||
@@ -6,10 +6,14 @@ | |||||
# Unless required by applicable law or agreed to in writing, | # Unless required by applicable law or agreed to in writing, | ||||
# software distributed under the License is distributed on an | # software distributed under the License is distributed on an | ||||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
import resource | |||||
import platform | |||||
import sys | import sys | ||||
import threading | import threading | ||||
# Windows do not imp resource package | |||||
if platform.system() != "Windows": | |||||
import resource | |||||
class AlternativeRecursionLimit: | class AlternativeRecursionLimit: | ||||
r"""A reentrant context manager for setting global recursion limits. | r"""A reentrant context manager for setting global recursion limits. | ||||
@@ -28,16 +32,24 @@ class AlternativeRecursionLimit: | |||||
with self.lock: | with self.lock: | ||||
if self.count == 0: | if self.count == 0: | ||||
self.orig_py_limit = sys.getrecursionlimit() | self.orig_py_limit = sys.getrecursionlimit() | ||||
if platform.system() != "Windows": | |||||
( | ( | ||||
self.orig_rlim_stack_soft, | self.orig_rlim_stack_soft, | ||||
self.orig_rlim_stack_hard, | self.orig_rlim_stack_hard, | ||||
) = resource.getrlimit(resource.RLIMIT_STACK) | ) = resource.getrlimit(resource.RLIMIT_STACK) | ||||
resource.setrlimit( | |||||
resource.RLIMIT_STACK, | |||||
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), | |||||
) | |||||
# increase recursion limit | |||||
sys.setrecursionlimit(self.new_py_limit) | |||||
# FIXME: https://bugs.python.org/issue34602, python3 release version | |||||
# on Macos always have this issue, not all user install python3 from src | |||||
try: | |||||
resource.setrlimit( | |||||
resource.RLIMIT_STACK, | |||||
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), | |||||
) | |||||
except ValueError as exc: | |||||
if platform.system() != "Darwin": | |||||
raise exc | |||||
# increase recursion limit | |||||
sys.setrecursionlimit(self.new_py_limit) | |||||
self.count += 1 | self.count += 1 | ||||
def __exit__(self, type, value, traceback): | def __exit__(self, type, value, traceback): | ||||
@@ -45,10 +57,16 @@ class AlternativeRecursionLimit: | |||||
self.count -= 1 | self.count -= 1 | ||||
if self.count == 0: | if self.count == 0: | ||||
sys.setrecursionlimit(self.orig_py_limit) | sys.setrecursionlimit(self.orig_py_limit) | ||||
resource.setrlimit( | |||||
resource.RLIMIT_STACK, | |||||
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), | |||||
) | |||||
if platform.system() != "Windows": | |||||
try: | |||||
resource.setrlimit( | |||||
resource.RLIMIT_STACK, | |||||
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), | |||||
) | |||||
except ValueError as exc: | |||||
if platform.system() != "Darwin": | |||||
raise exc | |||||
_max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | ||||
@@ -9,6 +9,7 @@ | |||||
import os | import os | ||||
import re | import re | ||||
import pathlib | import pathlib | ||||
import platform | |||||
from distutils.file_util import copy_file | from distutils.file_util import copy_file | ||||
from setuptools import setup, find_packages, Extension | from setuptools import setup, find_packages, Extension | ||||
from setuptools.command.build_ext import build_ext as _build_ext | from setuptools.command.build_ext import build_ext as _build_ext | ||||
@@ -29,7 +30,10 @@ class build_ext(_build_ext): | |||||
extdir.parent.mkdir(parents=True, exist_ok=True) | extdir.parent.mkdir(parents=True, exist_ok=True) | ||||
modpath = self.get_ext_fullname(ext.name).split('.') | modpath = self.get_ext_fullname(ext.name).split('.') | ||||
modpath[-1] += '.so' | |||||
if platform.system() == 'Windows': | |||||
modpath[-1] += '.pyd' | |||||
else: | |||||
modpath[-1] += '.so' | |||||
modpath = str(pathlib.Path(*modpath).resolve()) | modpath = str(pathlib.Path(*modpath).resolve()) | ||||
copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) | copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) | ||||
@@ -47,6 +51,14 @@ if local_version: | |||||
__version__ = '{}+{}'.format(__version__, local_version) | __version__ = '{}+{}'.format(__version__, local_version) | ||||
packages = find_packages(exclude=['test']) | packages = find_packages(exclude=['test']) | ||||
package_data = [ | |||||
str(f.relative_to('megengine')) | |||||
for f in pathlib.Path('megengine', 'core', 'include').glob('**/*') | |||||
] | |||||
package_data += [ | |||||
str(f.relative_to('megengine')) | |||||
for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*') | |||||
] | |||||
with open('requires.txt') as f: | with open('requires.txt') as f: | ||||
requires = f.read().splitlines() | requires = f.read().splitlines() | ||||
@@ -63,6 +75,9 @@ setup_kwargs = dict( | |||||
author='Megvii Engine Team', | author='Megvii Engine Team', | ||||
author_email=email, | author_email=email, | ||||
packages=packages, | packages=packages, | ||||
package_data={ | |||||
'megengine': package_data, | |||||
}, | |||||
ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')], | ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')], | ||||
install_requires=requires, | install_requires=requires, | ||||
extras_require={ | extras_require={ | ||||
@@ -9,15 +9,6 @@ | |||||
#include "megbrain/utils/mempool.h" | #include "megbrain/utils/mempool.h" | ||||
#include "./numpy_dtypes.h" | #include "./numpy_dtypes.h" | ||||
/* | |||||
* demangle typeid, see | |||||
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname | |||||
*/ | |||||
#ifdef __GNUG__ | |||||
#include <cstdlib> | |||||
#include <memory> | |||||
#include <cxxabi.h> | |||||
namespace py = pybind11; | namespace py = pybind11; | ||||
PyTaskDipatcher py_task_q = {}; | PyTaskDipatcher py_task_q = {}; | ||||
@@ -34,10 +25,18 @@ py::module rel_import(py::str name, py::module m, int level) { | |||||
return import(name, m.attr("__dict__"), py::arg("level")=level); | return import(name, m.attr("__dict__"), py::arg("level")=level); | ||||
} | } | ||||
/* | |||||
* demangle typeid, see | |||||
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname | |||||
*/ | |||||
#ifdef __GNUG__ | |||||
#include <cxxabi.h> | |||||
#include <cstdlib> | |||||
#include <memory> | |||||
namespace { | namespace { | ||||
std::string demangle_typeid(const char* name) { | std::string demangle_typeid(const char* name) { | ||||
int status = -4; // some arbitrary value to eliminate the compiler warning | int status = -4; // some arbitrary value to eliminate the compiler warning | ||||
// enable c++11 by passing the flag -std=c++11 to g++ | // enable c++11 by passing the flag -std=c++11 to g++ | ||||
@@ -48,7 +47,7 @@ std::string demangle_typeid(const char* name) { | |||||
return (status==0) ? res.get() : name ; | return (status==0) ? res.get() : name ; | ||||
} | } | ||||
} | |||||
} // namespace | |||||
#else | #else | ||||
namespace { | namespace { | ||||
@@ -1,4 +1,8 @@ | |||||
#include "utils.h" | #include "utils.h" | ||||
#ifdef WIN32 | |||||
#include <stdio.h> | |||||
#include <windows.h> | |||||
#endif | |||||
#include <pybind11/operators.h> | #include <pybind11/operators.h> | ||||
#include <atomic> | #include <atomic> | ||||
@@ -8,6 +8,7 @@ | |||||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
import multiprocessing as mp | import multiprocessing as mp | ||||
import os | import os | ||||
import platform | |||||
import re | import re | ||||
import subprocess | import subprocess | ||||
import sys | import sys | ||||
@@ -196,6 +197,9 @@ def run_test( | |||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
@pytest.mark.skipif( | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | |||||
def test_dp_correctness(): | def test_dp_correctness(): | ||||
model_name = "mnist_model_with_test.mge" | model_name = "mnist_model_with_test.mge" | ||||
model_path = os.path.join(os.path.dirname(__file__), model_name) | model_path = os.path.join(os.path.dirname(__file__), model_name) | ||||
@@ -35,7 +35,7 @@ from megengine.functional.distributed import ( | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_reduce_sum(): | def test_reduce_sum(): | ||||
@@ -77,7 +77,7 @@ def test_reduce_sum(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_broadcast(): | def test_broadcast(): | ||||
@@ -115,7 +115,7 @@ def test_broadcast(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_all_gather(): | def test_all_gather(): | ||||
@@ -154,7 +154,7 @@ def test_all_gather(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_reduce_scatter_sum(): | def test_reduce_scatter_sum(): | ||||
@@ -193,7 +193,7 @@ def test_reduce_scatter_sum(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_all_reduce_sum(): | def test_all_reduce_sum(): | ||||
@@ -232,7 +232,7 @@ def test_all_reduce_sum(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_all_reduce_max(): | def test_all_reduce_max(): | ||||
@@ -271,7 +271,7 @@ def test_all_reduce_max(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_all_reduce_min(): | def test_all_reduce_min(): | ||||
@@ -310,7 +310,7 @@ def test_all_reduce_min(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_gather(): | def test_gather(): | ||||
@@ -352,7 +352,7 @@ def test_gather(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_scatter(): | def test_scatter(): | ||||
@@ -390,7 +390,7 @@ def test_scatter(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_all_to_all(): | def test_all_to_all(): | ||||
@@ -430,7 +430,7 @@ def test_all_to_all(): | |||||
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
) | ) | ||||
@pytest.mark.skipif( | @pytest.mark.skipif( | ||||
platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | ) | ||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
def test_io_remote(): | def test_io_remote(): | ||||
@@ -6,6 +6,7 @@ | |||||
# Unless required by applicable law or agreed to in writing, | # Unless required by applicable law or agreed to in writing, | ||||
# software distributed under the License is distributed on an | # software distributed under the License is distributed on an | ||||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
import platform | |||||
import weakref | import weakref | ||||
import numpy as np | import numpy as np | ||||
@@ -51,6 +52,9 @@ def save_to(self, name="grad"): | |||||
@pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
@pytest.mark.skipif( | |||||
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
) | |||||
def test_dist_grad(): | def test_dist_grad(): | ||||
world_size = 2 | world_size = 2 | ||||
x_np = np.random.rand(10).astype("float32") | x_np = np.random.rand(10).astype("float32") | ||||
@@ -9,7 +9,17 @@ | |||||
#include "megbrain/imperative/profiler.h" | #include "megbrain/imperative/profiler.h" | ||||
#if defined(_MSC_VER) || defined(WIN32) | |||||
#include <windows.h> | |||||
#define getpid GetCurrentProcessId | |||||
#else | |||||
#include <sys/unistd.h> | #include <sys/unistd.h> | ||||
#endif | |||||
#if defined(__APPLE__) || defined(__MACOSX) | |||||
#include <unistd.h> | |||||
#endif | |||||
#include <variant> | #include <variant> | ||||
#include "megbrain/imperative/ops/opr_attr.h" | #include "megbrain/imperative/ops/opr_attr.h" | ||||
@@ -16,6 +16,10 @@ | |||||
#include "megbrain/imperative/ops/opr_attr.h" | #include "megbrain/imperative/ops/opr_attr.h" | ||||
#include "megbrain/imperative/ops/backward_graph.h" | #include "megbrain/imperative/ops/backward_graph.h" | ||||
#if __cplusplus >= 201703L | |||||
#include <optional> | |||||
#endif | |||||
namespace mgb { | namespace mgb { | ||||
namespace imperative { | namespace imperative { | ||||
@@ -38,8 +38,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||||
endif() | endif() | ||||
if(UNIX) | if(UNIX) | ||||
target_link_libraries(imperative_test dl rt) | |||||
if(APPLE OR ANDROID) | |||||
target_link_libraries(imperative_test dl) | |||||
else() | |||||
target_link_libraries(imperative_test dl rt) | |||||
endif() | |||||
endif() | endif() | ||||
install(TARGETS imperative_test RUNTIME DESTINATION test) | install(TARGETS imperative_test RUNTIME DESTINATION test) |
@@ -81,7 +81,10 @@ else() | |||||
target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | ||||
endif() | endif() | ||||
target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR}) | target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR}) | ||||
target_link_libraries(mgb ${PYTHON_LIBRARIES}) | |||||
# only windows need link PYTHON_LIBRARIES | |||||
if(MSVC OR WIN32) | |||||
target_link_libraries(mgb ${PYTHON_LIBRARIES}) | |||||
endif() | |||||
if (MGE_WITH_DISTRIBUTED) | if (MGE_WITH_DISTRIBUTED) | ||||
target_link_libraries(mgb megray) | target_link_libraries(mgb megray) | ||||
@@ -30,11 +30,17 @@ | |||||
4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env | 4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env | ||||
4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path | 4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path | ||||
if you skip 4d/4e/4f, the CUDA runtime can not find its dll files | if you skip 4d/4e/4f, the CUDA runtime can not find its dll files | ||||
5: install python3 (default version 3.8.3) to /c/Users/${USER}/mge_whl_python_env/3.8.3, | |||||
add it to the PATH env, and run python3 -m pip install numpy (only needed if you want to build with training mode or build the python whl) | |||||
6: install swig from install gui (if u want to build with training mode or build python whl) | |||||
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip | |||||
b: install swig to /c/Users/${USER}/swigwin-4.0.2 | |||||
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to /c/Users/${USER}/swigwin-4.0.2 | |||||
``` | ``` | ||||
### linux host build | ### linux host build | ||||
``` | ``` | ||||
1: cmake, which version > 3.14.4 | 1: cmake, which version > 3.14.4 | ||||
2: gcc/g++, which version > 6 | |||||
2: gcc/g++, which version > 6 (gcc/g++ >= 7 is required if you need to build with training mode) | |||||
3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl | 3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl | ||||
4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool: | 4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool: | ||||
5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo | 5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo | ||||
@@ -47,6 +53,7 @@ | |||||
3: brew install python python3 swig coreutils | 3: brew install python python3 swig coreutils | ||||
4: install at least xcode command line tool: https://developer.apple.com/xcode/ | 4: install at least xcode command line tool: https://developer.apple.com/xcode/ | ||||
5: about cuda: we do not support CUDA on macos | 5: about cuda: we do not support CUDA on macos | ||||
6: python3 -m pip install numpy (if you want to build with training mode or build the python whl) | |||||
``` | ``` | ||||
### cross build for arm-android | ### cross build for arm-android | ||||
now we support windows/linux/macos cross build to arm-android | now we support windows/linux/macos cross build to arm-android | ||||
@@ -9,6 +9,7 @@ function usage() { | |||||
echo "-t : Build with training mode, default inference only" | echo "-t : Build with training mode, default inference only" | ||||
echo "-m : Build with m32 mode(only for windows build), default m64" | echo "-m : Build with m32 mode(only for windows build), default m64" | ||||
echo "-r : remove old build dir before make, default off" | echo "-r : remove old build dir before make, default off" | ||||
echo "-n : enable new python runtime(valid when training mode with -t, default is legacy runtime)" | |||||
echo "-h : show usage" | echo "-h : show usage" | ||||
echo "append other cmake config by export EXTRA_CMAKE_ARGS=..." | echo "append other cmake config by export EXTRA_CMAKE_ARGS=..." | ||||
echo "example: $0 -d" | echo "example: $0 -d" | ||||
@@ -22,9 +23,10 @@ MGE_WINDOWS_BUILD_ARCH=x64 | |||||
MGE_WINDOWS_BUILD_MARCH=m64 | MGE_WINDOWS_BUILD_MARCH=m64 | ||||
MGE_ARCH=x86_64 | MGE_ARCH=x86_64 | ||||
REMOVE_OLD_BUILD=false | REMOVE_OLD_BUILD=false | ||||
MGE_BUILD_IMPERATIVE_RT=OFF | |||||
echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" | echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" | ||||
while getopts "rhdctm" arg | |||||
while getopts "rhdctmn" arg | |||||
do | do | ||||
case $arg in | case $arg in | ||||
d) | d) | ||||
@@ -48,11 +50,15 @@ do | |||||
REMOVE_OLD_BUILD=true | REMOVE_OLD_BUILD=true | ||||
;; | ;; | ||||
m) | m) | ||||
echo "build for m32(only use for windows)" | |||||
echo "build for m32(only valid use for windows)" | |||||
MGE_WINDOWS_BUILD_ARCH=x86 | MGE_WINDOWS_BUILD_ARCH=x86 | ||||
MGE_WINDOWS_BUILD_MARCH=m32 | MGE_WINDOWS_BUILD_MARCH=m32 | ||||
MGE_ARCH=i386 | MGE_ARCH=i386 | ||||
;; | ;; | ||||
n) | |||||
echo "Enable imperative python wrapper runtime" | |||||
MGE_BUILD_IMPERATIVE_RT=ON | |||||
;; | |||||
?) | ?) | ||||
echo "unkonw argument" | echo "unkonw argument" | ||||
usage | usage | ||||
@@ -101,6 +107,7 @@ function cmake_build() { | |||||
cmake \ | cmake \ | ||||
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | ||||
-DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | ||||
-DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ | |||||
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | ||||
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | ||||
${EXTRA_CMAKE_ARGS} \ | ${EXTRA_CMAKE_ARGS} \ | ||||
@@ -112,7 +119,7 @@ function cmake_build() { | |||||
function windows_env_err() { | function windows_env_err() { | ||||
echo "check windows env failed!!" | echo "check windows env failed!!" | ||||
echo "please install LLVM/clang-cl/cmake/python at Visual Studio Extensions" | |||||
echo "please install env refs for: scripts/cmake-build/BUILD_README.md" | |||||
exit -1 | exit -1 | ||||
} | } | ||||
@@ -178,6 +185,25 @@ function prepare_env_for_windows_build() { | |||||
export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS | export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS | ||||
export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH | export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH | ||||
export INCLUDE=$INCLUDE:$CPATH | export INCLUDE=$INCLUDE:$CPATH | ||||
# python version will be config by whl build script or ci script, we need | |||||
# a DFT version for build success when we just call host_build.sh | |||||
if [[ -z ${ALREADY_CONFIG_PYTHON_VER} ]] | |||||
then | |||||
echo "config a default python3" | |||||
DFT_PYTHON_BIN=/c/Users/${USER}/mge_whl_python_env/3.8.3 | |||||
if [ ! -f "${DFT_PYTHON_BIN}/python3.exe" ]; then | |||||
echo "ERR: can not find ${DFT_PYTHON_BIN}/python3.exe , Invalid env" | |||||
windows_env_err | |||||
else | |||||
echo "put python3 to env..." | |||||
export PATH=${DFT_PYTHON_BIN}:$PATH | |||||
which python3 | |||||
fi | |||||
fi | |||||
echo "export swig pwd to PATH" | |||||
export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH | |||||
} | } | ||||
WINDOWS_BUILD_TARGET="Ninja all > build.log" | WINDOWS_BUILD_TARGET="Ninja all > build.log" | ||||
@@ -218,6 +244,7 @@ function cmake_build_windows() { | |||||
vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \ | vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \ | ||||
-DMGE_ARCH=$MGE_ARCH \ | -DMGE_ARCH=$MGE_ARCH \ | ||||
-DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | ||||
-DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ | |||||
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | ||||
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | ||||
-DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \ | -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \ | ||||
@@ -230,8 +257,18 @@ function cmake_build_windows() { | |||||
${WINDOWS_BUILD_TARGET}" | ${WINDOWS_BUILD_TARGET}" | ||||
} | } | ||||
if [ ${MGE_BUILD_IMPERATIVE_RT} = "ON" ] && [ ${MGE_INFERENCE_ONLY} = "ON" ]; then | |||||
echo "ERR: MGE_BUILD_IMPERATIVE_RT(-n) only valid when enable training mode(-t)" | |||||
echo "pls remove -n or add -t" | |||||
exit -1 | |||||
fi | |||||
if [[ $OS =~ "NT" ]]; then | if [[ $OS =~ "NT" ]]; then | ||||
if [ ${MGE_ARCH} = "i386" ] && [ ${MGE_INFERENCE_ONLY} = "OFF" ]; then | |||||
echo "ERR: training mode(-t) only support 64 bit mode" | |||||
echo "pls remove -t or remove -m" | |||||
exit -1 | |||||
fi | |||||
config_windows_build_target | config_windows_build_target | ||||
cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE | cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE | ||||
else | else | ||||
@@ -53,10 +53,6 @@ | |||||
d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip | d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip | ||||
d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt | d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt | ||||
d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate | d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate | ||||
5: install swig from install gui | |||||
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip | |||||
b: install swig to /c/Users/${USER}/swigwin-4.0.2 | |||||
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2 | |||||
``` | ``` | ||||
# how to build | # how to build | ||||
@@ -90,6 +86,11 @@ | |||||
``` | ``` | ||||
ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh | ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh | ||||
``` | ``` | ||||
If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg: | |||||
``` | |||||
ALL_PYTHON=3.5.9 BUILD_IMPERATIVE="ON" ./scripts/whl/macos/macos_build_whl.sh | |||||
``` | |||||
## build for windows | ## build for windows | ||||
``` | ``` | ||||
./scripts/whl/windows/windows_build_whl.sh | ./scripts/whl/windows/windows_build_whl.sh | ||||
@@ -102,5 +103,7 @@ | |||||
If you want to build windows whl with cuda, and also with a specific Python version, eg: | If you want to build windows whl with cuda, and also with a specific Python version, eg: | ||||
``` | ``` | ||||
WINDOWS_WHL_WITH_CUDA="true" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||||
WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||||
``` | ``` | ||||
If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg: | |||||
BUILD_IMPERATIVE="ON" WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh |
@@ -65,16 +65,18 @@ function config_python_env() { | |||||
fi | fi | ||||
echo ${ver} | echo ${ver} | ||||
#config a dir to trick cmake find a null pythonlib | |||||
PYTHON_LIBRARY=${PYTHON_DIR}lib/ | |||||
if [ "$1" = "3.5.9" ]; then | if [ "$1" = "3.5.9" ]; then | ||||
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m | PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m | ||||
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.5m.dylib | |||||
elif [ "$1" = "3.6.10" ]; then | elif [ "$1" = "3.6.10" ]; then | ||||
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m | PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m | ||||
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.6m.dylib | |||||
elif [ "$1" = "3.7.7" ]; then | elif [ "$1" = "3.7.7" ]; then | ||||
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m | PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m | ||||
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.7m.dylib | |||||
elif [ "$1" = "3.8.3" ]; then | elif [ "$1" = "3.8.3" ]; then | ||||
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8 | PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8 | ||||
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.8.dylib | |||||
else | else | ||||
echo "ERR: DO NOT SUPPORT PYTHON VERSION" | echo "ERR: DO NOT SUPPORT PYTHON VERSION" | ||||
echo "now support list: ${FULL_PYTHON_VER}" | echo "now support list: ${FULL_PYTHON_VER}" | ||||
@@ -82,6 +84,11 @@ function config_python_env() { | |||||
fi | fi | ||||
} | } | ||||
if [[ -z ${BUILD_IMPERATIVE} ]] | |||||
then | |||||
BUILD_IMPERATIVE="OFF" | |||||
fi | |||||
function do_build() { | function do_build() { | ||||
for ver in ${ALL_PYTHON} | for ver in ${ALL_PYTHON} | ||||
do | do | ||||
@@ -89,7 +96,7 @@ function do_build() { | |||||
config_python_env ${ver} | config_python_env ${ver} | ||||
#check env | #check env | ||||
if [ ! -d "$PYTHON_LIBRARY" ]; then | |||||
if [ ! -f "$PYTHON_LIBRARY" ]; then | |||||
echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package" | echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package" | ||||
err_env | err_env | ||||
fi | fi | ||||
@@ -102,14 +109,20 @@ function do_build() { | |||||
#append cmake args for config python | #append cmake args for config python | ||||
export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | ||||
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | ||||
export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||||
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||||
#call build and install | #call build and install | ||||
#FIXME: cmake do not triger update python config, after | #FIXME: cmake do not triger update python config, after | ||||
#change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add | #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add | ||||
#-r to remove build cache after a new ver build, which | #-r to remove build cache after a new ver build, which | ||||
#will be more slow build than without -r | #will be more slow build than without -r | ||||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
echo "build whl with IMPERATIVE python rt" | |||||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -n -r | |||||
else | |||||
echo "build whl with legacy python rt" | |||||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||||
fi | |||||
#call setup.py | #call setup.py | ||||
BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/ | BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/ | ||||
@@ -121,12 +134,47 @@ function do_build() { | |||||
fi | fi | ||||
mkdir -p staging | mkdir -p staging | ||||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
echo "build whl with IMPERATIVE python rt" | |||||
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
cd ${BUILD_DIR}/staging/megengine/core | |||||
rt_file=`ls _imperative_rt.*.so` | |||||
echo "rt file is: ${rt_file}" | |||||
if [[ -z ${rt_file} ]] | |||||
then | |||||
echo "ERR: can not find valid rt file" | |||||
exit -1 | |||||
fi | |||||
llvm-strip -s ${rt_file} | |||||
mv ${rt_file} _imperative_rt.so | |||||
echo "check so valid or not..." | |||||
otool_out=`otool -L _imperative_rt.so` | |||||
if [[ "${otool_out}" =~ "ython" ]]; then | |||||
echo "ERR: invalid _imperative_rt.so which depend on python lib, detail: log" | |||||
echo ${otool_out} | |||||
exit -1 | |||||
else | |||||
echo "valid..." | |||||
fi | |||||
else | |||||
echo "build whl with legacy python rt" | |||||
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
cd ${BUILD_DIR}/staging/megengine/_internal | |||||
#FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file | |||||
#will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so | |||||
echo "check so valid or not..." | |||||
llvm-strip -s _mgb.so | |||||
otool_out=`otool -L _mgb.so` | |||||
if [[ "${otool_out}" =~ "ython" ]]; then | |||||
echo "ERR: invalid _mgb.so which depend on python lib, detail: log" | |||||
echo ${otool_out} | |||||
exit -1 | |||||
else | |||||
echo "valid..." | |||||
fi | |||||
fi | |||||
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
cd ${BUILD_DIR}/staging/megengine/_internal | |||||
#FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file | |||||
#will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so | |||||
llvm-strip -s _mgb.so | |||||
cd ${BUILD_DIR}/staging | cd ${BUILD_DIR}/staging | ||||
${PYTHON_DIR}/bin/python3 setup.py bdist_wheel | ${PYTHON_DIR}/bin/python3 setup.py bdist_wheel | ||||
cd ${BUILD_DIR}/staging/dist/ | cd ${BUILD_DIR}/staging/dist/ | ||||
@@ -14,8 +14,6 @@ function err_env() { | |||||
} | } | ||||
function append_path_env_and_check() { | function append_path_env_and_check() { | ||||
echo "export swig pwd to PATH" | |||||
export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH | |||||
echo "export vs2019 install path" | echo "export vs2019 install path" | ||||
export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise | export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise | ||||
# for llvm-strip | # for llvm-strip | ||||
@@ -62,7 +60,7 @@ function config_python_env() { | |||||
if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]] | if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]] | ||||
then | then | ||||
WINDOWS_WHL_WITH_CUDA="false" | |||||
WINDOWS_WHL_WITH_CUDA="OFF" | |||||
fi | fi | ||||
@@ -74,26 +72,46 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6 | |||||
CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | ||||
CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | ||||
CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | ||||
function depend_real_copy() { | |||||
REAL_DST=$1 | |||||
echo "real copy lib to $1" | |||||
cp "${TRT_LIB}" ${REAL_DST} | |||||
cp "${CUDNN_LIB}" ${REAL_DST} | |||||
cp "${CUSOLVER_LIB}" ${REAL_DST} | |||||
cp "${CUBLAS_LIB}" ${REAL_DST} | |||||
cp "${CURAND_LIB}" ${REAL_DST} | |||||
cp "${CUBLASLT_LIB}" ${REAL_DST} | |||||
cp "${CUDART_LIB}" ${REAL_DST} | |||||
} | |||||
function copy_more_dll() { | function copy_more_dll() { | ||||
# for python whl real use | # for python whl real use | ||||
CP_DST=${BUILD_DIR}/staging/megengine/_internal/lib | |||||
rm -rf ${CP_DST} | |||||
mkdir ${CP_DST} | |||||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
echo "config BUILD_IMPERATIVE core lib dir" | |||||
CP_WHL_DST=${BUILD_DIR}/staging/megengine/core/lib | |||||
else | |||||
echo "config legacy python lib dir" | |||||
CP_WHL_DST=${BUILD_DIR}/staging/megengine/_internal/lib | |||||
fi | |||||
rm -rf ${CP_WHL_DST} | |||||
mkdir ${CP_WHL_DST} | |||||
# workround for cpu-only version import failed, use a | |||||
# empty.file to triger setup.py to create a null empty | |||||
echo "empty" > ${CP_WHL_DST}/empty.file | |||||
if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then | |||||
if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then | |||||
echo "copy nvidia lib to whl use...." | echo "copy nvidia lib to whl use...." | ||||
cp "${TRT_LIB}" ${CP_DST} | |||||
cp "${CUDNN_LIB}" ${CP_DST} | |||||
cp "${CUSOLVER_LIB}" ${CP_DST} | |||||
cp "${CUBLAS_LIB}" ${CP_DST} | |||||
cp "${CURAND_LIB}" ${CP_DST} | |||||
cp "${CUBLASLT_LIB}" ${CP_DST} | |||||
cp "${CUDART_LIB}" ${CP_DST} | |||||
depend_real_copy ${CP_WHL_DST} | |||||
fi | fi | ||||
} | } | ||||
if [[ -z ${BUILD_IMPERATIVE} ]] | |||||
then | |||||
BUILD_IMPERATIVE="OFF" | |||||
fi | |||||
function do_build() { | function do_build() { | ||||
for ver in ${ALL_PYTHON} | for ver in ${ALL_PYTHON} | ||||
do | do | ||||
@@ -118,21 +136,31 @@ function do_build() { | |||||
#force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python | #force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python | ||||
#export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | #export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | ||||
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | ||||
export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||||
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||||
#call build and install | #call build and install | ||||
#FIXME: cmake do not triger update python config, after | #FIXME: cmake do not triger update python config, after | ||||
#change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add | #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add | ||||
#-r to remove build cache after a new ver build, which | #-r to remove build cache after a new ver build, which | ||||
#will be more slow build than without -r | #will be more slow build than without -r | ||||
if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then | |||||
BUILD_ARGS=" -t -r" | |||||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
echo "build whl with IMPERATIVE python rt" | |||||
BUILD_ARGS="${BUILD_ARGS} -n " | |||||
else | |||||
echo "build whl with legacy python rt" | |||||
fi | |||||
if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then | |||||
echo "build windows whl with cuda" | echo "build windows whl with cuda" | ||||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r -c | |||||
BUILD_ARGS="${BUILD_ARGS} -c " | |||||
else | else | ||||
echo "build windows whl with cpu only" | echo "build windows whl with cpu only" | ||||
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||||
fi | fi | ||||
echo "host_build.sh BUILD_ARGS: ${BUILD_ARGS}" | |||||
${SRC_DIR}/scripts/cmake-build/host_build.sh ${BUILD_ARGS} | |||||
#call setup.py | #call setup.py | ||||
BUILD_DIR=${SRC_DIR}/build_dir/host/build/ | BUILD_DIR=${SRC_DIR}/build_dir/host/build/ | ||||
cd ${BUILD_DIR} | cd ${BUILD_DIR} | ||||
@@ -143,10 +171,27 @@ function do_build() { | |||||
fi | fi | ||||
mkdir -p staging | mkdir -p staging | ||||
if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
echo "build whl with IMPERATIVE python rt" | |||||
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
cd ${BUILD_DIR}/staging/megengine/core | |||||
rt_file=`ls _imperative_rt.*.pyd` | |||||
echo "rt file is: ${rt_file}" | |||||
if [[ -z ${rt_file} ]] | |||||
then | |||||
echo "ERR: can not find valid rt file" | |||||
exit -1 | |||||
fi | |||||
llvm-strip -s ${rt_file} | |||||
mv ${rt_file} _imperative_rt.pyd | |||||
else | |||||
echo "build whl with legacy python rt" | |||||
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
cd ${BUILD_DIR}/staging/megengine/_internal | |||||
llvm-strip -s _mgb.pyd | |||||
fi | |||||
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
cd ${BUILD_DIR}/staging/megengine/_internal | |||||
llvm-strip -s _mgb.pyd | |||||
copy_more_dll | copy_more_dll | ||||
cd ${BUILD_DIR}/staging | cd ${BUILD_DIR}/staging | ||||
${PYTHON_DIR}/python3 setup.py bdist_wheel | ${PYTHON_DIR}/python3 setup.py bdist_wheel | ||||
@@ -175,5 +220,6 @@ function third_party_prepare() { | |||||
} | } | ||||
###################### | ###################### | ||||
export ALREADY_CONFIG_PYTHON_VER="yes" | |||||
third_party_prepare | third_party_prepare | ||||
do_build | do_build |
@@ -33,6 +33,11 @@ class RNGxorshf { | |||||
uint64_t s[2]; | uint64_t s[2]; | ||||
public: | public: | ||||
#if __cplusplus >= 201703L | |||||
typedef uint64_t result_type; | |||||
static constexpr uint64_t min() { return 0; } | |||||
static constexpr uint64_t max() { return UINT64_MAX; } | |||||
#endif | |||||
RNGxorshf(uint64_t seed) { | RNGxorshf(uint64_t seed) { | ||||
std::mt19937_64 gen(seed); | std::mt19937_64 gen(seed); | ||||
s[0] = gen(); | s[0] = gen(); | ||||
@@ -936,8 +941,12 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() { | |||||
} | } | ||||
} | } | ||||
m_cur_records = records; | m_cur_records = records; | ||||
#if __cplusplus >= 201703L | |||||
std::shuffle(perm.begin(), perm.end(), rng); | |||||
#else | |||||
std::random_shuffle(perm.begin(), perm.end(), | std::random_shuffle(perm.begin(), perm.end(), | ||||
[&](size_t x) { return rng() % x; }); | [&](size_t x) { return rng() % x; }); | ||||
#endif | |||||
for (size_t i = 0; i < length; ++i) { | for (size_t i = 0; i < length; ++i) { | ||||
invoke_search(mutation(mutation(records[i].first))); | invoke_search(mutation(mutation(records[i].first))); | ||||
invoke_search(crossover(records[i].first, records[perm[i]].first)); | invoke_search(crossover(records[i].first, records[perm[i]].first)); | ||||
@@ -705,7 +705,12 @@ TEST(TestOprBlas, MatrixInverse) { | |||||
} | } | ||||
auto ptr = inp[0]->ptr<float>(); | auto ptr = inp[0]->ptr<float>(); | ||||
for (size_t i = 0; i < batch; ++i, ptr += n * n) { | for (size_t i = 0; i < batch; ++i, ptr += n * n) { | ||||
#if __cplusplus >= 201703L | |||||
std::default_random_engine rng_engine; | |||||
std::shuffle(perm.begin(), perm.end(), rng_engine); | |||||
#else | |||||
std::random_shuffle(perm.begin(), perm.end()); | std::random_shuffle(perm.begin(), perm.end()); | ||||
#endif | |||||
for (size_t j = 0; j < n; ++j) { | for (size_t j = 0; j < n; ++j) { | ||||
ptr[j * n + perm[j]] += 5; | ptr[j * n + perm[j]] += 5; | ||||
} | } | ||||
@@ -36,7 +36,12 @@ void run_all_gather(const std::vector<size_t>& axis_size, bool& success, | |||||
sleep_time.push_back(i * 0.05 + 0.1); | sleep_time.push_back(i * 0.05 + 0.1); | ||||
tot_axis_size += axis_size[i]; | tot_axis_size += axis_size[i]; | ||||
} | } | ||||
#if __cplusplus >= 201703L | |||||
std::default_random_engine rng_engine; | |||||
std::shuffle(sleep_time.begin(), sleep_time.end(), rng_engine); | |||||
#else | |||||
std::random_shuffle(sleep_time.begin(), sleep_time.end()); | std::random_shuffle(sleep_time.begin(), sleep_time.end()); | ||||
#endif | |||||
auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA; | auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA; | ||||
size_t nr_dev = std::min<size_t>( | size_t nr_dev = std::min<size_t>( | ||||
@@ -18,7 +18,11 @@ endif() | |||||
add_executable(megbrain_test ${SOURCES}) | add_executable(megbrain_test ${SOURCES}) | ||||
target_link_libraries(megbrain_test gtest) | target_link_libraries(megbrain_test gtest) | ||||
target_link_libraries(megbrain_test megengine) | |||||
if(MSVC OR WIN32) | |||||
target_link_libraries(megbrain_test megbrain megdnn) | |||||
else() | |||||
target_link_libraries(megbrain_test megengine) | |||||
endif() | |||||
if(CXX_SUPPORT_WCLASS_MEMACCESS) | if(CXX_SUPPORT_WCLASS_MEMACCESS) | ||||
if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | ||||
@@ -28,10 +32,12 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||||
endif() | endif() | ||||
endif() | endif() | ||||
if(APPLE OR ANDROID) | |||||
target_link_libraries(megbrain_test dl) | |||||
else() | |||||
target_link_libraries(megbrain_test dl rt) | |||||
if(UNIX) | |||||
if(APPLE OR ANDROID) | |||||
target_link_libraries(megbrain_test dl) | |||||
else() | |||||
target_link_libraries(megbrain_test dl rt) | |||||
endif() | |||||
endif() | endif() | ||||
if (MGE_WITH_DISTRIBUTED) | if (MGE_WITH_DISTRIBUTED) | ||||