Browse Source

feat(whl/imperative): compat for build python whl imperative and legacy runtime

GitOrigin-RevId: 7f6629ae1f
tags/v1.0.0-rc1
Megvii Engine Team 4 years ago
parent
commit
6e882c1a86
33 changed files with 439 additions and 119 deletions
  1. +2
    -0
      CMakeLists.txt
  2. +4
    -0
      dnn/src/common/utils.h
  3. +6
    -4
      dnn/test/CMakeLists.txt
  4. +1
    -1
      dnn/test/common/mesh_indexing.h
  5. +2
    -2
      dnn/test/common/rng.cpp
  6. +10
    -0
      dnn/test/common/rng.h
  7. +10
    -9
      dnn/test/cuda/argmxx.cpp
  8. +2
    -2
      dnn/test/cuda/argsort.cpp
  9. +8
    -7
      dnn/test/cuda/relayout.cpp
  10. +1
    -1
      dnn/test/cuda/sleep.cpp
  11. +11
    -10
      dnn/test/rocm/argmxx.cpp
  12. +9
    -1
      imperative/CMakeLists.txt
  13. +61
    -0
      imperative/python/megengine/__init__.py
  14. +29
    -11
      imperative/python/megengine/utils/max_recursion_limit.py
  15. +16
    -1
      imperative/python/setup.py
  16. +10
    -11
      imperative/python/src/helper.cpp
  17. +4
    -0
      imperative/python/src/utils.cpp
  18. +4
    -0
      imperative/python/test/integration/test_dp_correctness.py
  19. +11
    -11
      imperative/python/test/unit/functional/test_distributed.py
  20. +4
    -0
      imperative/python/test/unit/test_autodiff.py
  21. +10
    -0
      imperative/src/impl/profiler.cpp
  22. +4
    -0
      imperative/src/impl/proxy_graph.cpp
  23. +5
    -2
      imperative/test/CMakeLists.txt
  24. +4
    -1
      python_module/CMakeLists.txt
  25. +8
    -1
      scripts/cmake-build/BUILD_README.md
  26. +40
    -3
      scripts/cmake-build/host_build.sh
  27. +8
    -5
      scripts/whl/BUILD_PYTHON_WHL_README.md
  28. +58
    -10
      scripts/whl/macos/macos_build_whl.sh
  29. +67
    -21
      scripts/whl/windows/windows_build_whl.sh
  30. +9
    -0
      src/core/impl/graph/seq_sublinear_memory.cpp
  31. +5
    -0
      src/opr/test/blas.cpp
  32. +5
    -0
      src/opr/test/muxing.cpp
  33. +11
    -5
      test/CMakeLists.txt

+ 2
- 0
CMakeLists.txt View File

@@ -697,8 +697,10 @@ endif()
if(MGE_WITH_PYTHON_MODULE) if(MGE_WITH_PYTHON_MODULE)
if(MGE_BUILD_IMPERATIVE_RT) if(MGE_BUILD_IMPERATIVE_RT)
add_subdirectory(imperative) add_subdirectory(imperative)
message("-- Enable imperative python wrapper runtime")
else() else()
add_subdirectory(python_module) add_subdirectory(python_module)
message("-- Enable legacy python wrapper runtime")
endif() endif()
endif() endif()




+ 4
- 0
dnn/src/common/utils.h View File

@@ -342,7 +342,11 @@ template <typename T>
struct SafeMultiplies; struct SafeMultiplies;


template <typename T> template <typename T>
#if __cplusplus >= 201703L
struct _SafeMultipliesImplUnsigned {
#else
struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> { struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> {
#endif
static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8; static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8;


static size_t clz(unsigned x) { static size_t clz(unsigned x) {


+ 6
- 4
dnn/test/CMakeLists.txt View File

@@ -70,8 +70,10 @@ if (MEG_WITH_ROCM)
target_link_libraries (megdnn_test ${MGE_ROCM_LIBS}) target_link_libraries (megdnn_test ${MGE_ROCM_LIBS})
endif () endif ()


if(APPLE OR ANDROID)
target_link_libraries(megdnn_test dl)
else()
target_link_libraries(megdnn_test dl rt)
if(UNIX)
if(APPLE OR ANDROID)
target_link_libraries(megdnn_test dl)
else()
target_link_libraries(megdnn_test dl rt)
endif()
endif() endif()

+ 1
- 1
dnn/test/common/mesh_indexing.h View File

@@ -89,7 +89,7 @@ public:
auto ptr = tensor.ptr<int>(); auto ptr = tensor.ptr<int>();
for (size_t n = 0; n < size; ++n) { for (size_t n = 0; n < size; ++n) {
std::set<int> used; std::set<int> used;
std::random_shuffle(seq.begin(), seq.end());
COMPAT_RANDOM(seq.begin(), seq.end());
for (size_t step = 0; step < stride; ++step) { for (size_t step = 0; step < stride; ++step) {
megdnn_assert(used.size() < m_size); megdnn_assert(used.size() < m_size);
ptr[n * stride + step] = seq[step]; ptr[n * stride + step] = seq[step];


+ 2
- 2
dnn/test/common/rng.cpp View File

@@ -75,7 +75,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) {
i2f.i = static_cast<uint16_t>(x); i2f.i = static_cast<uint16_t>(x);
m_sequence.push_back(i2f.f); m_sequence.push_back(i2f.f);
} }
std::random_shuffle(m_sequence.begin(), m_sequence.end());
COMPAT_RANDOM(m_sequence.begin(), m_sequence.end());
} }


Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) {
@@ -99,7 +99,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) {
m_sequence.push_back(i2f.f); m_sequence.push_back(i2f.f);
} }


std::random_shuffle(m_sequence.begin(), m_sequence.end());
COMPAT_RANDOM(m_sequence.begin(), m_sequence.end());
} }


void Float16PeriodicalRNG::gen(const TensorND& tensor) { void Float16PeriodicalRNG::gen(const TensorND& tensor) {


+ 10
- 0
dnn/test/common/rng.h View File

@@ -19,6 +19,16 @@
namespace megdnn { namespace megdnn {
namespace test { namespace test {


/*
 * COMPAT_RANDOM(begin, end): shuffle the range [begin, end) in place.
 *
 * std::random_shuffle was removed in C++17, so from C++17 onwards the macro
 * falls back to std::shuffle with an explicit engine; older standards keep
 * calling std::random_shuffle.
 *
 * The engine is thread_local rather than a plain local: a freshly
 * default-constructed engine always starts from the same seed, so re-creating
 * it on every call would apply the very same permutation each time (repeated
 * shuffles of equally sized ranges would never produce fresh orderings).
 * Keeping the engine alive lets consecutive calls at one expansion site
 * advance through the random sequence, while the default seed keeps test
 * runs reproducible. Every expansion site owns its own engine.
 *
 * NOTE(review): the C++17 branch needs <algorithm> and <random> to be visible
 * at the point of use -- confirm this header (or its includers) provides them.
 */
#if __cplusplus >= 201703L
#define COMPAT_RANDOM(begin, end)                              \
    {                                                          \
        thread_local std::default_random_engine rng_engine;    \
        std::shuffle(begin, end, rng_engine);                  \
    }
#else
#define COMPAT_RANDOM(begin, end) std::random_shuffle(begin, end);
#endif

class RNG { class RNG {
protected: protected:
class RNGxorshf; class RNGxorshf;


+ 10
- 9
dnn/test/cuda/argmxx.cpp View File

@@ -24,15 +24,16 @@ class ArgmxxRNG final: public RNG {
void gen(const TensorND &tensor) override { void gen(const TensorND &tensor) override {
auto offset = tensor.layout.span().low_elem; auto offset = tensor.layout.span().low_elem;
auto nr_elems = tensor.layout.span().dist_elem(); auto nr_elems = tensor.layout.span().dist_elem();
#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset+i] = i; \
} \
std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \
}

#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset + i] = i; \
} \
COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \
}
MEGDNN_FOREACH_COMPUTING_DTYPE(cb); MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
#undef cb #undef cb
} }


+ 2
- 2
dnn/test/cuda/argsort.cpp View File

@@ -32,7 +32,7 @@ class ArgsortRNG final : public RNG {
} else { } else {
for (int i = 0; i < n; ++i) for (int i = 0; i < n; ++i)
ptr[i] = static_cast<T>(i - n / 2); ptr[i] = static_cast<T>(i - n / 2);
std::random_shuffle(ptr, ptr + n);
COMPAT_RANDOM(ptr, ptr + n);
} }
} }


@@ -86,7 +86,7 @@ void run_backward_test(Handle* handle, DType dtype) {
for (size_t j = 0; j < n; ++j) { for (size_t j = 0; j < n; ++j) {
ptr[j] = j; ptr[j] = j;
} }
std::random_shuffle(ptr, ptr + n);
COMPAT_RANDOM(ptr, ptr + n);
ptr += n; ptr += n;
} }
} }


+ 8
- 7
dnn/test/cuda/relayout.cpp View File

@@ -361,9 +361,8 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_7) {
for (size_t r = 0; r < _dim.size(); r++) for (size_t r = 0; r < _dim.size(); r++)
permutation[r] = r; permutation[r] = r;
for (int nsample = 0; nsample < 50; nsample++) { for (int nsample = 0; nsample < 50; nsample++) {
std::random_shuffle(_dim.begin(), _dim.end());

std::random_shuffle(permutation.begin(), permutation.end());
COMPAT_RANDOM(_dim.begin(), _dim.end());
COMPAT_RANDOM(permutation.begin(), permutation.end());
if (!isTrivial(permutation)) { if (!isTrivial(permutation)) {
run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5], run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5],
_dim[6]}, _dim[6]},
@@ -451,9 +450,10 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_5) {
printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re); printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re);
// printVec(dim); // printVec(dim);


std::random_shuffle(dim.begin(), dim.end());
COMPAT_RANDOM(dim.begin(), dim.end());

while (isTrivial(permutation)) { while (isTrivial(permutation)) {
std::random_shuffle(permutation.begin(), permutation.end());
COMPAT_RANDOM(permutation.begin(), permutation.end());
} }


run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()}, run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()},
@@ -603,8 +603,9 @@ TEST_F(CUDA, BENCHMARK_LAST_CONTIG_ALIGN_TEST) {
for (size_t r = 0; r < _dim.size(); r++) for (size_t r = 0; r < _dim.size(); r++)
permutation[r] = r; permutation[r] = r;
for (int nsample = 0; nsample < 20; nsample++) { for (int nsample = 0; nsample < 20; nsample++) {
std::random_shuffle(_dim.begin(), _dim.end() - 1);
std::random_shuffle(permutation.begin(), permutation.end() - 1);
COMPAT_RANDOM(_dim.begin(), _dim.end() - 1);

COMPAT_RANDOM(permutation.begin(), permutation.end() - 1);


if (nsample < 5) if (nsample < 5)
_dim[5] = (u.gen_single_val() / 4 + 1) * 4; _dim[5] = (u.gen_single_val() / 4 + 1) * 4;


+ 1
- 1
dnn/test/cuda/sleep.cpp View File

@@ -24,7 +24,7 @@ using namespace test;




TEST_F(CUDA, SLEEP) { TEST_F(CUDA, SLEEP) {
auto opr = this->handle_cuda()->create_operator<Sleep>();
auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>();


auto run = [&](float time) -> double { auto run = [&](float time) -> double {
opr->param() = {time}; opr->param() = {time};


+ 11
- 10
dnn/test/rocm/argmxx.cpp View File

@@ -24,16 +24,17 @@ class ArgmxxRNG final: public RNG {
void gen(const TensorND &tensor) override { void gen(const TensorND &tensor) override {
auto offset = tensor.layout.span().low_elem; auto offset = tensor.layout.span().low_elem;
auto nr_elems = tensor.layout.span().dist_elem(); auto nr_elems = tensor.layout.span().dist_elem();
#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset+i] = i; \
} \
std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \
return; \
}

#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset + i] = i; \
} \
COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb #undef cb
megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s", megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s",


+ 9
- 1
imperative/CMakeLists.txt View File

@@ -76,7 +76,11 @@ add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT})


add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11)
pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS})
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT})
if (APPLE OR MSVC OR WIN32)
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn)
else()
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT})
endif()
if (MGE_WITH_DISTRIBUTED) if (MGE_WITH_DISTRIBUTED)
message("Imperative configured to link megray") message("Imperative configured to link megray")
target_link_libraries(${MODULE_NAME} PRIVATE megray) target_link_libraries(${MODULE_NAME} PRIVATE megray)
@@ -91,6 +95,10 @@ set_target_properties(${MODULE_NAME} PROPERTIES
SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}
LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core
) )
if (APPLE OR MSVC OR WIN32)
message("-- overwriting SUFFIX at macos and windows before config by set_target_properties")
pybind11_extension(${MODULE_NAME})
endif()
add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) add_dependencies(${MODULE_NAME} gen_opr_py _version_ld)


if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)


+ 61
- 0
imperative/python/megengine/__init__.py View File

@@ -8,6 +8,67 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import os import os
import sys import sys
import platform
import ctypes

# On Windows the native DLLs shipped inside the wheel (megengine/core/lib) are
# not on the default DLL search path, so they are registered and pre-loaded
# here before the extension module below is imported.
if sys.platform == "win32":
    lib_path = os.path.join(os.path.dirname(__file__), "core/lib")
    dll_paths = list(filter(os.path.exists, [lib_path,]))
    # Fail early when the bundled library directory is missing.
    assert len(dll_paths) > 0

    kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True)
    # AddDllDirectory is not exported by every kernel32.dll; probe for it.
    has_load_library_attr = hasattr(kernel32, "AddDllDirectory")
    # 0x0001 == SEM_FAILCRITICALERRORS: do not pop up critical-error dialogs
    # while probing DLLs. The previous mode is restored in the finally below,
    # so a failed load no longer leaves the process error mode altered.
    old_error_mode = kernel32.SetErrorMode(0x0001)

    try:
        # Handles are pointer sized; without an explicit restype ctypes would
        # truncate them to a C int.
        kernel32.LoadLibraryW.restype = ctypes.c_void_p
        if has_load_library_attr:
            kernel32.AddDllDirectory.restype = ctypes.c_void_p
            kernel32.LoadLibraryExW.restype = ctypes.c_void_p

        for dll_path in dll_paths:
            if sys.version_info >= (3, 8):
                os.add_dll_directory(dll_path)
            elif has_load_library_attr:
                res = kernel32.AddDllDirectory(dll_path)
                if res is None:
                    err = ctypes.WinError(ctypes.get_last_error())
                    err.strerror += ' Error adding "{}" to the DLL search PATH.'.format(
                        dll_path
                    )
                    raise err
            else:
                print("WARN: python or OS env have some issue, may load DLL failed!!!")

        import glob

        dlls = glob.glob(os.path.join(lib_path, "*.dll"))
        path_patched = False
        for dll in dlls:
            is_loaded = False
            if has_load_library_attr:
                # 0x00001100 == LOAD_LIBRARY_SEARCH_DEFAULT_DIRS
                #             | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR
                res = kernel32.LoadLibraryExW(dll, None, 0x00001100)
                last_error = ctypes.get_last_error()
                # 126 == ERROR_MOD_NOT_FOUND: tolerated here, the PATH based
                # fallback below gets a second chance at loading the DLL.
                if res is None and last_error != 126:
                    err = ctypes.WinError(last_error)
                    err.strerror += ' Error loading "{}" or one of its dependencies.'.format(
                        dll
                    )
                    raise err
                elif res is not None:
                    is_loaded = True
            if not is_loaded:
                # Fallback: prepend the library directories to PATH (once) and
                # retry with the classic search order.
                if not path_patched:
                    os.environ["PATH"] = ";".join(dll_paths + [os.environ["PATH"]])
                    path_patched = True
                res = kernel32.LoadLibraryW(dll)
                if res is None:
                    err = ctypes.WinError(ctypes.get_last_error())
                    err.strerror += ' Error loading "{}" or one of its dependencies.'.format(
                        dll
                    )
                    raise err
    finally:
        kernel32.SetErrorMode(old_error_mode)


from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func
from .device import * from .device import *


+ 29
- 11
imperative/python/megengine/utils/max_recursion_limit.py View File

@@ -6,10 +6,14 @@
# Unless required by applicable law or agreed to in writing, # Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an # software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import resource
import platform
import sys import sys
import threading import threading


# Windows do not imp resource package
if platform.system() != "Windows":
import resource



class AlternativeRecursionLimit: class AlternativeRecursionLimit:
r"""A reentrant context manager for setting global recursion limits. r"""A reentrant context manager for setting global recursion limits.
@@ -28,16 +32,24 @@ class AlternativeRecursionLimit:
with self.lock: with self.lock:
if self.count == 0: if self.count == 0:
self.orig_py_limit = sys.getrecursionlimit() self.orig_py_limit = sys.getrecursionlimit()
if platform.system() != "Windows":
( (
self.orig_rlim_stack_soft, self.orig_rlim_stack_soft,
self.orig_rlim_stack_hard, self.orig_rlim_stack_hard,
) = resource.getrlimit(resource.RLIMIT_STACK) ) = resource.getrlimit(resource.RLIMIT_STACK)
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard),
)
# increase recursion limit
sys.setrecursionlimit(self.new_py_limit)
# FIXME: https://bugs.python.org/issue34602, python3 release version
# on Macos always have this issue, not all user install python3 from src
try:
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard),
)
except ValueError as exc:
if platform.system() != "Darwin":
raise exc

# increase recursion limit
sys.setrecursionlimit(self.new_py_limit)
self.count += 1 self.count += 1


def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
@@ -45,10 +57,16 @@ class AlternativeRecursionLimit:
self.count -= 1 self.count -= 1
if self.count == 0: if self.count == 0:
sys.setrecursionlimit(self.orig_py_limit) sys.setrecursionlimit(self.orig_py_limit)
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard),
)

if platform.system() != "Windows":
try:
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard),
)
except ValueError as exc:
if platform.system() != "Darwin":
raise exc




_max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1)


+ 16
- 1
imperative/python/setup.py View File

@@ -9,6 +9,7 @@
import os import os
import re import re
import pathlib import pathlib
import platform
from distutils.file_util import copy_file from distutils.file_util import copy_file
from setuptools import setup, find_packages, Extension from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext as _build_ext from setuptools.command.build_ext import build_ext as _build_ext
@@ -29,7 +30,10 @@ class build_ext(_build_ext):
extdir.parent.mkdir(parents=True, exist_ok=True) extdir.parent.mkdir(parents=True, exist_ok=True)


modpath = self.get_ext_fullname(ext.name).split('.') modpath = self.get_ext_fullname(ext.name).split('.')
modpath[-1] += '.so'
if platform.system() == 'Windows':
modpath[-1] += '.pyd'
else:
modpath[-1] += '.so'
modpath = str(pathlib.Path(*modpath).resolve()) modpath = str(pathlib.Path(*modpath).resolve())


copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run)
@@ -47,6 +51,14 @@ if local_version:
__version__ = '{}+{}'.format(__version__, local_version) __version__ = '{}+{}'.format(__version__, local_version)


packages = find_packages(exclude=['test']) packages = find_packages(exclude=['test'])
package_data = [
str(f.relative_to('megengine'))
for f in pathlib.Path('megengine', 'core', 'include').glob('**/*')
]
package_data += [
str(f.relative_to('megengine'))
for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*')
]


with open('requires.txt') as f: with open('requires.txt') as f:
requires = f.read().splitlines() requires = f.read().splitlines()
@@ -63,6 +75,9 @@ setup_kwargs = dict(
author='Megvii Engine Team', author='Megvii Engine Team',
author_email=email, author_email=email,
packages=packages, packages=packages,
package_data={
'megengine': package_data,
},
ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')], ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')],
install_requires=requires, install_requires=requires,
extras_require={ extras_require={


+ 10
- 11
imperative/python/src/helper.cpp View File

@@ -9,15 +9,6 @@
#include "megbrain/utils/mempool.h" #include "megbrain/utils/mempool.h"
#include "./numpy_dtypes.h" #include "./numpy_dtypes.h"


/*
* demangle typeid, see
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
*/
#ifdef __GNUG__
#include <cstdlib>
#include <memory>
#include <cxxabi.h>

namespace py = pybind11; namespace py = pybind11;


PyTaskDipatcher py_task_q = {}; PyTaskDipatcher py_task_q = {};
@@ -34,10 +25,18 @@ py::module rel_import(py::str name, py::module m, int level) {
return import(name, m.attr("__dict__"), py::arg("level")=level); return import(name, m.attr("__dict__"), py::arg("level")=level);
} }


/*
* demangle typeid, see
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
*/
#ifdef __GNUG__
#include <cxxabi.h>
#include <cstdlib>
#include <memory>

namespace { namespace {


std::string demangle_typeid(const char* name) { std::string demangle_typeid(const char* name) {

int status = -4; // some arbitrary value to eliminate the compiler warning int status = -4; // some arbitrary value to eliminate the compiler warning


// enable c++11 by passing the flag -std=c++11 to g++ // enable c++11 by passing the flag -std=c++11 to g++
@@ -48,7 +47,7 @@ std::string demangle_typeid(const char* name) {


return (status==0) ? res.get() : name ; return (status==0) ? res.get() : name ;
} }
}
} // namespace
#else #else


namespace { namespace {


+ 4
- 0
imperative/python/src/utils.cpp View File

@@ -1,4 +1,8 @@
#include "utils.h" #include "utils.h"
#ifdef WIN32
#include <stdio.h>
#include <windows.h>
#endif


#include <pybind11/operators.h> #include <pybind11/operators.h>
#include <atomic> #include <atomic>


+ 4
- 0
imperative/python/test/integration/test_dp_correctness.py View File

@@ -8,6 +8,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import multiprocessing as mp import multiprocessing as mp
import os import os
import platform
import re import re
import subprocess import subprocess
import sys import sys
@@ -196,6 +197,9 @@ def run_test(




@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
@pytest.mark.skipif(
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
def test_dp_correctness(): def test_dp_correctness():
model_name = "mnist_model_with_test.mge" model_name = "mnist_model_with_test.mge"
model_path = os.path.join(os.path.dirname(__file__), model_name) model_path = os.path.join(os.path.dirname(__file__), model_name)


+ 11
- 11
imperative/python/test/unit/functional/test_distributed.py View File

@@ -35,7 +35,7 @@ from megengine.functional.distributed import (
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_reduce_sum(): def test_reduce_sum():
@@ -77,7 +77,7 @@ def test_reduce_sum():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_broadcast(): def test_broadcast():
@@ -115,7 +115,7 @@ def test_broadcast():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_all_gather(): def test_all_gather():
@@ -154,7 +154,7 @@ def test_all_gather():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_reduce_scatter_sum(): def test_reduce_scatter_sum():
@@ -193,7 +193,7 @@ def test_reduce_scatter_sum():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_all_reduce_sum(): def test_all_reduce_sum():
@@ -232,7 +232,7 @@ def test_all_reduce_sum():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_all_reduce_max(): def test_all_reduce_max():
@@ -271,7 +271,7 @@ def test_all_reduce_max():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_all_reduce_min(): def test_all_reduce_min():
@@ -310,7 +310,7 @@ def test_all_reduce_min():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_gather(): def test_gather():
@@ -352,7 +352,7 @@ def test_gather():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_scatter(): def test_scatter():
@@ -390,7 +390,7 @@ def test_scatter():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_all_to_all(): def test_all_to_all():
@@ -430,7 +430,7 @@ def test_all_to_all():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now" platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
) )
@pytest.mark.skipif( @pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
) )
@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
def test_io_remote(): def test_io_remote():


+ 4
- 0
imperative/python/test/unit/test_autodiff.py View File

@@ -6,6 +6,7 @@
# Unless required by applicable law or agreed to in writing, # Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an # software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import platform
import weakref import weakref


import numpy as np import numpy as np
@@ -51,6 +52,9 @@ def save_to(self, name="grad"):




@pytest.mark.isolated_distributed @pytest.mark.isolated_distributed
@pytest.mark.skipif(
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
def test_dist_grad(): def test_dist_grad():
world_size = 2 world_size = 2
x_np = np.random.rand(10).astype("float32") x_np = np.random.rand(10).astype("float32")


+ 10
- 0
imperative/src/impl/profiler.cpp View File

@@ -9,7 +9,17 @@


#include "megbrain/imperative/profiler.h" #include "megbrain/imperative/profiler.h"


#if defined(_MSC_VER) || defined(WIN32)
#include <windows.h>
#define getpid GetCurrentProcessId
#else
#include <sys/unistd.h> #include <sys/unistd.h>
#endif

#if defined(__APPLE__) || defined(__MACOSX)
#include <unistd.h>
#endif

#include <variant> #include <variant>


#include "megbrain/imperative/ops/opr_attr.h" #include "megbrain/imperative/ops/opr_attr.h"


+ 4
- 0
imperative/src/impl/proxy_graph.cpp View File

@@ -16,6 +16,10 @@
#include "megbrain/imperative/ops/opr_attr.h" #include "megbrain/imperative/ops/opr_attr.h"
#include "megbrain/imperative/ops/backward_graph.h" #include "megbrain/imperative/ops/backward_graph.h"


#if __cplusplus >= 201703L
#include <optional>
#endif

namespace mgb { namespace mgb {
namespace imperative { namespace imperative {




+ 5
- 2
imperative/test/CMakeLists.txt View File

@@ -38,8 +38,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS)
endif() endif()


if(UNIX) if(UNIX)
target_link_libraries(imperative_test dl rt)
if(APPLE OR ANDROID)
target_link_libraries(imperative_test dl)
else()
target_link_libraries(imperative_test dl rt)
endif()
endif() endif()



install(TARGETS imperative_test RUNTIME DESTINATION test) install(TARGETS imperative_test RUNTIME DESTINATION test)

+ 4
- 1
python_module/CMakeLists.txt View File

@@ -81,7 +81,10 @@ else()
target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT})
endif() endif()
target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR}) target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR})
target_link_libraries(mgb ${PYTHON_LIBRARIES})
# only windows need link PYTHON_LIBRARIES
if(MSVC OR WIN32)
target_link_libraries(mgb ${PYTHON_LIBRARIES})
endif()


if (MGE_WITH_DISTRIBUTED) if (MGE_WITH_DISTRIBUTED)
target_link_libraries(mgb megray) target_link_libraries(mgb megray)


+ 8
- 1
scripts/cmake-build/BUILD_README.md View File

@@ -30,11 +30,17 @@
4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env 4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env
4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path 4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path
if u do not do 4d/4e/4f, CUDA runtime can not find dll if u do not do 4d/4e/4f, CUDA runtime can not find dll
5: install python3 (default version 3.8.3) to /c/Users/${USER}/mge_whl_python_env/3.8.3, add it to
the PATH env, and run python3 -m pip install numpy (if you want to build with training mode or build the python whl)
6: install swig from install gui (if you want to build with training mode or build the python whl)
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip
b: install swig to /c/Users/${USER}/swigwin-4.0.2
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to /c/Users/${USER}/swigwin-4.0.2
``` ```
### linux host build ### linux host build
``` ```
1: cmake, which version > 3.14.4 1: cmake, which version > 3.14.4
2: gcc/g++, which version > 6
2: gcc/g++, which version > 6 (gcc/g++ >= 7 is needed to build with training mode)
3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl 3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl
4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool: 4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool:
5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo 5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo
@@ -47,6 +53,7 @@
3: brew install python python3 swig coreutils 3: brew install python python3 swig coreutils
4: install at least xcode command line tool: https://developer.apple.com/xcode/ 4: install at least xcode command line tool: https://developer.apple.com/xcode/
5: about cuda: we do not support CUDA on macos 5: about cuda: we do not support CUDA on macos
6: python3 -m pip install numpy (if u want to build with training mode or build python whl)
``` ```
### cross build for arm-android ### cross build for arm-android
now we support windows/linux/macos cross build to arm-android now we support windows/linux/macos cross build to arm-android


+ 40
- 3
scripts/cmake-build/host_build.sh View File

@@ -9,6 +9,7 @@ function usage() {
echo "-t : Build with training mode, default inference only" echo "-t : Build with training mode, default inference only"
echo "-m : Build with m32 mode(only for windows build), default m64" echo "-m : Build with m32 mode(only for windows build), default m64"
echo "-r : remove old build dir before make, default off" echo "-r : remove old build dir before make, default off"
echo "-n : enable new python runtime(valid when training mode with -t, default is legacy runtime)"
echo "-h : show usage" echo "-h : show usage"
echo "append other cmake config by export EXTRA_CMAKE_ARGS=..." echo "append other cmake config by export EXTRA_CMAKE_ARGS=..."
echo "example: $0 -d" echo "example: $0 -d"
@@ -22,9 +23,10 @@ MGE_WINDOWS_BUILD_ARCH=x64
MGE_WINDOWS_BUILD_MARCH=m64 MGE_WINDOWS_BUILD_MARCH=m64
MGE_ARCH=x86_64 MGE_ARCH=x86_64
REMOVE_OLD_BUILD=false REMOVE_OLD_BUILD=false
MGE_BUILD_IMPERATIVE_RT=OFF
echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}"


while getopts "rhdctm" arg
while getopts "rhdctmn" arg
do do
case $arg in case $arg in
d) d)
@@ -48,11 +50,15 @@ do
REMOVE_OLD_BUILD=true REMOVE_OLD_BUILD=true
;; ;;
m) m)
echo "build for m32(only use for windows)"
echo "build for m32(only valid use for windows)"
MGE_WINDOWS_BUILD_ARCH=x86 MGE_WINDOWS_BUILD_ARCH=x86
MGE_WINDOWS_BUILD_MARCH=m32 MGE_WINDOWS_BUILD_MARCH=m32
MGE_ARCH=i386 MGE_ARCH=i386
;; ;;
n)
echo "Enable imperative python wrapper runtime"
MGE_BUILD_IMPERATIVE_RT=ON
;;
?) ?)
echo "unkonw argument" echo "unkonw argument"
usage usage
@@ -101,6 +107,7 @@ function cmake_build() {
cmake \ cmake \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \
-DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \ -DMGE_WITH_CUDA=$MGE_WITH_CUDA \
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \
${EXTRA_CMAKE_ARGS} \ ${EXTRA_CMAKE_ARGS} \
@@ -112,7 +119,7 @@ function cmake_build() {


function windows_env_err() { function windows_env_err() {
echo "check windows env failed!!" echo "check windows env failed!!"
echo "please install LLVM/clang-cl/cmake/python at Visual Studio Extensions"
echo "please install env refs for: scripts/cmake-build/BUILD_README.md"
exit -1 exit -1
} }


@@ -178,6 +185,25 @@ function prepare_env_for_windows_build() {
export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS
export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH
export INCLUDE=$INCLUDE:$CPATH export INCLUDE=$INCLUDE:$CPATH

# python version will be config by whl build script or ci script, we need
# a DFT version for build success when we just call host_build.sh
if [[ -z ${ALREADY_CONFIG_PYTHON_VER} ]]
then
echo "config a default python3"
DFT_PYTHON_BIN=/c/Users/${USER}/mge_whl_python_env/3.8.3
if [ ! -f "${DFT_PYTHON_BIN}/python3.exe" ]; then
echo "ERR: can not find ${DFT_PYTHON_BIN}/python3.exe , Invalid env"
windows_env_err
else
echo "put python3 to env..."
export PATH=${DFT_PYTHON_BIN}:$PATH
which python3
fi
fi

echo "export swig pwd to PATH"
export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH
} }


WINDOWS_BUILD_TARGET="Ninja all > build.log" WINDOWS_BUILD_TARGET="Ninja all > build.log"
@@ -218,6 +244,7 @@ function cmake_build_windows() {
vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \ vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \
-DMGE_ARCH=$MGE_ARCH \ -DMGE_ARCH=$MGE_ARCH \
-DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \
-DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \ -DMGE_WITH_CUDA=$MGE_WITH_CUDA \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \ -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \
@@ -230,8 +257,18 @@ function cmake_build_windows() {
${WINDOWS_BUILD_TARGET}" ${WINDOWS_BUILD_TARGET}"
} }


if [ ${MGE_BUILD_IMPERATIVE_RT} = "ON" ] && [ ${MGE_INFERENCE_ONLY} = "ON" ]; then
echo "ERR: MGE_BUILD_IMPERATIVE_RT(-n) only valid when enable training mode(-t)"
echo "pls remove -n or add -t"
exit -1
fi


if [[ $OS =~ "NT" ]]; then if [[ $OS =~ "NT" ]]; then
if [ ${MGE_ARCH} = "i386" ] && [ ${MGE_INFERENCE_ONLY} = "OFF" ]; then
echo "ERR: training mode(-t) only support 64 bit mode"
echo "pls remove -t or remove -m"
exit -1
fi
config_windows_build_target config_windows_build_target
cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE
else else


+ 8
- 5
scripts/whl/BUILD_PYTHON_WHL_README.md View File

@@ -53,10 +53,6 @@
d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip
d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt
d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate
5: install swig from install gui
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip
b: install swig to /c/Users/${USER}/swigwin-4.0.2
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2
``` ```


# how to build # how to build
@@ -90,6 +86,11 @@
``` ```
ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh
``` ```
If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg:

```
ALL_PYTHON=3.5.9 BUILD_IMPERATIVE="ON" ./scripts/whl/macos/macos_build_whl.sh
```
## build for windows ## build for windows
``` ```
./scripts/whl/windows/windows_build_whl.sh ./scripts/whl/windows/windows_build_whl.sh
@@ -102,5 +103,7 @@
If you want to build windows whl with cuda, also a specific Python version. eg: If you want to build windows whl with cuda, also a specific Python version. eg:


``` ```
WINDOWS_WHL_WITH_CUDA="true" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh
WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh
``` ```
If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg:
BUILD_IMPERATIVE="ON" WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh

+ 58
- 10
scripts/whl/macos/macos_build_whl.sh View File

@@ -65,16 +65,18 @@ function config_python_env() {
fi fi
echo ${ver} echo ${ver}


#config a dir to trick cmake find a null pythonlib
PYTHON_LIBRARY=${PYTHON_DIR}lib/
if [ "$1" = "3.5.9" ]; then if [ "$1" = "3.5.9" ]; then
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.5m.dylib
elif [ "$1" = "3.6.10" ]; then elif [ "$1" = "3.6.10" ]; then
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.6m.dylib
elif [ "$1" = "3.7.7" ]; then elif [ "$1" = "3.7.7" ]; then
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.7m.dylib
elif [ "$1" = "3.8.3" ]; then elif [ "$1" = "3.8.3" ]; then
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8 PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.8.dylib
else else
echo "ERR: DO NOT SUPPORT PYTHON VERSION" echo "ERR: DO NOT SUPPORT PYTHON VERSION"
echo "now support list: ${FULL_PYTHON_VER}" echo "now support list: ${FULL_PYTHON_VER}"
@@ -82,6 +84,11 @@ function config_python_env() {
fi fi
} }


if [[ -z ${BUILD_IMPERATIVE} ]]
then
BUILD_IMPERATIVE="OFF"
fi

function do_build() { function do_build() {
for ver in ${ALL_PYTHON} for ver in ${ALL_PYTHON}
do do
@@ -89,7 +96,7 @@ function do_build() {
config_python_env ${ver} config_python_env ${ver}


#check env #check env
if [ ! -d "$PYTHON_LIBRARY" ]; then
if [ ! -f "$PYTHON_LIBRARY" ]; then
echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package" echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package"
err_env err_env
fi fi
@@ -102,14 +109,20 @@ function do_build() {
#append cmake args for config python #append cmake args for config python
export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} "
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc
export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo "
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo "


#call build and install #call build and install
#FIXME: cmake do not triger update python config, after #FIXME: cmake do not triger update python config, after
#change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add
#-r to remove build cache after a new ver build, which #-r to remove build cache after a new ver build, which
#will be more slow build than without -r #will be more slow build than without -r
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r
if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "build whl with IMPERATIVE python rt"
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -n -r
else
echo "build whl with legacy python rt"
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r
fi


#call setup.py #call setup.py
BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/ BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/
@@ -121,12 +134,47 @@ function do_build() {
fi fi
mkdir -p staging mkdir -p staging


if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "build whl with IMPERATIVE python rt"
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/core
rt_file=`ls _imperative_rt.*.so`
echo "rt file is: ${rt_file}"
if [[ -z ${rt_file} ]]
then
echo "ERR: can not find valid rt file"
exit -1
fi
llvm-strip -s ${rt_file}
mv ${rt_file} _imperative_rt.so
echo "check so valid or not..."
otool_out=`otool -L _imperative_rt.so`
if [[ "${otool_out}" =~ "ython" ]]; then
echo "ERR: invalid _imperative_rt.so which depend on python lib, detail: log"
echo ${otool_out}
exit -1
else
echo "valid..."
fi
else
echo "build whl with legacy python rt"

cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/_internal
#FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file
#will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so
echo "check so valid or not..."
llvm-strip -s _mgb.so
otool_out=`otool -L _mgb.so`
if [[ "${otool_out}" =~ "ython" ]]; then
echo "ERR: invalid _mgb.so which depend on python lib, detail: log"
echo ${otool_out}
exit -1
else
echo "valid..."
fi
fi


cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/_internal
#FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file
#will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so
llvm-strip -s _mgb.so
cd ${BUILD_DIR}/staging cd ${BUILD_DIR}/staging
${PYTHON_DIR}/bin/python3 setup.py bdist_wheel ${PYTHON_DIR}/bin/python3 setup.py bdist_wheel
cd ${BUILD_DIR}/staging/dist/ cd ${BUILD_DIR}/staging/dist/


+ 67
- 21
scripts/whl/windows/windows_build_whl.sh View File

@@ -14,8 +14,6 @@ function err_env() {
} }


function append_path_env_and_check() { function append_path_env_and_check() {
echo "export swig pwd to PATH"
export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH
echo "export vs2019 install path" echo "export vs2019 install path"
export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise
# for llvm-strip # for llvm-strip
@@ -62,7 +60,7 @@ function config_python_env() {


if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]] if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]]
then then
WINDOWS_WHL_WITH_CUDA="false"
WINDOWS_WHL_WITH_CUDA="OFF"
fi fi




@@ -74,26 +72,46 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6
CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll"
CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll"
CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll"
function depend_real_copy() {
REAL_DST=$1
echo "real copy lib to $1"
cp "${TRT_LIB}" ${REAL_DST}
cp "${CUDNN_LIB}" ${REAL_DST}
cp "${CUSOLVER_LIB}" ${REAL_DST}
cp "${CUBLAS_LIB}" ${REAL_DST}
cp "${CURAND_LIB}" ${REAL_DST}
cp "${CUBLASLT_LIB}" ${REAL_DST}
cp "${CUDART_LIB}" ${REAL_DST}
}

function copy_more_dll() { function copy_more_dll() {
# for python whl real use # for python whl real use
CP_DST=${BUILD_DIR}/staging/megengine/_internal/lib
rm -rf ${CP_DST}
mkdir ${CP_DST}
if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "config BUILD_IMPERATIVE core lib dir"
CP_WHL_DST=${BUILD_DIR}/staging/megengine/core/lib
else
echo "config legacy python lib dir"
CP_WHL_DST=${BUILD_DIR}/staging/megengine/_internal/lib
fi
rm -rf ${CP_WHL_DST}
mkdir ${CP_WHL_DST}
# workround for cpu-only version import failed, use a
# empty.file to triger setup.py to create a null empty
echo "empty" > ${CP_WHL_DST}/empty.file




if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then
if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then
echo "copy nvidia lib to whl use...." echo "copy nvidia lib to whl use...."
cp "${TRT_LIB}" ${CP_DST}
cp "${CUDNN_LIB}" ${CP_DST}
cp "${CUSOLVER_LIB}" ${CP_DST}
cp "${CUBLAS_LIB}" ${CP_DST}
cp "${CURAND_LIB}" ${CP_DST}
cp "${CUBLASLT_LIB}" ${CP_DST}
cp "${CUDART_LIB}" ${CP_DST}
depend_real_copy ${CP_WHL_DST}


fi fi
} }


if [[ -z ${BUILD_IMPERATIVE} ]]
then
BUILD_IMPERATIVE="OFF"
fi

function do_build() { function do_build() {
for ver in ${ALL_PYTHON} for ver in ${ALL_PYTHON}
do do
@@ -118,21 +136,31 @@ function do_build() {
#force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python #force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python
#export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " #export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} "
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc
export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo "
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo "


#call build and install #call build and install
#FIXME: cmake do not triger update python config, after #FIXME: cmake do not triger update python config, after
#change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add
#-r to remove build cache after a new ver build, which #-r to remove build cache after a new ver build, which
#will be more slow build than without -r #will be more slow build than without -r
if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then
BUILD_ARGS=" -t -r"
if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "build whl with IMPERATIVE python rt"
BUILD_ARGS="${BUILD_ARGS} -n "
else
echo "build whl with legacy python rt"
fi

if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then
echo "build windows whl with cuda" echo "build windows whl with cuda"
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r -c
BUILD_ARGS="${BUILD_ARGS} -c "
else else
echo "build windows whl with cpu only" echo "build windows whl with cpu only"
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r
fi fi


echo "host_build.sh BUILD_ARGS: ${BUILD_ARGS}"
${SRC_DIR}/scripts/cmake-build/host_build.sh ${BUILD_ARGS}

#call setup.py #call setup.py
BUILD_DIR=${SRC_DIR}/build_dir/host/build/ BUILD_DIR=${SRC_DIR}/build_dir/host/build/
cd ${BUILD_DIR} cd ${BUILD_DIR}
@@ -143,10 +171,27 @@ function do_build() {
fi fi
mkdir -p staging mkdir -p staging


if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "build whl with IMPERATIVE python rt"
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/core
rt_file=`ls _imperative_rt.*.pyd`
echo "rt file is: ${rt_file}"
if [[ -z ${rt_file} ]]
then
echo "ERR: can not find valid rt file"
exit -1
fi
llvm-strip -s ${rt_file}
mv ${rt_file} _imperative_rt.pyd
else
echo "build whl with legacy python rt"

cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/_internal
llvm-strip -s _mgb.pyd
fi


cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/_internal
llvm-strip -s _mgb.pyd
copy_more_dll copy_more_dll
cd ${BUILD_DIR}/staging cd ${BUILD_DIR}/staging
${PYTHON_DIR}/python3 setup.py bdist_wheel ${PYTHON_DIR}/python3 setup.py bdist_wheel
@@ -175,5 +220,6 @@ function third_party_prepare() {
} }


###################### ######################
export ALREADY_CONFIG_PYTHON_VER="yes"
third_party_prepare third_party_prepare
do_build do_build

+ 9
- 0
src/core/impl/graph/seq_sublinear_memory.cpp View File

@@ -33,6 +33,11 @@ class RNGxorshf {
uint64_t s[2]; uint64_t s[2];


public: public:
#if __cplusplus >= 201703L
typedef uint64_t result_type;
static constexpr uint64_t min() { return 0; }
static constexpr uint64_t max() { return UINT64_MAX; }
#endif
RNGxorshf(uint64_t seed) { RNGxorshf(uint64_t seed) {
std::mt19937_64 gen(seed); std::mt19937_64 gen(seed);
s[0] = gen(); s[0] = gen();
@@ -936,8 +941,12 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() {
} }
} }
m_cur_records = records; m_cur_records = records;
#if __cplusplus >= 201703L
std::shuffle(perm.begin(), perm.end(), rng);
#else
std::random_shuffle(perm.begin(), perm.end(), std::random_shuffle(perm.begin(), perm.end(),
[&](size_t x) { return rng() % x; }); [&](size_t x) { return rng() % x; });
#endif
for (size_t i = 0; i < length; ++i) { for (size_t i = 0; i < length; ++i) {
invoke_search(mutation(mutation(records[i].first))); invoke_search(mutation(mutation(records[i].first)));
invoke_search(crossover(records[i].first, records[perm[i]].first)); invoke_search(crossover(records[i].first, records[perm[i]].first));


+ 5
- 0
src/opr/test/blas.cpp View File

@@ -705,7 +705,12 @@ TEST(TestOprBlas, MatrixInverse) {
} }
auto ptr = inp[0]->ptr<float>(); auto ptr = inp[0]->ptr<float>();
for (size_t i = 0; i < batch; ++i, ptr += n * n) { for (size_t i = 0; i < batch; ++i, ptr += n * n) {
#if __cplusplus >= 201703L
std::default_random_engine rng_engine;
std::shuffle(perm.begin(), perm.end(), rng_engine);
#else
std::random_shuffle(perm.begin(), perm.end()); std::random_shuffle(perm.begin(), perm.end());
#endif
for (size_t j = 0; j < n; ++j) { for (size_t j = 0; j < n; ++j) {
ptr[j * n + perm[j]] += 5; ptr[j * n + perm[j]] += 5;
} }


+ 5
- 0
src/opr/test/muxing.cpp View File

@@ -36,7 +36,12 @@ void run_all_gather(const std::vector<size_t>& axis_size, bool& success,
sleep_time.push_back(i * 0.05 + 0.1); sleep_time.push_back(i * 0.05 + 0.1);
tot_axis_size += axis_size[i]; tot_axis_size += axis_size[i];
} }
#if __cplusplus >= 201703L
std::default_random_engine rng_engine;
std::shuffle(sleep_time.begin(), sleep_time.end(), rng_engine);
#else
std::random_shuffle(sleep_time.begin(), sleep_time.end()); std::random_shuffle(sleep_time.begin(), sleep_time.end());
#endif


auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA; auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA;
size_t nr_dev = std::min<size_t>( size_t nr_dev = std::min<size_t>(


+ 11
- 5
test/CMakeLists.txt View File

@@ -18,7 +18,11 @@ endif()


add_executable(megbrain_test ${SOURCES}) add_executable(megbrain_test ${SOURCES})
target_link_libraries(megbrain_test gtest) target_link_libraries(megbrain_test gtest)
target_link_libraries(megbrain_test megengine)
if(MSVC OR WIN32)
target_link_libraries(megbrain_test megbrain megdnn)
else()
target_link_libraries(megbrain_test megengine)
endif()
if(CXX_SUPPORT_WCLASS_MEMACCESS) if(CXX_SUPPORT_WCLASS_MEMACCESS)
if(MGE_WITH_CUDA) if(MGE_WITH_CUDA)
target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>"
@@ -28,10 +32,12 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS)
endif() endif()
endif() endif()


if(APPLE OR ANDROID)
target_link_libraries(megbrain_test dl)
else()
target_link_libraries(megbrain_test dl rt)
if(UNIX)
if(APPLE OR ANDROID)
target_link_libraries(megbrain_test dl)
else()
target_link_libraries(megbrain_test dl rt)
endif()
endif() endif()


if (MGE_WITH_DISTRIBUTED) if (MGE_WITH_DISTRIBUTED)


Loading…
Cancel
Save