From 177c2688984c9f91c6fa555330e9a4f36a62fe89 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Tue, 14 Jul 2020 13:26:40 +0800 Subject: [PATCH] feat(osx/python_whl): fix python pytest failures on osx GitOrigin-RevId: 97d2b496bb2501a887b1676c59778e1ddd09a65d --- CMakeLists.txt | 16 ++++----- python_module/CMakeLists.txt | 6 +--- python_module/megengine/_internal/config.py | 8 ++--- .../megengine/utils/max_recursion_limit.py | 27 +++++++++----- python_module/src/swig/mgb.i | 9 ++--- python_module/test/integration/test_distributed.py | 5 +++ .../test/unit/distributed/test_functional.py | 34 ++++++++++++++++++ python_module/test/unit/distributed/test_util.py | 13 +++++++ python_module/test/unit/module/test_batchnorm.py | 16 +++++++++ scripts/whl/macos/macos_build_whl.sh | 12 +++++++ scripts/whl/macos/macos_whl_env_prepare.sh | 42 ++++++++++++---------- src/core/impl/utils/debug.cpp | 3 ++ 12 files changed, 141 insertions(+), 50 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c67ac73..81b85956 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -426,6 +426,14 @@ endif() set(MGB_JIT ${MGE_WITH_JIT}) set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE}) +IF(APPLE) + set(CMAKE_THREAD_LIBS_INIT "-lpthread") + set(CMAKE_HAVE_THREADS_LIBRARY 1) + set(CMAKE_USE_WIN32_THREADS_INIT 0) + set(CMAKE_USE_PTHREADS_INIT 1) + set(THREADS_PREFER_PTHREAD_FLAG ON) +ENDIF() + # Thread if(CMAKE_THREAD_LIBS_INIT) set(MGB_HAVE_THREAD 1) @@ -641,14 +649,6 @@ elseif(TARGET _xxx) ) endif() -IF(APPLE) - set(CMAKE_THREAD_LIBS_INIT "-lpthread") - set(CMAKE_HAVE_THREADS_LIBRARY 1) - set(CMAKE_USE_WIN32_THREADS_INIT 0) - set(CMAKE_USE_PTHREADS_INIT 1) - set(THREADS_PREFER_PTHREAD_FLAG ON) -ENDIF() - # Configure and install pkg-config. 
# Note that unlike the Config.cmake modules, this is not relocatable (and not # really portable) because we have two dependencies without pkg-config diff --git a/python_module/CMakeLists.txt b/python_module/CMakeLists.txt index 1a46edf5..2789d01a 100644 --- a/python_module/CMakeLists.txt +++ b/python_module/CMakeLists.txt @@ -11,11 +11,7 @@ find_package(NumPy REQUIRED) find_package(SWIG REQUIRED) set(SWIG_SRC src/swig/mgb.i) -if (APPLE) - set(CMAKE_SWIG_FLAGS -Wall -threads -py3 -modern) -else() - set(CMAKE_SWIG_FLAGS -Wall -threads -py3 -modern -DSWIGWORDSIZE64) -endif() +set(CMAKE_SWIG_FLAGS -Wall -threads -py3 -modern -DSWIGWORDSIZE64) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") file(GLOB_RECURSE OPR_DECL_SRCS "${PROJECT_SOURCE_DIR}/src/**/*.oprdecl") diff --git a/python_module/megengine/_internal/config.py b/python_module/megengine/_internal/config.py index 31f8ccda..25c36046 100644 --- a/python_module/megengine/_internal/config.py +++ b/python_module/megengine/_internal/config.py @@ -285,8 +285,8 @@ class exc_opr_tracker_scope(_GraphPropertyBase): ), "bad args for exc_opr_tracker_scope: {!r} {!r}".format(comp_graph, tracker) super().__init__(comp_graph, tracker) - __prop_setup__ = _mgb._config.begin_set_exc_opr_tracker - __prop_clear__ = _mgb._config.end_set_exc_opr_tracker + __prop_setup__ = staticmethod(_mgb._config.begin_set_exc_opr_tracker) + __prop_clear__ = staticmethod(_mgb._config.end_set_exc_opr_tracker) class opr_priority_scope(_GraphPropertyBase): @@ -312,8 +312,8 @@ class opr_priority_scope(_GraphPropertyBase): def __init__(self, comp_graph, priority): super().__init__(comp_graph, int(priority)) - __prop_setup__ = _mgb._config.begin_set_opr_priority - __prop_clear__ = _mgb._config.end_set_opr_priority + __prop_setup__ = staticmethod(_mgb._config.begin_set_opr_priority) + __prop_clear__ = staticmethod(_mgb._config.end_set_opr_priority) OprTrackerResult = collections.namedtuple( diff --git 
a/python_module/megengine/utils/max_recursion_limit.py b/python_module/megengine/utils/max_recursion_limit.py index 0870b7fa..cda37dfe 100644 --- a/python_module/megengine/utils/max_recursion_limit.py +++ b/python_module/megengine/utils/max_recursion_limit.py @@ -6,6 +6,7 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import platform import resource import sys import threading @@ -32,10 +33,16 @@ class AlternativeRecursionLimit: self.orig_rlim_stack_soft, self.orig_rlim_stack_hard, ) = resource.getrlimit(resource.RLIMIT_STACK) - resource.setrlimit( - resource.RLIMIT_STACK, - (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), - ) + # FIXME: https://bugs.python.org/issue34602, python3 release version + # on Macos always have this issue, not all user install python3 from src + try: + resource.setrlimit( + resource.RLIMIT_STACK, + (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), + ) + except ValueError as exc: + if platform.system() != "Darwin": + raise exc # increase recursion limit sys.setrecursionlimit(self.new_py_limit) self.count += 1 @@ -45,10 +52,14 @@ class AlternativeRecursionLimit: self.count -= 1 if self.count == 0: sys.setrecursionlimit(self.orig_py_limit) - resource.setrlimit( - resource.RLIMIT_STACK, - (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), - ) + try: + resource.setrlimit( + resource.RLIMIT_STACK, + (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), + ) + except ValueError as exc: + if platform.system() != "Darwin": + raise exc _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) diff --git a/python_module/src/swig/mgb.i b/python_module/src/swig/mgb.i index b8bdf5e0..e668f6d6 100644 --- a/python_module/src/swig/mgb.i +++ b/python_module/src/swig/mgb.i @@ -32,12 +32,9 @@ void _init_bfloat16_types(PyObject *m); // implemented in 
bfloat16.cpp %template(_PairStringSizeT) std::pair; %template(_PairSizeTSizeT) std::pair; /* - * - * real define uint64_t here, BUT, do not define SWIGWORDSIZE64 - * at osx env, at this time uint64_t means unsigned long long, - * BUT, unsigned long long do not have type_name() method at c++, - * when define SWIGWORDSIZE64 at linux env, uint64_t means - * unsigned long int, more detail refs stdint.i + * swig use uint64_t have compat build issue with + * clang at osx env, so we use unsigned long to + * replace uint64_t,more detail refs stdint.i * */ %template(_VectorPairUint64String) std::vector>; diff --git a/python_module/test/integration/test_distributed.py b/python_module/test/integration/test_distributed.py index 61ef5246..bd629b2f 100644 --- a/python_module/test/integration/test_distributed.py +++ b/python_module/test/integration/test_distributed.py @@ -7,10 +7,12 @@ # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import multiprocessing as mp +import platform import subprocess import sys import numpy as np +import pytest def worker(master_ip, master_port, world_size, rank, dev, trace): @@ -84,6 +86,9 @@ def start_workers(worker, world_size, trace=False): assert p.exitcode == 0 +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) def test_distributed(): start_workers(worker, 2, trace=True) start_workers(worker, 2, trace=False) diff --git a/python_module/test/unit/distributed/test_functional.py b/python_module/test/unit/distributed/test_functional.py index 2b901d66..d417ddcb 100644 --- a/python_module/test/unit/distributed/test_functional.py +++ b/python_module/test/unit/distributed/test_functional.py @@ -7,6 +7,7 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import multiprocessing as mp +import platform import numpy as np import pytest @@ -25,6 +26,9 @@ def _init_process_group_wrapper(world_size, rank, dev, backend, q): dist.init_process_group("localhost", port, world_size, rank, dev, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_reduce_sum(): world_size = 2 @@ -61,6 +65,9 @@ def test_reduce_sum(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_gather(): world_size = 2 @@ -97,6 +104,9 @@ def test_gather(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_broadcast(): world_size = 2 @@ -129,6 +139,9 @@ def test_broadcast(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_scatter(): world_size = 2 @@ -165,6 +178,9 @@ def test_scatter(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_all_to_all(): world_size = 2 @@ -199,6 +215,9 @@ def test_all_to_all(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_all_gather(): world_size = 2 @@ -232,6 +251,9 @@ def test_all_gather(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_reduce_scatter_sum(): world_size = 2 @@ -269,6 +291,9 @@ def test_reduce_scatter_sum(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) 
@pytest.mark.isolated_distributed def test_all_reduce_sum(): world_size = 2 @@ -302,6 +327,9 @@ def test_all_reduce_sum(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_all_reduce_max(): world_size = 2 @@ -335,6 +363,9 @@ def test_all_reduce_max(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_all_reduce_min(): world_size = 2 @@ -368,6 +399,9 @@ def test_all_reduce_min(): check(shape, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_bcast_param(): world_size = 2 diff --git a/python_module/test/unit/distributed/test_util.py b/python_module/test/unit/distributed/test_util.py index bbb4dd41..932145d9 100644 --- a/python_module/test/unit/distributed/test_util.py +++ b/python_module/test/unit/distributed/test_util.py @@ -6,6 +6,7 @@ # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import multiprocessing as mp +import platform import queue from time import sleep @@ -41,6 +42,9 @@ def _init_process_group_wrapper(world_size, rank, dev, backend, q): dist.init_process_group(_LOCALHOST, port, world_size, rank, dev, backend) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_create_mm_server(): def worker(): @@ -60,6 +64,9 @@ def test_create_mm_server(): assert p.exitcode == 0 +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_init_process_group(): world_size = 2 @@ -92,6 +99,9 @@ def test_init_process_group(): check("ucx") +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_group_barrier(): world_size = 2 @@ -124,6 +134,9 @@ def test_group_barrier(): assert p0.exitcode == 0 and p1.exitcode == 0 +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_synchronized(): world_size = 2 diff --git a/python_module/test/unit/module/test_batchnorm.py b/python_module/test/unit/module/test_batchnorm.py index 41b0fed7..f1c94b97 100644 --- a/python_module/test/unit/module/test_batchnorm.py +++ b/python_module/test/unit/module/test_batchnorm.py @@ -7,6 +7,7 @@ # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import multiprocessing as mp +import platform import numpy as np import pytest @@ -18,6 +19,9 @@ from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm from megengine.test import assertTensorClose +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) @pytest.mark.isolated_distributed def test_syncbn(): nr_chan = 8 @@ -136,6 +140,9 @@ def test_batchnorm(): assertTensorClose(yv_expect, yv1.numpy(), max_err=5e-6) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) def test_syncbn1d(): nr_chan = 8 data_shape = (3, nr_chan, 4) @@ -231,6 +238,9 @@ def test_batchnorm2d(): assertTensorClose(yv_expect, yv1.numpy(), max_err=5e-6) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) def test_syncbn2d(): nr_chan = 8 data_shape = (3, nr_chan, 16, 16) @@ -302,6 +312,9 @@ def test_batchnorm_no_stats(): assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) def test_syncbn_no_stats(): nr_chan = 8 data_shape = (3, nr_chan, 4) @@ -351,6 +364,9 @@ def test_batchnorm2d_no_stats(): assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) def test_syncbn2d_no_stats(): nr_chan = 8 data_shape = (3, nr_chan, 16, 16) diff --git a/scripts/whl/macos/macos_build_whl.sh b/scripts/whl/macos/macos_build_whl.sh index 7d820009..105fe931 100755 --- a/scripts/whl/macos/macos_build_whl.sh +++ b/scripts/whl/macos/macos_build_whl.sh @@ -141,5 +141,17 @@ function do_build() { done } +function third_party_prepare() { + if [[ -z ${ALREADY_INSTALL_THIRD_PARTY} ]] + then + echo "init third_party..." + ${SRC_DIR}/third_party/prepare.sh + ${SRC_DIR}/third_party/install-mkl.sh + else + echo "skip init third_party..." 
+ fi +} + ###################### +third_party_prepare do_build diff --git a/scripts/whl/macos/macos_whl_env_prepare.sh b/scripts/whl/macos/macos_whl_env_prepare.sh index f92dec83..b83b1b85 100755 --- a/scripts/whl/macos/macos_whl_env_prepare.sh +++ b/scripts/whl/macos/macos_whl_env_prepare.sh @@ -1,20 +1,6 @@ #!/bin/bash -e -READLINK=readlink -OS=$(uname -s) - -if [ $OS = "Darwin" ];then - READLINK=greadlink -else - echo "ERR: only run at macos env" - exit -1 -fi - -SRC_DIR=$($READLINK -f "`dirname $0`/../../../") - -echo ${SRC_DIR} -ALL_PYTHON="3.5.9 3.6.10 3.7.7 3.8.3" - +#install env before use greadlink function try_install_brew() { which brew if [ $? -eq 0 ]; then @@ -34,13 +20,33 @@ function try_install_brew() { } function install_brew_package() { - BREW_PACKAGE="openssl readline sqlite3 xz gdbm zlib pyenv wget swig coreutils llvm" + BREW_PACKAGE="openssl readline sqlite3 xz gdbm zlib pyenv wget swig coreutils llvm git-lfs" for pak in ${BREW_PACKAGE} do echo "###### do command: brew install ${pak}" brew install ${pak} done + + git lfs install } +try_install_brew +install_brew_package + +READLINK=readlink +OS=$(uname -s) + +if [ $OS = "Darwin" ];then + READLINK=greadlink +else + echo "ERR: only run at macos env" + exit -1 +fi + +SRC_DIR=$($READLINK -f "`dirname $0`/../../../") + +echo ${SRC_DIR} +ALL_PYTHON="3.5.9 3.6.10 3.7.7 3.8.3" + function install_python_package() { for pak in ${ALL_PYTHON} @@ -51,7 +57,7 @@ function install_python_package() { else env PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install ${pak} fi - echo "###### do command: /Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install numpy wheel" + echo "###### do command: /Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install numpy wheel requests tqdm tabulate" /Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install numpy wheel echo "###### do command: /Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install -r ${SRC_DIR}/python_module/requires-test.txt" 
/Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install -r ${SRC_DIR}/python_module/requires-test.txt @@ -95,8 +101,6 @@ function append_path_env_message() { } ############install env now########### -try_install_brew -install_brew_package install_python_package install_cmake append_path_env_message diff --git a/src/core/impl/utils/debug.cpp b/src/core/impl/utils/debug.cpp index 8b97d745..9752597c 100644 --- a/src/core/impl/utils/debug.cpp +++ b/src/core/impl/utils/debug.cpp @@ -139,9 +139,12 @@ class SigHandlerInit { mgb_log_error("%s: caught deadly signal %d(%s)", msg0, signum, strsignal(signum)); } +//FIXME: imp backtrace for macos +#ifndef __APPLE__ std::string bp; debug::backtrace(2).fmt_to_str(bp); mgb_log_error("%s", bp.c_str()); +#endif exit(EXIT_FAILURE); }