Browse Source

feat(osx/python_whl): fix python pytest failed on osx

GitOrigin-RevId: 97d2b496bb
release-0.6
Megvii Engine Team Xu Xinran 4 years ago
parent
commit
177c268898
12 changed files with 141 additions and 50 deletions
  1. +8
    -8
      CMakeLists.txt
  2. +1
    -5
      python_module/CMakeLists.txt
  3. +4
    -4
      python_module/megengine/_internal/config.py
  4. +19
    -8
      python_module/megengine/utils/max_recursion_limit.py
  5. +3
    -6
      python_module/src/swig/mgb.i
  6. +5
    -0
      python_module/test/integration/test_distributed.py
  7. +34
    -0
      python_module/test/unit/distributed/test_functional.py
  8. +13
    -0
      python_module/test/unit/distributed/test_util.py
  9. +16
    -0
      python_module/test/unit/module/test_batchnorm.py
  10. +12
    -0
      scripts/whl/macos/macos_build_whl.sh
  11. +23
    -19
      scripts/whl/macos/macos_whl_env_prepare.sh
  12. +3
    -0
      src/core/impl/utils/debug.cpp

+ 8
- 8
CMakeLists.txt View File

@@ -426,6 +426,14 @@ endif()
set(MGB_JIT ${MGE_WITH_JIT})
set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})

IF(APPLE)
set(CMAKE_THREAD_LIBS_INIT "-lpthread")
set(CMAKE_HAVE_THREADS_LIBRARY 1)
set(CMAKE_USE_WIN32_THREADS_INIT 0)
set(CMAKE_USE_PTHREADS_INIT 1)
set(THREADS_PREFER_PTHREAD_FLAG ON)
ENDIF()

# Thread
if(CMAKE_THREAD_LIBS_INIT)
set(MGB_HAVE_THREAD 1)
@@ -641,14 +649,6 @@ elseif(TARGET _xxx)
)
endif()

IF(APPLE)
set(CMAKE_THREAD_LIBS_INIT "-lpthread")
set(CMAKE_HAVE_THREADS_LIBRARY 1)
set(CMAKE_USE_WIN32_THREADS_INIT 0)
set(CMAKE_USE_PTHREADS_INIT 1)
set(THREADS_PREFER_PTHREAD_FLAG ON)
ENDIF()

# Configure and install pkg-config.
# Note that unlike the Config.cmake modules, this is not relocatable (and not
# really portable) because we have two dependencies without pkg-config


+ 1
- 5
python_module/CMakeLists.txt View File

@@ -11,11 +11,7 @@ find_package(NumPy REQUIRED)

find_package(SWIG REQUIRED)
set(SWIG_SRC src/swig/mgb.i)
if (APPLE)
set(CMAKE_SWIG_FLAGS -Wall -threads -py3 -modern)
else()
set(CMAKE_SWIG_FLAGS -Wall -threads -py3 -modern -DSWIGWORDSIZE64)
endif()
set(CMAKE_SWIG_FLAGS -Wall -threads -py3 -modern -DSWIGWORDSIZE64)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")

file(GLOB_RECURSE OPR_DECL_SRCS "${PROJECT_SOURCE_DIR}/src/**/*.oprdecl")


+ 4
- 4
python_module/megengine/_internal/config.py View File

@@ -285,8 +285,8 @@ class exc_opr_tracker_scope(_GraphPropertyBase):
), "bad args for exc_opr_tracker_scope: {!r} {!r}".format(comp_graph, tracker)
super().__init__(comp_graph, tracker)

__prop_setup__ = _mgb._config.begin_set_exc_opr_tracker
__prop_clear__ = _mgb._config.end_set_exc_opr_tracker
__prop_setup__ = staticmethod(_mgb._config.begin_set_exc_opr_tracker)
__prop_clear__ = staticmethod(_mgb._config.end_set_exc_opr_tracker)


class opr_priority_scope(_GraphPropertyBase):
@@ -312,8 +312,8 @@ class opr_priority_scope(_GraphPropertyBase):
def __init__(self, comp_graph, priority):
super().__init__(comp_graph, int(priority))

__prop_setup__ = _mgb._config.begin_set_opr_priority
__prop_clear__ = _mgb._config.end_set_opr_priority
__prop_setup__ = staticmethod(_mgb._config.begin_set_opr_priority)
__prop_clear__ = staticmethod(_mgb._config.end_set_opr_priority)


OprTrackerResult = collections.namedtuple(


+ 19
- 8
python_module/megengine/utils/max_recursion_limit.py View File

@@ -6,6 +6,7 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import platform
import resource
import sys
import threading
@@ -32,10 +33,16 @@ class AlternativeRecursionLimit:
self.orig_rlim_stack_soft,
self.orig_rlim_stack_hard,
) = resource.getrlimit(resource.RLIMIT_STACK)
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard),
)
# FIXME: https://bugs.python.org/issue34602, python3 release builds
# on macOS always have this issue, and not all users install python3 from source
try:
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard),
)
except ValueError as exc:
if platform.system() != "Darwin":
raise exc
# increase recursion limit
sys.setrecursionlimit(self.new_py_limit)
self.count += 1
@@ -45,10 +52,14 @@ class AlternativeRecursionLimit:
self.count -= 1
if self.count == 0:
sys.setrecursionlimit(self.orig_py_limit)
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard),
)
try:
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard),
)
except ValueError as exc:
if platform.system() != "Darwin":
raise exc


_max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1)


+ 3
- 6
python_module/src/swig/mgb.i View File

@@ -32,12 +32,9 @@ void _init_bfloat16_types(PyObject *m); // implemented in bfloat16.cpp
%template(_PairStringSizeT) std::pair<std::string, size_t>;
%template(_PairSizeTSizeT) std::pair<size_t, size_t>;
/*
*
* real define uint64_t here, BUT, do not define SWIGWORDSIZE64
* at osx env, at this time uint64_t means unsigned long long,
* BUT, unsigned long long do not have type_name() method at c++,
* when define SWIGWORDSIZE64 at linux env, uint64_t means
* unsigned long int, more detail refs stdint.i
* using uint64_t in swig has a build compatibility issue with
* clang on macOS, so we use unsigned long in place of
* uint64_t; for more detail refer to stdint.i
*
*/
%template(_VectorPairUint64String) std::vector<std::pair<unsigned long int, std::string>>;


+ 5
- 0
python_module/test/integration/test_distributed.py View File

@@ -7,10 +7,12 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import multiprocessing as mp
import platform
import subprocess
import sys

import numpy as np
import pytest


def worker(master_ip, master_port, world_size, rank, dev, trace):
@@ -84,6 +86,9 @@ def start_workers(worker, world_size, trace=False):
assert p.exitcode == 0


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_distributed():
start_workers(worker, 2, trace=True)
start_workers(worker, 2, trace=False)

+ 34
- 0
python_module/test/unit/distributed/test_functional.py View File

@@ -7,6 +7,7 @@
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

import multiprocessing as mp
import platform

import numpy as np
import pytest
@@ -25,6 +26,9 @@ def _init_process_group_wrapper(world_size, rank, dev, backend, q):
dist.init_process_group("localhost", port, world_size, rank, dev, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_reduce_sum():
world_size = 2
@@ -61,6 +65,9 @@ def test_reduce_sum():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_gather():
world_size = 2
@@ -97,6 +104,9 @@ def test_gather():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_broadcast():
world_size = 2
@@ -129,6 +139,9 @@ def test_broadcast():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_scatter():
world_size = 2
@@ -165,6 +178,9 @@ def test_scatter():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_all_to_all():
world_size = 2
@@ -199,6 +215,9 @@ def test_all_to_all():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_all_gather():
world_size = 2
@@ -232,6 +251,9 @@ def test_all_gather():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_reduce_scatter_sum():
world_size = 2
@@ -269,6 +291,9 @@ def test_reduce_scatter_sum():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_all_reduce_sum():
world_size = 2
@@ -302,6 +327,9 @@ def test_all_reduce_sum():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_all_reduce_max():
world_size = 2
@@ -335,6 +363,9 @@ def test_all_reduce_max():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_all_reduce_min():
world_size = 2
@@ -368,6 +399,9 @@ def test_all_reduce_min():
check(shape, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_bcast_param():
world_size = 2


+ 13
- 0
python_module/test/unit/distributed/test_util.py View File

@@ -6,6 +6,7 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import multiprocessing as mp
import platform
import queue
from time import sleep

@@ -41,6 +42,9 @@ def _init_process_group_wrapper(world_size, rank, dev, backend, q):
dist.init_process_group(_LOCALHOST, port, world_size, rank, dev, backend)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_create_mm_server():
def worker():
@@ -60,6 +64,9 @@ def test_create_mm_server():
assert p.exitcode == 0


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_init_process_group():
world_size = 2
@@ -92,6 +99,9 @@ def test_init_process_group():
check("ucx")


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_group_barrier():
world_size = 2
@@ -124,6 +134,9 @@ def test_group_barrier():
assert p0.exitcode == 0 and p1.exitcode == 0


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_synchronized():
world_size = 2


+ 16
- 0
python_module/test/unit/module/test_batchnorm.py View File

@@ -7,6 +7,7 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import multiprocessing as mp
import platform

import numpy as np
import pytest
@@ -18,6 +19,9 @@ from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm
from megengine.test import assertTensorClose


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.isolated_distributed
def test_syncbn():
nr_chan = 8
@@ -136,6 +140,9 @@ def test_batchnorm():
assertTensorClose(yv_expect, yv1.numpy(), max_err=5e-6)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_syncbn1d():
nr_chan = 8
data_shape = (3, nr_chan, 4)
@@ -231,6 +238,9 @@ def test_batchnorm2d():
assertTensorClose(yv_expect, yv1.numpy(), max_err=5e-6)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_syncbn2d():
nr_chan = 8
data_shape = (3, nr_chan, 16, 16)
@@ -302,6 +312,9 @@ def test_batchnorm_no_stats():
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_syncbn_no_stats():
nr_chan = 8
data_shape = (3, nr_chan, 4)
@@ -351,6 +364,9 @@ def test_batchnorm2d_no_stats():
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)


@pytest.mark.skipif(
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
def test_syncbn2d_no_stats():
nr_chan = 8
data_shape = (3, nr_chan, 16, 16)


+ 12
- 0
scripts/whl/macos/macos_build_whl.sh View File

@@ -141,5 +141,17 @@ function do_build() {
done
}

function third_party_prepare() {
if [[ -z ${ALREADY_INSTALL_THIRD_PARTY} ]]
then
echo "init third_party..."
${SRC_DIR}/third_party/prepare.sh
${SRC_DIR}/third_party/install-mkl.sh
else
echo "skip init third_party..."
fi
}

######################
third_party_prepare
do_build

+ 23
- 19
scripts/whl/macos/macos_whl_env_prepare.sh View File

@@ -1,20 +1,6 @@
#!/bin/bash -e

READLINK=readlink
OS=$(uname -s)

if [ $OS = "Darwin" ];then
READLINK=greadlink
else
echo "ERR: only run at macos env"
exit -1
fi

SRC_DIR=$($READLINK -f "`dirname $0`/../../../")

echo ${SRC_DIR}
ALL_PYTHON="3.5.9 3.6.10 3.7.7 3.8.3"

# install env before using greadlink
function try_install_brew() {
which brew
if [ $? -eq 0 ]; then
@@ -34,13 +20,33 @@ function try_install_brew() {
}

function install_brew_package() {
BREW_PACKAGE="openssl readline sqlite3 xz gdbm zlib pyenv wget swig coreutils llvm"
BREW_PACKAGE="openssl readline sqlite3 xz gdbm zlib pyenv wget swig coreutils llvm git-lfs"
for pak in ${BREW_PACKAGE}
do
echo "###### do command: brew install ${pak}"
brew install ${pak}
done

git lfs install
}
try_install_brew
install_brew_package

READLINK=readlink
OS=$(uname -s)

if [ $OS = "Darwin" ];then
READLINK=greadlink
else
echo "ERR: only run at macos env"
exit -1
fi

SRC_DIR=$($READLINK -f "`dirname $0`/../../../")

echo ${SRC_DIR}
ALL_PYTHON="3.5.9 3.6.10 3.7.7 3.8.3"


function install_python_package() {
for pak in ${ALL_PYTHON}
@@ -51,7 +57,7 @@ function install_python_package() {
else
env PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install ${pak}
fi
echo "###### do command: /Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install numpy wheel"
echo "###### do command: /Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install numpy wheel requests tqdm tabulate"
/Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install numpy wheel
echo "###### do command: /Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install -r ${SRC_DIR}/python_module/requires-test.txt"
/Users/${USER}/.pyenv/versions/${pak}/bin/python3 -m pip install -r ${SRC_DIR}/python_module/requires-test.txt
@@ -95,8 +101,6 @@ function append_path_env_message() {
}

############install env now###########
try_install_brew
install_brew_package
install_python_package
install_cmake
append_path_env_message

+ 3
- 0
src/core/impl/utils/debug.cpp View File

@@ -139,9 +139,12 @@ class SigHandlerInit {
mgb_log_error("%s: caught deadly signal %d(%s)", msg0, signum,
strsignal(signum));
}
//FIXME: implement backtrace for macOS
#ifndef __APPLE__
std::string bp;
debug::backtrace(2).fmt_to_str(bp);
mgb_log_error("%s", bp.c_str());
#endif
exit(EXIT_FAILURE);
}



Loading…
Cancel
Save