|
- # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- #
- # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- #
- # Unless required by applicable law or agreed to in writing,
- # software distributed under the License is distributed on an
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
- import collections
- import ctypes
- import glob
- import os
- import re
- import subprocess
- import sys
- import time
- from typing import List, Optional, Union
-
- from ..core.ops.custom import get_custom_op_abi_tag, load
- from ..logger import get_logger
-
-
def _get_win_folder_with_ctypes(csidl_name):
    """Look up a Windows special folder through the shell32 API.

    Args:
        csidl_name: one of ``"CSIDL_APPDATA"``, ``"CSIDL_COMMON_APPDATA"`` or
            ``"CSIDL_LOCAL_APPDATA"``; any other name raises ``KeyError``.

    Returns:
        The folder path. When the path contains characters above code point
        255 and ``GetShortPathNameW`` succeeds, the short path name is
        returned instead.
    """
    csidl_ids = {
        "CSIDL_APPDATA": 26,
        "CSIDL_COMMON_APPDATA": 35,
        "CSIDL_LOCAL_APPDATA": 28,
    }
    folder_id = csidl_ids[csidl_name]

    path_buf = ctypes.create_unicode_buffer(1024)
    ctypes.windll.shell32.SHGetFolderPathW(None, folder_id, None, 0, path_buf)

    # Downgrade to short path name if have highbit chars. See
    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
    if any(ord(ch) > 255 for ch in path_buf):
        short_buf = ctypes.create_unicode_buffer(1024)
        if ctypes.windll.kernel32.GetShortPathNameW(path_buf.value, short_buf, 1024):
            return short_buf.value

    return path_buf.value
-
-
- system = sys.platform  # platform identifier compared against "win32" / "linux" / "darwin" below
- if system == "win32":
- _get_win_folder = _get_win_folder_with_ctypes  # only bound on Windows
-
- PLAT_TO_VCVARS = {
- "win-amd64": "x86_amd64",
- }
-
- logger = get_logger()
-
- # names of the environment variables read by this module
- ev_custom_op_root_dir = "MGE_CUSTOM_OP_DIR"
- ev_cuda_root_dir = "CUDA_ROOT_DIR"
- ev_cudnn_root_dir = "CUDNN_ROOT_DIR"
-
- # operating system
- IS_WINDOWS = system == "win32"
- IS_LINUX = system == "linux"
- IS_MACOS = system == "darwin"
-
- MGE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # two directory levels above this file
- MGE_INC_PATH = os.path.join(MGE_PATH, "core", "include")
- MGE_LIB_PATH = os.path.join(MGE_PATH, "core", "lib")
- MGE_ABI_VER = 0  # rebound by build() to the ABI tag in use
-
-
- # minimum compiler versions accepted by the ABI compatibility check
- MINIMUM_GCC_VERSION = (5, 0, 0)
- MINIMUM_CLANG_CL_VERSION = (12, 0, 1)
-
- # compile flags
- COMMON_MSVC_FLAGS = [
- "/MD",
- "/wd4002",
- "/wd4819",
- "/EHsc",
- ]
-
- MSVC_IGNORE_CUDAFE_WARNINGS = [
- "field_without_dll_interface",
- ]
-
- COMMON_NVCC_FLAGS = []
-
def _find_cuda_root_dir() -> Optional[str]:
    """Locate the CUDA toolkit installation directory.

    Lookup order:
      1. the ``CUDA_ROOT_DIR`` environment variable (returned as-is);
      2. the directory two levels above the ``nvcc`` executable found through
         ``where`` (Windows) or ``which`` (elsewhere);
      3. if that lookup fails: on Windows the ``CUDA_PATH`` environment
         variable, then the first match of
         ``C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*``;
         on other systems ``/usr/local/cuda``.

    Returns:
        The CUDA root directory, or ``None`` when the fallback location of
        step 3 does not exist on disk.
    """
    cuda_root_dir = os.environ.get(ev_cuda_root_dir)
    if cuda_root_dir is not None:
        return cuda_root_dir

    try:
        which = "where" if IS_WINDOWS else "which"
        output = subprocess.check_output(
            [which, "nvcc"], stderr=subprocess.DEVNULL
        ).decode()
        # `where` prints one line per match; only the first one is a usable
        # path. Empty output raises IndexError and falls through to the
        # fallback locations below.
        nvcc = output.splitlines()[0].strip()
        return os.path.dirname(os.path.dirname(nvcc))
    except Exception:
        # best effort: any failure of the lookup above means "try defaults"
        if IS_WINDOWS:
            cuda_root_dir = os.environ.get("CUDA_PATH", None)
            if cuda_root_dir is None:
                cuda_root_dirs = glob.glob(
                    "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*"
                )
                cuda_root_dir = cuda_root_dirs[0] if cuda_root_dirs else ""
        else:
            cuda_root_dir = "/usr/local/cuda"
        if not os.path.exists(cuda_root_dir):
            cuda_root_dir = None
    return cuda_root_dir
-
-
def _find_cudnn_root_dir() -> Optional[str]:
    """Return the cuDNN root directory given by the environment, or ``None``."""
    return os.getenv(ev_cudnn_root_dir)
-
-
- CUDA_ROOT_DIR = _find_cuda_root_dir()  # resolved once at import time; may be None
- CUDNN_ROOT_DIR = _find_cudnn_root_dir()  # value of the CUDNN_ROOT_DIR env var; None when unset
-
- #####################################################################
- # Phase 1
- #####################################################################
-
-
def _is_cuda_file(path: str) -> bool:
    """Tell whether ``path`` ends with a CUDA extension (``.cu`` or ``.cuh``)."""
    _, extension = os.path.splitext(path)
    return extension == ".cu" or extension == ".cuh"
-
-
def _get_user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
    """Return the path of the user-specific cache directory.

    Base directories per platform:
      * Windows: the ``CSIDL_LOCAL_APPDATA`` folder. When ``appname`` is
        given, ``appauthor`` (defaulting to ``appname``) is appended, or
        ``appname`` itself when ``appauthor`` is ``False``, followed by
        ``Cache`` if ``opinion`` is true.
      * macOS: ``~/Library/Caches``, plus ``appname`` when given.
      * others: ``$XDG_CACHE_HOME`` (default ``~/.cache``), plus ``appname``
        when given.

    ``version`` is appended as a final component only when ``appname`` is
    also given.
    """
    if system == "win32":
        if appauthor is None:
            appauthor = appname
        cache_dir = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
        if appname:
            leaf = appname if appauthor is False else appauthor
            cache_dir = os.path.join(cache_dir, leaf)
            if opinion:
                cache_dir = os.path.join(cache_dir, "Cache")
    else:
        if system == "darwin":
            cache_dir = os.path.expanduser("~/Library/Caches")
        else:
            cache_dir = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
        if appname:
            cache_dir = os.path.join(cache_dir, appname)

    if appname and version:
        cache_dir = os.path.join(cache_dir, version)
    return cache_dir
-
-
def _get_default_build_root() -> str:
    """Return the root folder under which custom ops are built by default."""
    cache_dir = _get_user_cache_dir(appname="mge_custom_op")
    return os.path.realpath(cache_dir)
-
-
def _get_build_dir(name: str) -> str:
    """Return the build directory of the custom op called ``name``.

    The parent directory is the ``MGE_CUSTOM_OP_DIR`` environment variable
    when set, otherwise the default build root.
    """
    root = os.environ.get(ev_custom_op_root_dir)
    return os.path.join(_get_default_build_root() if root is None else root, name)
-
-
- #####################################################################
- # Phase 2
- #####################################################################
-
-
def update_hash(seed, value):
    """Fold ``hash(value)`` into ``seed`` and return the combined hash.

    Follows the boost::hash_combine formula, see
    https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
    """
    offset = 0x9E3779B9
    mixed = hash(value) + offset + (seed << 6) + (seed >> 2)
    return seed ^ mixed
-
-
def hash_source_files(hash_value, source_files):
    """Fold the contents of every file in ``source_files`` into ``hash_value``.

    Files are read as raw bytes so that sources in any text encoding can be
    hashed; reading them in text mode would fail with ``UnicodeDecodeError``
    on files that do not match the locale encoding.

    Args:
        hash_value: the running hash to start from.
        source_files: iterable of paths of the files to hash, in order.

    Returns:
        The updated running hash.
    """
    for filename in source_files:
        with open(filename, "rb") as file:
            hash_value = update_hash(hash_value, file.read())
    return hash_value
-
-
def hash_build_args(hash_value, build_args):
    """Fold every argument of every group in ``build_args`` into ``hash_value``.

    Args:
        hash_value: the running hash to start from.
        build_args: iterable of argument groups, each an iterable of
            hashable values.

    Returns:
        The updated running hash.
    """
    flat_args = [arg for group in build_args for arg in group]
    for arg in flat_args:
        hash_value = update_hash(hash_value, arg)
    return hash_value
-
-
# One record per custom op: its current version and the hash it was built from.
Entry = collections.namedtuple("Entry", "version, hash")


class Versioner(object):
    """In-memory registry that maps a custom op name to its ``Entry``."""

    def __init__(self):
        self.entries = {}

    def get_version(self, name):
        """Return the recorded version of ``name``, or ``None`` if unknown."""
        try:
            return self.entries[name].version
        except KeyError:
            return None

    def bump_version_if_changed(
        self, name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag
    ):
        """Record the build inputs of ``name`` and return its version.

        A new name starts at version 0. The version grows by one each time
        the combined hash of the inputs differs from the recorded one.
        """
        digest = hash_build_args(hash_source_files(0, sources), build_args)
        for item in (build_dir, with_cuda, with_cudnn, abi_tag):
            digest = update_hash(digest, item)

        previous = self.entries.get(name)
        if previous is None:
            current = Entry(0, digest)
        elif previous.hash != digest:
            current = Entry(previous.version + 1, digest)
        else:
            return previous.version

        self.entries[name] = current
        return current.version


# Registry shared by every build request issued in this process.
custom_op_versioner = Versioner()
-
-
def version_check(
    name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag,
):
    """Register the build inputs of ``name`` with the module-level versioner.

    Returns:
        A ``(version, old_version)`` pair: the version after registration and
        the version recorded before it (``None`` for an unknown name).
    """
    previous = custom_op_versioner.get_version(name)
    current = custom_op_versioner.bump_version_if_changed(
        name=name,
        sources=sources,
        build_args=build_args,
        build_dir=build_dir,
        with_cuda=with_cuda,
        with_cudnn=with_cudnn,
        abi_tag=abi_tag,
    )
    return current, previous
-
-
- #####################################################################
- # Phase 3
- #####################################################################
-
-
def _check_ninja_availability():
    """Raise ``RuntimeError`` unless ``ninja --version`` can be executed."""
    probe = ["ninja", "--version"]
    try:
        subprocess.check_output(probe)
    except Exception:
        raise RuntimeError(
            "Ninja is required to build custom op, please install ninja and update your PATH"
        )
-
-
def _mge_is_built_from_src():
    """Return ``True`` unless this file lives under a ``site-packages`` path."""
    return "site-packages" not in os.path.abspath(__file__)
-
-
def _accepted_compilers_for_platform():
    """Return the compiler names accepted on the current platform.

    Returns ``None`` when the platform is none of Windows, macOS or Linux.
    """
    candidates = (
        (IS_WINDOWS, ["clang-cl"]),
        (IS_MACOS, ["clang++", "clang"]),
        (IS_LINUX, ["g++", "gcc", "gnu-c++", "gnu-cc"]),
    )
    for matches, compilers in candidates:
        if matches:
            return compilers
    return None
-
-
def _check_compiler_existed_for_platform(compiler: str) -> bool:
    """Verify that ``compiler`` is the expected one for the current platform.

    On Windows only the presence of ``clang-cl`` is probed. Elsewhere the
    compiler is located with ``which`` and its resolved path, or what
    ``<compiler> -v`` reports, is matched against the accepted compilers.

    Args:
        compiler: name or path of the C++ compiler to check.

    Returns:
        ``True`` when the compiler is present and accepted. ``False``
        otherwise, including when the compiler cannot be found or executed.
    """
    if IS_WINDOWS:
        try:
            subprocess.check_output(
                ["clang-cl", "--version"], stderr=subprocess.STDOUT
            )
        except Exception:
            return False
        return True

    # None is returned for unknown platforms; treat it as "nothing accepted".
    accepted = _accepted_compilers_for_platform() or []

    # A missing compiler makes `which` exit non-zero; report that as
    # "not found" instead of letting the exception escape.
    try:
        which = subprocess.check_output(
            ["which", compiler], stderr=subprocess.STDOUT
        )
    except (OSError, subprocess.CalledProcessError):
        return False

    # use os.path.realpath to resolve any symlinks, in particular from "c++" to e.g. "g++".
    compiler_path = os.path.realpath(which.decode().strip())
    if any(name in compiler_path for name in accepted):
        return True

    try:
        version_string = subprocess.check_output(
            [compiler, "-v"], stderr=subprocess.STDOUT
        ).decode()
    except (OSError, subprocess.CalledProcessError):
        return False

    if sys.platform.startswith("linux"):
        pattern = re.compile("^COLLECT_GCC=(.*)$", re.MULTILINE)
        results = re.findall(pattern, version_string)
        if len(results) != 1:
            return False
        compiler_path = os.path.realpath(results[0].strip())
        return any(name in compiler_path for name in accepted)

    if sys.platform.startswith("darwin"):
        return version_string.startswith("Apple clang")

    return False
-
-
def _check_compiler_abi_compatibility(compiler: str):
    """Verify that ``compiler`` is ABI-compatible with MegEngine.

    Args:
        compiler: name or path of the C++ compiler to check.

    Returns:
        ``True`` when the check is skipped (MegEngine built from source,
        ``MGE_CHECK_ABI=0``, or macOS) or the compiler version is at least
        the required minimum. ``False`` when the version is too old or
        cannot be determined.
    """
    # we think if the megengine is built from source, the user will use the same compiler to compile the custom op
    if _mge_is_built_from_src() or os.environ.get("MGE_CHECK_ABI", "1") == "0":
        return True

    # [TODO] There is no particular minimum version we need for clang, so we're good here.
    if sys.platform.startswith("darwin"):
        return True

    try:
        if sys.platform.startswith("linux"):
            minimum_required_version = MINIMUM_GCC_VERSION
            versionstr = subprocess.check_output(
                [compiler, "-dumpfullversion", "-dumpversion"]
            )
            version = versionstr.decode().strip().split(".")
        else:
            minimum_required_version = MINIMUM_CLANG_CL_VERSION
            compiler_info = subprocess.check_output(
                [compiler, "--version"], stderr=subprocess.STDOUT
            )
            match = re.search(r"(\d+)\.(\d+)\.(\d+)", compiler_info.decode().strip())
            version = (0, 0, 0) if match is None else match.groups()
        # Parse inside the try block: a non-numeric component must be
        # reported like any other failure instead of raising ValueError.
        parsed_version = tuple(map(int, version))
    except Exception as error:
        logger.warning(
            "Error checking compiler version for {}: {}".format(compiler, error)
        )
        return False

    return parsed_version >= minimum_required_version
-
-
def _check_compiler_comatibility():
    """Check the compiler selected through ``CXX`` (or the platform default).

    Returns:
        ``False`` when no suitable compiler is found, ``True`` otherwise. A
        possibly ABI-incompatible compiler only triggers a warning.
    """
    # we use clang-cl on windows, refer: https://clang.llvm.org/docs/UsersManual.html#clang-cl
    fallback = "clang-cl" if IS_WINDOWS else "c++"
    compiler = os.environ.get("CXX", fallback)

    if not _check_compiler_existed_for_platform(compiler):
        template = (
            "Cannot find compiler which is compatible with the compiler "
            "MegEngine was built with for this platform, which is {mge_compiler} on "
            "{platform}. Please use {mge_compiler} to to compile your extension. "
            "Alternatively, you may compile MegEngine from source using "
            "{user_compiler}, and then you can also use {user_compiler} to compile "
            "your extension."
        )
        logger.warning(
            template.format(
                user_compiler=compiler,
                mge_compiler=_accepted_compilers_for_platform()[0],
                platform=sys.platform,
            )
        )
        return False

    if not _check_compiler_abi_compatibility(compiler):
        logger.warning(
            "Your compiler version may be ABI-incompatible with MegEngine! "
            "Please use a compiler that is ABI-compatible with GCC 5.0 on Linux "
            "and LLVM/Clang 12.0 on Windows ."
        )
    return True
-
-
- #####################################################################
- # Phase 4
- #####################################################################
-
-
def _nt_quote_args(args: Optional[List[str]]) -> List[str]:
    """Quote command-line arguments for DOS/Windows conventions.

    Arguments that contain a space are wrapped in double quotes; ``None``
    or an empty list gives an empty list.
    """
    quoted = []
    for arg in args or ():
        quoted.append('"{}"'.format(arg) if " " in arg else arg)
    return quoted
-
-
def _get_cuda_arch_flags(cflags: Optional[List[str]] = None) -> List[str]:
    """Return the GPU architecture flags for nvcc.

    Always an empty list: the user has to specify the GPU arch themselves.
    """
    arch_flags: List[str] = []
    return arch_flags
-
-
def _setup_sys_includes(with_cuda: bool, with_cudnn: bool):
    """List the system include directories: MegEngine, then CUDA, then cuDNN."""
    include_dirs = [os.path.join(MGE_INC_PATH)]
    optional_roots = ((with_cuda, CUDA_ROOT_DIR), (with_cudnn, CUDNN_ROOT_DIR))
    for enabled, root in optional_roots:
        if enabled:
            include_dirs.append(os.path.join(root, "include"))
    return include_dirs
-
-
def _setup_includes(extra_include_paths: List[str], with_cuda: bool, with_cudnn: bool):
    """Split include directories into ``(user_includes, system_includes)``.

    User paths are made absolute. On Windows the system directories are
    appended to the user ones and the system list is returned empty.
    """
    user_dirs = [os.path.abspath(path) for path in extra_include_paths]
    system_dirs = _setup_sys_includes(with_cuda, with_cudnn)
    if IS_WINDOWS:
        return user_dirs + system_dirs, []
    return user_dirs, system_dirs
-
-
def _setup_common_cflags(user_includes: List[str], system_includes: List[str]):
    """Build the compiler flags shared by the C++ and CUDA compile steps.

    Yields ``-I`` flags for user includes, ``-isystem`` flags for system
    includes and, except on Windows, the ``_GLIBCXX_USE_CXX11_ABI`` define.
    """
    flags = ["-I{}".format(include) for include in user_includes]
    flags.extend("-isystem {}".format(include) for include in system_includes)
    if not IS_WINDOWS:
        flags.append("-D_GLIBCXX_USE_CXX11_ABI={}".format(MGE_ABI_VER))
    return flags
-
-
def _setup_cuda_cflags(cflags: List[str], extra_cuda_cflags: List[str]):
    """Build the nvcc flag list from the common flags and the user's extras.

    On Windows the MSVC flags are forwarded through ``-Xcompiler``, the
    listed cudafe warnings are suppressed and every flag is quoted. On other
    platforms ``-fPIC`` is forwarded to the host compiler, ``-std=c++14`` is
    added unless a ``-std=`` flag is already present, and the ``CC``
    environment variable, when set, is passed through ``-ccbin``.
    """
    nvcc_flags = cflags + COMMON_NVCC_FLAGS + _get_cuda_arch_flags()

    if IS_WINDOWS:
        for msvc_flag in COMMON_MSVC_FLAGS:
            nvcc_flags = ["-Xcompiler", msvc_flag] + nvcc_flags
        for warning in MSVC_IGNORE_CUDAFE_WARNINGS:
            nvcc_flags = ["-Xcudafe", "--diag_suppress=" + warning] + nvcc_flags
        return _nt_quote_args(nvcc_flags) + _nt_quote_args(extra_cuda_cflags)

    nvcc_flags += ["--compiler-options", '"-fPIC"']
    nvcc_flags += extra_cuda_cflags
    has_std_flag = any(flag.startswith("-std=") for flag in nvcc_flags)
    if not has_std_flag:
        nvcc_flags.append("-std=c++14")
    host_cc = os.getenv("CC")
    if host_cc is not None:
        nvcc_flags = ["-ccbin", host_cc] + nvcc_flags
    return nvcc_flags
-
-
def _setup_ldflags(
    extra_ldflags: List[str], with_cuda: bool, with_cudnn: bool
) -> List[str]:
    """Build the linker flags: the user's flags followed by the library flags.

    Args:
        extra_ldflags: linker flags supplied by the user; left unmodified.
        with_cuda: add the CUDA runtime library and its search paths.
        with_cudnn: add the cuDNN library (Windows) or its search paths
            (other platforms).

    Returns:
        A new list holding ``extra_ldflags`` plus the MegEngine, CUDA and
        cuDNN flags.
    """
    # Work on a copy so that the caller's list is not extended as a side effect.
    ldflags = list(extra_ldflags)
    if IS_WINDOWS:
        ldflags.append(os.path.join(MGE_LIB_PATH, "megengine_shared.lib"))
        if with_cuda:
            ldflags.append(os.path.join(CUDA_ROOT_DIR, "lib", "x64", "cudart.lib"))
        if with_cudnn:
            ldflags.append(os.path.join(CUDNN_ROOT_DIR, "lib", "x64", "cudnn.lib"))
    else:
        ldflags.append("-lmegengine_shared -L{}".format(MGE_LIB_PATH))
        ldflags.append("-Wl,-rpath,{}".format(MGE_LIB_PATH))
        if with_cuda:
            cuda_lib_dir = os.path.join(CUDA_ROOT_DIR, "lib64")
            ldflags.append("-lcudart")
            ldflags.append("-L{}".format(cuda_lib_dir))
            ldflags.append("-Wl,-rpath,{}".format(cuda_lib_dir))
        if with_cudnn:
            cudnn_lib_dir = os.path.join(CUDNN_ROOT_DIR, "lib64")
            ldflags.append("-L{}".format(cudnn_lib_dir))
            ldflags.append("-Wl,-rpath,{}".format(cudnn_lib_dir))

    return ldflags
-
-
def _add_shared_flag(ldflags: List[str]):
    """Append the shared-library flag to ``ldflags`` in place and return it."""
    shared_flag = "/LD" if IS_WINDOWS else "-shared"
    ldflags.append(shared_flag)
    return ldflags
-
-
- #####################################################################
- # Phase 5
- #####################################################################
-
-
def _obj_file_path(src_file_path: str):
    """Return the object file name of a source file.

    The name is the source's base name without extension, suffixed with
    ``.cuda.o`` for CUDA sources and ``.o`` otherwise.
    """
    stem, _ = os.path.splitext(os.path.basename(src_file_path))
    suffix = ".cuda.o" if _is_cuda_file(src_file_path) else ".o"
    return "{}{}".format(stem, suffix)
-
-
def _dump_ninja_file(
    path,
    cflags,
    post_cflags,
    cuda_cflags,
    cuda_post_cflags,
    sources,
    objects,
    ldflags,
    library_target,
    with_cuda,
):
    """Write the ninja build file that compiles ``sources`` and links them.

    Args:
        path: location of the ninja file to write.
        cflags: C++ compiler flags placed before the input; may be ``None``.
        post_cflags: C++ compiler flags placed after the output; may be ``None``.
        cuda_cflags: nvcc flags placed before the input; may be ``None``.
        cuda_post_cflags: nvcc flags placed after the output; may be ``None``.
        sources: source files, one per entry of ``objects``; must not be empty.
        objects: object file names, in the order of ``sources``.
        ldflags: linker flags; may be ``None``.
        library_target: name of the library to link, or ``None`` to emit
            compile statements only.
        with_cuda: emit the nvcc rule and use it for CUDA sources.
    """

    def sanitize_flags(flags):
        return [] if flags is None else [flag.strip() for flag in flags]

    def escape_path(file_path):
        # ':' and ' ' are syntax in ninja build statements and need a '$'
        # prefix. Colons are escaped on Windows only (drive letters).
        if IS_WINDOWS:
            file_path = file_path.replace(":", "$:")
        return file_path.replace(" ", "$ ")

    cflags = sanitize_flags(cflags)
    post_cflags = sanitize_flags(post_cflags)
    cuda_cflags = sanitize_flags(cuda_cflags)
    cuda_post_cflags = sanitize_flags(cuda_post_cflags)
    ldflags = sanitize_flags(ldflags)

    assert len(sources) == len(objects)
    assert len(sources) > 0

    if IS_WINDOWS:
        compiler = os.environ.get("CXX", "clang-cl")
    else:
        compiler = os.environ.get("CXX", "c++")

    # Version 1.3 is required for the `deps` directive.
    config = ["ninja_required_version = 1.3"]
    config.append("cxx = {}".format(compiler))
    if with_cuda:
        nvcc = os.path.join(CUDA_ROOT_DIR, "bin", "nvcc")
        config.append("nvcc = {}".format(nvcc))

    flags = ["cflags = {}".format(" ".join(cflags))]
    flags.append("post_cflags = {}".format(" ".join(post_cflags)))
    if with_cuda:
        flags.append("cuda_cflags = {}".format(" ".join(cuda_cflags)))
        flags.append("cuda_post_cflags = {}".format(" ".join(cuda_post_cflags)))
    flags.append("ldflags = {}".format(" ".join(ldflags)))

    # Turn into absolute paths so we can emit them into the ninja build
    # file wherever it is.
    sources = [os.path.abspath(file) for file in sources]

    # See https://ninja-build.org/build.ninja.html for reference.
    compile_rule = ["rule compile"]
    if IS_WINDOWS:
        compile_rule.append(
            " command = clang-cl /showIncludes $cflags -c $in /Fo$out $post_cflags"
        )
        compile_rule.append(" deps = msvc")
    else:
        compile_rule.append(
            " command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags"
        )
        compile_rule.append(" depfile = $out.d")
        compile_rule.append(" deps = gcc")

    if with_cuda:
        cuda_compile_rule = ["rule cuda_compile"]
        nvcc_gendeps = ""
        cuda_compile_rule.append(
            " command = $nvcc {} $cuda_cflags -c $in -o $out $cuda_post_cflags".format(
                nvcc_gendeps
            )
        )

    # Escape each object once and reuse the result in both the compile and
    # the link statements, so that both refer to the same ninja node.
    escaped_objects = [escape_path(object_file) for object_file in objects]

    # Emit one build rule per source to enable incremental build.
    build = []
    for source_file, object_file in zip(sources, escaped_objects):
        is_cuda_source = _is_cuda_file(source_file) and with_cuda
        rule = "cuda_compile" if is_cuda_source else "compile"
        build.append(
            "build {}: {} {}".format(object_file, rule, escape_path(source_file))
        )

    if library_target is not None:
        link_rule = ["rule link"]
        if IS_WINDOWS:
            link_rule.append(" command = clang-cl $in /nologo $ldflags /out:$out")
        else:
            link_rule.append(" command = $cxx $in $ldflags -o $out")

        escaped_target = escape_path(library_target)
        link = ["build {}: link {}".format(escaped_target, " ".join(escaped_objects))]
        default = ["default {}".format(escaped_target)]
    else:
        link_rule, link, default = [], [], []

    # 'Blocks' should be separated by newlines, for visual benefit.
    blocks = [config, flags, compile_rule]
    if with_cuda:
        blocks.append(cuda_compile_rule)
    blocks += [link_rule, build, link, default]
    with open(path, "w") as build_file:
        for block in blocks:
            lines = "\n".join(block)
            build_file.write("{}\n\n".format(lines))
-
-
class FileBaton:
    """A lock built on the exclusive creation of a file.

    The holder is whoever manages to create ``lock_file_path``; everybody
    else polls until the file disappears.
    """

    def __init__(self, lock_file_path, wait_seconds=0.1):
        """Create a baton.

        Args:
            lock_file_path: path of the file used as the lock.
            wait_seconds: polling interval used by ``wait()``.
        """
        self.lock_file_path = lock_file_path
        self.wait_seconds = wait_seconds
        # descriptor of the lock file while this baton holds it, else None
        self.fd = None

    def try_acquire(self):
        """Try to create the lock file; return ``True`` on success."""
        try:
            self.fd = os.open(self.lock_file_path, os.O_CREAT | os.O_EXCL)
        except FileExistsError:
            return False
        return True

    def wait(self):
        """Block until the lock file no longer exists."""
        while os.path.exists(self.lock_file_path):
            time.sleep(self.wait_seconds)

    def release(self):
        """Close the descriptor held by this baton and delete the lock file.

        Safe to call more than once.
        """
        if self.fd is not None:
            os.close(self.fd)
            # forget the descriptor: closing it again could hit an unrelated
            # file that reused the same number
            self.fd = None

        try:
            os.remove(self.lock_file_path)
        except FileNotFoundError:
            # already released; nothing left to clean up
            pass
-
-
- #####################################################################
- # Phase 6
- #####################################################################
-
-
def _build_with_ninja(build_dir: str, verbose: bool, error_prefix: str):
    """Run ``ninja -v`` inside ``build_dir``.

    Args:
        build_dir: directory that holds the ``build.ninja`` file.
        verbose: when true, ninja writes to this process' stdout; otherwise
            its output is captured and only reported on failure.
        error_prefix: text that starts the message of the raised error.

    Raises:
        RuntimeError: when ninja exits with a non-zero status. The captured
            output, if any, is appended to ``error_prefix``.
    """
    command = ["ninja", "-v"]
    env = os.environ.copy()
    try:
        sys.stdout.flush()
        sys.stderr.flush()
        stdout_fileno = 1
        subprocess.run(
            command,
            stdout=stdout_fileno if verbose else subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=build_dir,
            check=True,
            env=env,
        )
    except subprocess.CalledProcessError as e:
        # Show the generated build file to help diagnose the failure. This is
        # best effort: a missing or unreadable file must not hide the error.
        try:
            with open(os.path.join(build_dir, "build.ninja")) as f:
                print(f.read())
        except OSError:
            pass
        message = error_prefix
        if e.output:
            # compiler output is not guaranteed to be valid UTF-8
            message += ": {}".format(e.output.decode(errors="replace"))
        raise RuntimeError(message) from e
-
-
def build(
    name: str,
    sources: Union[str, List[str]],
    extra_cflags: Union[str, List[str]] = [],
    extra_cuda_cflags: Union[str, List[str]] = [],
    extra_ldflags: Union[str, List[str]] = [],
    extra_include_paths: Union[str, List[str]] = [],
    with_cuda: Optional[bool] = None,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    abi_tag: Optional[int] = None,
) -> str:
    r"""Build a Custom Op with ninja in the way of just-in-time (JIT).

    To build the custom op, a Ninja build file is emitted, which is used to
    compile the given sources into a dynamic library.

    By default, the directory to which the build file is emitted and the
    resulting library compiled to is ``<cache>/mge_custom_op/<name>``, where
    ``<cache>`` is the user cache folder on the current platform and
    ``<name>`` the name of the custom op. This location can be overridden in
    two ways. First, if the ``MGE_CUSTOM_OP_DIR`` environment variable is set,
    it replaces ``<cache>/mge_custom_op`` and all custom op will be compiled
    into subfolders of this directory. Second, if the ``build_dir``
    argument to this function is supplied, it overrides the entire path, i.e.
    the library will be compiled into that folder directly.

    To compile the sources, the default system compiler (``c++``) is used,
    which can be overridden by setting the ``CXX`` environment variable. To pass
    additional arguments to the compilation process, ``extra_cflags`` or
    ``extra_ldflags`` can be provided. For example, to compile your custom op
    with optimizations, pass ``extra_cflags=['-O3']``. You can also use
    ``extra_cflags`` to pass further include directories.

    CUDA support with mixed compilation is provided. Simply pass CUDA source
    files (``.cu`` or ``.cuh``) along with other sources. Such files will be
    detected and compiled with nvcc rather than the C++ compiler. This includes
    passing the CUDA lib64 directory as a library directory, and linking
    ``cudart``. You can pass additional flags to nvcc via
    ``extra_cuda_cflags``, just like with ``extra_cflags`` for C++. Various
    heuristics for finding the CUDA install directory are used, which usually
    work fine. If not, setting the ``CUDA_ROOT_DIR`` environment variable is the
    safest option. If you use CUDNN, please also set the ``CUDNN_ROOT_DIR``
    environment variable.

    Args:
        name: The name of the custom op to build.
        sources: A list of relative or absolute paths to C++ source files.
        extra_cflags: optional list of compiler flags to forward to the build.
        extra_cuda_cflags: optional list of compiler flags to forward to nvcc
            when building CUDA sources.
        extra_ldflags: optional list of linker flags to forward to the build.
        extra_include_paths: optional list of include directories to forward
            to the build.
        with_cuda: Determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on the existence of ``.cu`` or
            ``.cuh`` in ``sources``. Set it to ``True`` to force CUDA headers
            and libraries to be included.
        build_dir: optional path to use as build workspace.
        verbose: If ``True``, turns on verbose logging of load steps.
        abi_tag: Determines the value of MACRO ``_GLIBCXX_USE_CXX11_ABI``
            in gcc compiler, should be ``0`` or ``1``.

    Returns:
        the compiled dynamic library path

    """
    global MGE_ABI_VER

    # phase 1: prepare config
    if abi_tag is None:
        abi_tag = get_custom_op_abi_tag()
    MGE_ABI_VER = abi_tag

    def strlist(args, name):
        assert isinstance(args, str) or isinstance(
            args, list
        ), "{} must be str or list[str]".format(name)
        if isinstance(args, str):
            return [args]
        for arg in args:
            assert isinstance(arg, str)
        args = [arg.strip() for arg in args]
        return args

    sources = strlist(sources, "sources")
    extra_cflags = strlist(extra_cflags, "extra_cflags")
    extra_cuda_cflags = strlist(extra_cuda_cflags, "extra_cuda_cflags")
    extra_ldflags = strlist(extra_ldflags, "extra_ldflags")
    extra_include_paths = strlist(extra_include_paths, "extra_include_paths")

    with_cuda = any(map(_is_cuda_file, sources)) if with_cuda is None else with_cuda
    with_cudnn = any("cudnn" in f for f in extra_ldflags)

    if CUDA_ROOT_DIR is None and with_cuda:
        print(
            "No CUDA runtime is found, using {}=/path/to/your/cuda_root_dir".format(
                ev_cuda_root_dir
            )
        )
    if CUDNN_ROOT_DIR is None and with_cudnn:
        print(
            "Cannot find the root directory of cudnn, using {}=/path/to/your/cudnn_root_dir".format(
                ev_cudnn_root_dir
            )
        )

    build_dir = os.path.abspath(
        _get_build_dir(name) if build_dir is None else build_dir
    )
    os.makedirs(build_dir, exist_ok=True)

    if verbose:
        print("Using {} to build megengine custom op".format(build_dir))

    # phase 2: version check
    version, old_version = version_check(
        name,
        sources,
        [extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths],
        build_dir,
        with_cuda,
        with_cudnn,
        abi_tag,
    )
    if verbose:
        if version != old_version and old_version is not None:
            print(
                "Input conditions of custom op {} have changed, bumping to version {}".format(
                    name, version
                )
            )
        print("Building custom op {} with version {}".format(name, version))

    # The library name is fixed here so that every exit path of the function
    # returns the same, complete path.
    lib_name = "{}_v{}{}".format(name, version, ".dll" if IS_WINDOWS else ".so")
    lib_path = os.path.join(build_dir, lib_name)

    # Skip the build only when the library of this version is really there:
    # the version is recorded before building, so a failed earlier build
    # leaves a matching version without a library.
    if version == old_version and os.path.exists(lib_path):
        if verbose:
            print(
                "No modifications detected for {}, skipping build step...".format(name)
            )
        return lib_path

    # phase 3: compiler and ninja check
    _check_ninja_availability()
    _check_compiler_comatibility()

    # phase 4: setup the compile flags
    user_includes, system_includes = _setup_includes(
        extra_include_paths, with_cuda, with_cudnn
    )
    common_cflags = _setup_common_cflags(user_includes, system_includes)
    cuda_cflags = (
        _setup_cuda_cflags(common_cflags, extra_cuda_cflags) if with_cuda else None
    )
    ldflags = _setup_ldflags(extra_ldflags, with_cuda, with_cudnn)

    if IS_WINDOWS:
        cflags = common_cflags + COMMON_MSVC_FLAGS + extra_cflags
        cflags = _nt_quote_args(cflags)
    else:
        cflags = common_cflags + ["-fPIC", "-std=c++14"] + extra_cflags

    ldflags = _add_shared_flag(ldflags)
    if sys.platform.startswith("darwin"):
        ldflags.append("-undefined dynamic_lookup")
    elif IS_WINDOWS:
        ldflags += ["/link"]
        ldflags = _nt_quote_args(ldflags)

    baton = FileBaton(os.path.join(build_dir, "lock"))
    if baton.try_acquire():
        try:
            # phase 5: generate ninja build file
            objs = [_obj_file_path(src) for src in sources]

            build_file_path = os.path.join(build_dir, "build.ninja")
            if verbose:
                print("Emitting ninja build file {}".format(build_file_path))
            _dump_ninja_file(
                path=build_file_path,
                cflags=cflags,
                post_cflags=None,
                cuda_cflags=cuda_cflags,
                cuda_post_cflags=None,
                sources=sources,
                objects=objs,
                ldflags=ldflags,
                library_target=lib_name,
                with_cuda=with_cuda,
            )

            # phase 6: build with ninja
            if verbose:
                print("Compiling and linking your custom op {}".format(lib_path))
            _build_with_ninja(build_dir, verbose, "compiling error")
        finally:
            baton.release()
    else:
        # another process is building into the same directory; wait for it
        baton.wait()

    return lib_path
-
-
def build_and_load(
    name: str,
    sources: Union[str, List[str]],
    extra_cflags: Union[str, List[str]] = [],
    extra_cuda_cflags: Union[str, List[str]] = [],
    extra_ldflags: Union[str, List[str]] = [],
    extra_include_paths: Union[str, List[str]] = [],
    with_cuda: Optional[bool] = None,
    build_dir: Optional[bool] = None,
    verbose: bool = False,
    abi_tag: Optional[int] = None,
) -> str:
    r"""Build a Custom Op just-in-time (JIT) with ninja, then load it.

    Forwards every argument to ``build()`` and loads the dynamic library
    that it reports.

    Args:
        same as ``build()``

    Returns:
        the compiled dynamic library path

    """

    library = build(
        name=name,
        sources=sources,
        extra_cflags=extra_cflags,
        extra_cuda_cflags=extra_cuda_cflags,
        extra_ldflags=extra_ldflags,
        extra_include_paths=extra_include_paths,
        with_cuda=with_cuda,
        build_dir=build_dir,
        verbose=verbose,
        abi_tag=abi_tag,
    )
    if verbose:
        print("Load the compiled custom op {}".format(library))
    load(library)
    return library
|