import collections
import ctypes
import glob
import os
import re
import subprocess
import sys
import time
from typing import List, Optional, Union

from ..core.ops.custom import get_custom_op_abi_tag, load
from ..logger import get_logger


def _get_win_folder_with_ctypes(csidl_name):
    csidl_const = {
        "CSIDL_APPDATA": 26,
        "CSIDL_COMMON_APPDATA": 35,
        "CSIDL_LOCAL_APPDATA": 28,
    }[csidl_name]

    buf = ctypes.create_unicode_buffer(1024)
    ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)

    # Downgrade to short path name if it has high-bit chars.
    has_high_char = False
    for c in buf:
        if ord(c) > 255:
            has_high_char = True
            break
    if has_high_char:
        buf2 = ctypes.create_unicode_buffer(1024)
        if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
            buf = buf2

    return buf.value


system = sys.platform
if system == "win32":
    _get_win_folder = _get_win_folder_with_ctypes

PLAT_TO_VCVARS = {
    "win-amd64": "x86_amd64",
}

logger = get_logger()

# environment variables
ev_custom_op_root_dir = "MGE_CUSTOM_OP_DIR"
ev_cuda_root_dir = "CUDA_ROOT_DIR"
ev_cudnn_root_dir = "CUDNN_ROOT_DIR"

# operating system
IS_WINDOWS = system == "win32"
IS_LINUX = system == "linux"
IS_MACOS = system == "darwin"

MGE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MGE_INC_PATH = os.path.join(MGE_PATH, "core", "include")
MGE_LIB_PATH = os.path.join(MGE_PATH, "core", "lib")
MGE_ABI_VER = 0

# minimum compiler versions
MINIMUM_GCC_VERSION = (5, 0, 0)
MINIMUM_CLANG_CL_VERSION = (12, 0, 1)

# compile flags
COMMON_MSVC_FLAGS = [
    "/MD",
    "/wd4002",
    "/wd4819",
    "/EHsc",
]

MSVC_IGNORE_CUDAFE_WARNINGS = [
    "field_without_dll_interface",
]

COMMON_NVCC_FLAGS = []


# Finds the CUDA install path
def _find_cuda_root_dir() -> Optional[str]:
    cuda_root_dir = os.environ.get(ev_cuda_root_dir)
    if cuda_root_dir is None:
        try:
            which = "where" if IS_WINDOWS else "which"
            with open(os.devnull, "w") as devnull:
                nvcc = (
                    subprocess.check_output([which, "nvcc"], stderr=devnull)
                    .decode()
                    .rstrip("\r\n")
                )
                cuda_root_dir = os.path.dirname(os.path.dirname(nvcc))
        except Exception:
            if IS_WINDOWS:
                cuda_root_dir = os.environ.get("CUDA_PATH", None)
                if cuda_root_dir is None:
                    cuda_root_dirs = glob.glob(
                        "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*"
                    )
                    if len(cuda_root_dirs) == 0:
                        cuda_root_dir = ""
                    else:
                        cuda_root_dir = cuda_root_dirs[0]
            else:
                cuda_root_dir = "/usr/local/cuda"
            if not os.path.exists(cuda_root_dir):
                cuda_root_dir = None
    return cuda_root_dir


def _find_cudnn_root_dir() -> Optional[str]:
    cudnn_root_dir = os.environ.get(ev_cudnn_root_dir)
    return cudnn_root_dir


CUDA_ROOT_DIR = _find_cuda_root_dir()
CUDNN_ROOT_DIR = _find_cudnn_root_dir()

#####################################################################
# Phase 1
#####################################################################


def _is_cuda_file(path: str) -> bool:
    valid_ext = [".cu", ".cuh"]
    return os.path.splitext(path)[1] in valid_ext
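
# Illustrative example (file names are hypothetical): _is_cuda_file("relu_kernel.cu")
# and _is_cuda_file("relu_kernel.cuh") return True, while _is_cuda_file("relu.cpp")
# returns False. build() below relies on this to auto-detect whether CUDA is needed
# and to route each source file to the matching compile rule.
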
# Return full path to the user-specific cache dir for this application.
# Typical user cache directories are:
#     Mac OS X:   ~/Library/Caches/<AppName>
#     Unix:       ~/.cache/<AppName>    (XDG default)
#     Windows:    C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
def _get_user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
    if system == "win32":
        appauthor = appname if appauthor is None else appauthor
        path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
        if appname:
            if appauthor is not False:
                path = os.path.join(path, appauthor)
            else:
                path = os.path.join(path, appname)
            if opinion:
                path = os.path.join(path, "Cache")
    elif system == "darwin":
        path = os.path.expanduser("~/Library/Caches")
        if appname:
            path = os.path.join(path, appname)
    else:
        path = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
        if appname:
            path = os.path.join(path, appname)
    if appname and version:
        path = os.path.join(path, version)
    return path


# Returns the path to the root folder under which custom ops will be built.
def _get_default_build_root() -> str:
    return os.path.realpath(_get_user_cache_dir(appname="mge_custom_op"))


def _get_build_dir(name: str) -> str:
    custom_op_root_dir = os.environ.get(ev_custom_op_root_dir)
    if custom_op_root_dir is None:
        custom_op_root_dir = _get_default_build_root()

    build_dir = os.path.join(custom_op_root_dir, name)
    return build_dir


#####################################################################
# Phase 2
#####################################################################


def update_hash(seed, value):
    # using boost::hash_combine
    # https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
    return seed ^ (hash(value) + 0x9E3779B9 + (seed << 6) + (seed >> 2))


def hash_source_files(hash_value, source_files):
    for filename in source_files:
        with open(filename) as file:
            hash_value = update_hash(hash_value, file.read())
    return hash_value


def hash_build_args(hash_value, build_args):
    for group in build_args:
        for arg in group:
            hash_value = update_hash(hash_value, arg)
    return hash_value


Entry = collections.namedtuple("Entry", "version, hash")


class Versioner(object):
    def __init__(self):
        self.entries = {}

    def get_version(self, name):
        entry = self.entries.get(name)
        return None if entry is None else entry.version

    def bump_version_if_changed(
        self, name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag
    ):
        hash_value = 0
        hash_value = hash_source_files(hash_value, sources)
        hash_value = hash_build_args(hash_value, build_args)
        hash_value = update_hash(hash_value, build_dir)
        hash_value = update_hash(hash_value, with_cuda)
        hash_value = update_hash(hash_value, with_cudnn)
        hash_value = update_hash(hash_value, abi_tag)

        entry = self.entries.get(name)
        if entry is None:
            self.entries[name] = entry = Entry(0, hash_value)
        elif hash_value != entry.hash:
            self.entries[name] = entry = Entry(entry.version + 1, hash_value)

        return entry.version


custom_op_versioner = Versioner()


def version_check(
    name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag,
):
    old_version = custom_op_versioner.get_version(name)
    version = custom_op_versioner.bump_version_if_changed(
        name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag,
    )
    return version, old_version
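
# Illustrative behaviour (in-process only; nothing is persisted to disk): the first
# version_check() call for a given op name yields version 0 and old_version None.
# A later call in the same process with identical sources and flags returns the same
# version, so build() below skips recompilation; changing any source file or build
# flag bumps the version, and the library is re-emitted as "<name>_v<version>".
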
#####################################################################
# Phase 3
#####################################################################


def _check_ninja_availability():
    try:
        subprocess.check_output("ninja --version".split())
    except Exception:
        raise RuntimeError(
            "Ninja is required to build custom op, please install ninja and update your PATH"
        )


def _mge_is_built_from_src():
    file_path = os.path.abspath(__file__)
    if "site-packages" in file_path:
        return False
    else:
        return True


def _accepted_compilers_for_platform():
    if IS_WINDOWS:
        return ["clang-cl"]
    if IS_MACOS:
        return ["clang++", "clang"]
    if IS_LINUX:
        return ["g++", "gcc", "gnu-c++", "gnu-cc"]


# Verifies that the compiler is the expected one for the current platform.
def _check_compiler_existed_for_platform(compiler: str) -> bool:
    # there is no suitable cmd like `which` on windows, so we simply check
    # whether `clang-cl --version` runs successfully
    if IS_WINDOWS:
        try:
            version_string = subprocess.check_output(
                ["clang-cl", "--version"], stderr=subprocess.STDOUT
            ).decode()
            return True
        except Exception:
            return False

    # use os.path.realpath to resolve any symlinks, in particular from "c++" to e.g. "g++".
    which = subprocess.check_output(["which", compiler], stderr=subprocess.STDOUT)
    compiler_path = os.path.realpath(which.decode().strip())
    if any(name in compiler_path for name in _accepted_compilers_for_platform()):
        return True

    version_string = subprocess.check_output(
        [compiler, "-v"], stderr=subprocess.STDOUT
    ).decode()
    if sys.platform.startswith("linux"):
        pattern = re.compile("^COLLECT_GCC=(.*)$", re.MULTILINE)
        results = re.findall(pattern, version_string)
        if len(results) != 1:
            return False
        compiler_path = os.path.realpath(results[0].strip())
        return any(
            name in compiler_path for name in _accepted_compilers_for_platform()
        )
    if sys.platform.startswith("darwin"):
        return version_string.startswith("Apple clang")
    return False


# Verifies that the given compiler is ABI-compatible with MegEngine.
def _check_compiler_abi_compatibility(compiler: str):
    # we assume that if megengine is built from source, the user will use the same
    # compiler to compile the custom op
    if _mge_is_built_from_src() or os.environ.get("MGE_CHECK_ABI", "1") == "0":
        return True

    # [TODO] There is no particular minimum version we need for clang, so we're good here.
    if sys.platform.startswith("darwin"):
        return True

    try:
        if sys.platform.startswith("linux"):
            minimum_required_version = MINIMUM_GCC_VERSION
            versionstr = subprocess.check_output(
                [compiler, "-dumpfullversion", "-dumpversion"]
            )
            version = versionstr.decode().strip().split(".")
        else:
            minimum_required_version = MINIMUM_CLANG_CL_VERSION
            compiler_info = subprocess.check_output(
                [compiler, "--version"], stderr=subprocess.STDOUT
            )
            match = re.search(r"(\d+)\.(\d+)\.(\d+)", compiler_info.decode().strip())
            version = (0, 0, 0) if match is None else match.groups()
    except Exception:
        _, error, _ = sys.exc_info()
        logger.warning(
            "Error checking compiler version for {}: {}".format(compiler, error)
        )
        return False

    if tuple(map(int, version)) >= minimum_required_version:
        return True

    return False
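
# Illustrative example (the printed version string is an assumption about a typical
# toolchain): on Linux with CXX=g++, `g++ -dumpfullversion -dumpversion` prints
# something like "9.4.0", which is split into ("9", "4", "0") and compared against
# MINIMUM_GCC_VERSION; on Windows, the clang-cl `--version` banner is parsed with
# the regex above and compared against MINIMUM_CLANG_CL_VERSION.
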
def _check_compiler_compatibility():
    # we use clang-cl on windows, refer: https://clang.llvm.org/docs/UsersManual.html#clang-cl
    compiler = (
        os.environ.get("CXX", "clang-cl")
        if IS_WINDOWS
        else os.environ.get("CXX", "c++")
    )

    existed = _check_compiler_existed_for_platform(compiler)
    if not existed:
        log_str = (
            "Cannot find compiler which is compatible with the compiler "
            "MegEngine was built with for this platform, which is {mge_compiler} on "
            "{platform}. Please use {mge_compiler} to compile your extension. "
            "Alternatively, you may compile MegEngine from source using "
            "{user_compiler}, and then you can also use {user_compiler} to compile "
            "your extension."
        ).format(
            user_compiler=compiler,
            mge_compiler=_accepted_compilers_for_platform()[0],
            platform=sys.platform,
        )
        logger.warning(log_str)
        return False

    compatible = _check_compiler_abi_compatibility(compiler)
    if not compatible:
        log_str = (
            "Your compiler version may be ABI-incompatible with MegEngine! "
            "Please use a compiler that is ABI-compatible with GCC 5.0 on Linux "
            "and LLVM/Clang 12.0 on Windows."
        )
        logger.warning(log_str)
    return True


#####################################################################
# Phase 4
#####################################################################


# Quote command-line arguments for DOS/Windows conventions.
def _nt_quote_args(args: Optional[List[str]]) -> List[str]:
    # cover the None case
    if not args:
        return []
    return ['"{}"'.format(arg) if " " in arg else arg for arg in args]


# For now, users need to specify the GPU arch themselves.
def _get_cuda_arch_flags(cflags: Optional[List[str]] = None) -> List[str]:
    return []


def _setup_sys_includes(with_cuda: bool, with_cudnn: bool):
    includes = [os.path.join(MGE_INC_PATH)]
    if with_cuda:
        includes.append(os.path.join(CUDA_ROOT_DIR, "include"))
    if with_cudnn:
        includes.append(os.path.join(CUDNN_ROOT_DIR, "include"))
    return includes


def _setup_includes(extra_include_paths: List[str], with_cuda: bool, with_cudnn: bool):
    user_includes = [os.path.abspath(path) for path in extra_include_paths]
    system_includes = _setup_sys_includes(with_cuda, with_cudnn)
    if IS_WINDOWS:
        user_includes += system_includes
        system_includes.clear()
    return user_includes, system_includes


def _setup_common_cflags(user_includes: List[str], system_includes: List[str]):
    common_cflags = []
    common_cflags += ["-I{}".format(include) for include in user_includes]
    common_cflags += ["-isystem {}".format(include) for include in system_includes]
    if not IS_WINDOWS:
        common_cflags += ["-D_GLIBCXX_USE_CXX11_ABI={}".format(MGE_ABI_VER)]
    return common_cflags


def _setup_cuda_cflags(cflags: List[str], extra_cuda_cflags: List[str]):
    cuda_flags = cflags + COMMON_NVCC_FLAGS + _get_cuda_arch_flags()
    if IS_WINDOWS:
        for flag in COMMON_MSVC_FLAGS:
            cuda_flags = ["-Xcompiler", flag] + cuda_flags
        for ignore_warning in MSVC_IGNORE_CUDAFE_WARNINGS:
            cuda_flags = ["-Xcudafe", "--diag_suppress=" + ignore_warning] + cuda_flags
        cuda_flags = _nt_quote_args(cuda_flags)
        cuda_flags += _nt_quote_args(extra_cuda_cflags)
    else:
        cuda_flags += ["--compiler-options", '"-fPIC"']
        cuda_flags += extra_cuda_cflags
        if not any(flag.startswith("-std=") for flag in cuda_flags):
            cuda_flags.append("-std=c++14")
        if os.getenv("CC") is not None:
            cuda_flags = ["-ccbin", os.getenv("CC")] + cuda_flags
    return cuda_flags


def _setup_ldflags(
    extra_ldflags: List[str], with_cuda: bool, with_cudnn: bool
) -> List[str]:
    ldflags = extra_ldflags
    if IS_WINDOWS:
        ldflags.append(os.path.join(MGE_LIB_PATH, "megengine_shared.lib"))
        if with_cuda:
            ldflags.append(os.path.join(CUDA_ROOT_DIR, "lib", "x64", "cudart.lib"))
        if with_cudnn:
            ldflags.append(os.path.join(CUDNN_ROOT_DIR, "lib", "x64", "cudnn.lib"))
    else:
        ldflags.append("-lmegengine_shared -L{}".format(MGE_LIB_PATH))
        ldflags.append("-Wl,-rpath,{}".format(MGE_LIB_PATH))
        if with_cuda:
            ldflags.append("-lcudart")
            ldflags.append("-L{}".format(os.path.join(CUDA_ROOT_DIR, "lib64")))
            ldflags.append("-Wl,-rpath,{}".format(os.path.join(CUDA_ROOT_DIR, "lib64")))
        if with_cudnn:
            ldflags.append("-L{}".format(os.path.join(CUDNN_ROOT_DIR, "lib64")))
            ldflags.append(
                "-Wl,-rpath,{}".format(os.path.join(CUDNN_ROOT_DIR, "lib64"))
            )

    return ldflags
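
# Rough sketch of the flags assembled above for a Linux CUDA build (the paths shown
# are illustrative): extra include dirs become "-I/abs/include", MegEngine and CUDA
# headers become "-isystem .../core/include" entries, plus
# "-D_GLIBCXX_USE_CXX11_ABI=<abi_tag>"; nvcc additionally gets
# '--compiler-options "-fPIC"' and "-std=c++14"; the link line carries
# "-lmegengine_shared", "-lcudart" and the matching -L / -Wl,-rpath entries.
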
def _add_shared_flag(ldflags: List[str]):
    ldflags += ["/LD" if IS_WINDOWS else "-shared"]
    return ldflags


#####################################################################
# Phase 5
#####################################################################


def _obj_file_path(src_file_path: str):
    file_name = os.path.splitext(os.path.basename(src_file_path))[0]
    if _is_cuda_file(src_file_path):
        target = "{}.cuda.o".format(file_name)
    else:
        target = "{}.o".format(file_name)
    return target


def _dump_ninja_file(
    path,
    cflags,
    post_cflags,
    cuda_cflags,
    cuda_post_cflags,
    sources,
    objects,
    ldflags,
    library_target,
    with_cuda,
):
    def sanitize_flags(flags):
        return [] if flags is None else [flag.strip() for flag in flags]

    cflags = sanitize_flags(cflags)
    post_cflags = sanitize_flags(post_cflags)
    cuda_cflags = sanitize_flags(cuda_cflags)
    cuda_post_cflags = sanitize_flags(cuda_post_cflags)
    ldflags = sanitize_flags(ldflags)

    assert len(sources) == len(objects)
    assert len(sources) > 0

    if IS_WINDOWS:
        compiler = os.environ.get("CXX", "clang-cl")
    else:
        compiler = os.environ.get("CXX", "c++")

    # Version 1.3 is required for the `deps` directive.
    config = ["ninja_required_version = 1.3"]
    config.append("cxx = {}".format(compiler))
    if with_cuda:
        nvcc = os.path.join(CUDA_ROOT_DIR, "bin", "nvcc")
        config.append("nvcc = {}".format(nvcc))

    flags = ["cflags = {}".format(" ".join(cflags))]
    flags.append("post_cflags = {}".format(" ".join(post_cflags)))
    if with_cuda:
        flags.append("cuda_cflags = {}".format(" ".join(cuda_cflags)))
        flags.append("cuda_post_cflags = {}".format(" ".join(cuda_post_cflags)))
    flags.append("ldflags = {}".format(" ".join(ldflags)))

    # Turn into absolute paths so we can emit them into the ninja build
    # file wherever it is.
    sources = [os.path.abspath(file) for file in sources]

    # See https://ninja-build.org/build.ninja.html for reference.
    compile_rule = ["rule compile"]
    if IS_WINDOWS:
        compile_rule.append(
            "  command = clang-cl /showIncludes $cflags -c $in /Fo$out $post_cflags"
        )
        compile_rule.append("  deps = msvc")
    else:
        compile_rule.append(
            "  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags"
        )
        compile_rule.append("  depfile = $out.d")
        compile_rule.append("  deps = gcc")

    if with_cuda:
        cuda_compile_rule = ["rule cuda_compile"]
        nvcc_gendeps = ""
        cuda_compile_rule.append(
            "  command = $nvcc {} $cuda_cflags -c $in -o $out $cuda_post_cflags".format(
                nvcc_gendeps
            )
        )

    # Emit one build rule per source to enable incremental build.
    build = []
    for source_file, object_file in zip(sources, objects):
        is_cuda_source = _is_cuda_file(source_file) and with_cuda
        rule = "cuda_compile" if is_cuda_source else "compile"
        if IS_WINDOWS:
            source_file = source_file.replace(":", "$:")
            object_file = object_file.replace(":", "$:")
        source_file = source_file.replace(" ", "$ ")
        object_file = object_file.replace(" ", "$ ")
        build.append("build {}: {} {}".format(object_file, rule, source_file))

    if library_target is not None:
        link_rule = ["rule link"]
        if IS_WINDOWS:
            link_rule.append("  command = clang-cl $in /nologo $ldflags /out:$out")
        else:
            link_rule.append("  command = $cxx $in $ldflags -o $out")

        link = ["build {}: link {}".format(library_target, " ".join(objects))]
        default = ["default {}".format(library_target)]
    else:
        link_rule, link, default = [], [], []

    # 'Blocks' should be separated by newlines, for visual benefit.
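    # For a single-source Linux build, the emitted file looks roughly like this
    # (flags, paths and the op name are illustrative):
    #
    #   ninja_required_version = 1.3
    #   cxx = c++
    #
    #   cflags = -I/abs/include -fPIC -std=c++14 ...
    #   post_cflags =
    #   ldflags = -lmegengine_shared ... -shared
    #
    #   rule compile
    #     command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
    #     depfile = $out.d
    #     deps = gcc
    #
    #   rule link
    #     command = $cxx $in $ldflags -o $out
    #
    #   build my_op.o: compile /abs/path/to/my_op.cpp
    #   build my_op_v0.so: link my_op.o
    #   default my_op_v0.so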
    blocks = [config, flags, compile_rule]
    if with_cuda:
        blocks.append(cuda_compile_rule)
    blocks += [link_rule, build, link, default]
    with open(path, "w") as build_file:
        for block in blocks:
            lines = "\n".join(block)
            build_file.write("{}\n\n".format(lines))


class FileBaton:
    def __init__(self, lock_file_path, wait_seconds=0.1):
        self.lock_file_path = lock_file_path
        self.wait_seconds = wait_seconds
        self.fd = None

    def try_acquire(self):
        try:
            self.fd = os.open(self.lock_file_path, os.O_CREAT | os.O_EXCL)
            return True
        except FileExistsError:
            return False

    def wait(self):
        while os.path.exists(self.lock_file_path):
            time.sleep(self.wait_seconds)

    def release(self):
        if self.fd is not None:
            os.close(self.fd)
        os.remove(self.lock_file_path)


#####################################################################
# Phase 6
#####################################################################


def _build_with_ninja(build_dir: str, verbose: bool, error_prefix: str):
    command = ["ninja", "-v"]
    env = os.environ.copy()
    try:
        sys.stdout.flush()
        sys.stderr.flush()
        stdout_fileno = 1
        subprocess.run(
            command,
            stdout=stdout_fileno if verbose else subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=build_dir,
            check=True,
            env=env,
        )
    except subprocess.CalledProcessError as e:
        with open(os.path.join(build_dir, "build.ninja")) as f:
            lines = f.readlines()
            print(lines)
        _, error, _ = sys.exc_info()
        message = error_prefix
        if hasattr(error, "output") and error.output:
            message += ": {}".format(error.output.decode())
        raise RuntimeError(message) from e


def build(
    name: str,
    sources: Union[str, List[str]],
    extra_cflags: Union[str, List[str]] = [],
    extra_cuda_cflags: Union[str, List[str]] = [],
    extra_ldflags: Union[str, List[str]] = [],
    extra_include_paths: Union[str, List[str]] = [],
    with_cuda: Optional[bool] = None,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    abi_tag: Optional[int] = None,
) -> str:
    r"""Build a custom op with ninja just-in-time (JIT).

    To build the custom op, a Ninja build file is emitted, which is used to
    compile the given sources into a dynamic library.

    By default, the build file is emitted and the resulting library is
    compiled to ``<tmp>/mge_custom_op/<name>``, where ``<tmp>`` is the
    temporary folder on the current platform and ``<name>`` is the name of the
    custom op. This location can be overridden in two ways. First, if the
    ``MGE_CUSTOM_OP_DIR`` environment variable is set, it replaces
    ``<tmp>/mge_custom_op`` and all custom ops will be compiled into
    subfolders of this directory. Second, if the ``build_dir`` argument to
    this function is supplied, it overrides the entire path, i.e. the library
    will be compiled into that folder directly.

    To compile the sources, the default system compiler (``c++``) is used,
    which can be overridden by setting the ``CXX`` environment variable. To
    pass additional arguments to the compilation process, ``extra_cflags`` or
    ``extra_ldflags`` can be provided. For example, to compile your custom op
    with optimizations, pass ``extra_cflags=['-O3']``. You can also use
    ``extra_cflags`` to pass further include directories.

    CUDA support with mixed compilation is provided. Simply pass CUDA source
    files (``.cu`` or ``.cuh``) along with the other sources. Such files will
    be detected and compiled with nvcc rather than the C++ compiler. This
    includes passing the CUDA lib64 directory as a library directory and
    linking ``cudart``. You can pass additional flags to nvcc via
    ``extra_cuda_cflags``, just like with ``extra_cflags`` for C++.
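
    As a minimal illustration, for a hypothetical op named ``my_op`` implemented
    in ``my_op.cpp`` and ``my_op.cu``, a call might look like
    ``build("my_op", ["my_op.cpp", "my_op.cu"], extra_cflags=["-O3"])``; the
    return value is the path of the compiled dynamic library.
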
    Various heuristics for finding the CUDA install directory are used, which
    usually work fine. If not, setting the ``CUDA_ROOT_DIR`` environment
    variable is the safest option. If you use cuDNN, please also set the
    ``CUDNN_ROOT_DIR`` environment variable.

    Args:
        name: The name of the custom op to build.
        sources: A list of relative or absolute paths to C++ source files.
        extra_cflags: optional list of compiler flags to forward to the build.
        extra_cuda_cflags: optional list of compiler flags to forward to nvcc
            when building CUDA sources.
        extra_ldflags: optional list of linker flags to forward to the build.
        extra_include_paths: optional list of include directories to forward
            to the build.
        with_cuda: Determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on the existence of ``.cu`` or
            ``.cuh`` in ``sources``. Set it to ``True`` to force CUDA headers
            and libraries to be included.
        build_dir: optional path to use as the build workspace.
        verbose: If ``True``, turns on verbose logging of load steps.
        abi_tag: Determines the value of the macro ``_GLIBCXX_USE_CXX11_ABI``
            used by the gcc compiler; should be ``0`` or ``1``.

    Returns:
        the compiled dynamic library path
    """
    # phase 1: prepare config
    if abi_tag is None:
        abi_tag = get_custom_op_abi_tag()

    global MGE_ABI_VER
    MGE_ABI_VER = abi_tag

    def strlist(args, name):
        assert isinstance(args, str) or isinstance(
            args, list
        ), "{} must be str or list[str]".format(name)
        if isinstance(args, str):
            return [args]
        for arg in args:
            assert isinstance(arg, str)
        args = [arg.strip() for arg in args]
        return args

    sources = strlist(sources, "sources")
    extra_cflags = strlist(extra_cflags, "extra_cflags")
    extra_cuda_cflags = strlist(extra_cuda_cflags, "extra_cuda_cflags")
    extra_ldflags = strlist(extra_ldflags, "extra_ldflags")
    extra_include_paths = strlist(extra_include_paths, "extra_include_paths")

    with_cuda = any(map(_is_cuda_file, sources)) if with_cuda is None else with_cuda
    with_cudnn = any(["cudnn" in f for f in extra_ldflags])

    if CUDA_ROOT_DIR is None and with_cuda:
        print(
            "No CUDA runtime is found, please set {}=/path/to/your/cuda_root_dir".format(
                ev_cuda_root_dir
            )
        )
    if CUDNN_ROOT_DIR is None and with_cudnn:
        print(
            "Cannot find the root directory of cudnn, please set {}=/path/to/your/cudnn_root_dir".format(
                ev_cudnn_root_dir
            )
        )

    build_dir = os.path.abspath(
        _get_build_dir(name) if build_dir is None else build_dir
    )
    if not os.path.exists(build_dir):
        os.makedirs(build_dir, exist_ok=True)

    if verbose:
        print("Using {} to build megengine custom op".format(build_dir))

    # phase 2: version check
    version, old_version = version_check(
        name,
        sources,
        [extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths],
        build_dir,
        with_cuda,
        with_cudnn,
        abi_tag,
    )
    if verbose:
        if version != old_version and old_version is not None:
            print(
                "Input conditions of custom op {} have changed, bumping to version {}".format(
                    name, version
                )
            )
        print("Building custom op {} with version {}".format(name, version))
    if version == old_version:
        if verbose:
            print(
                "No modifications detected for {}, skipping build step...".format(name)
            )
        return
    name = "{}_v{}".format(name, version)

    # phase 3: compiler and ninja check
    _check_ninja_availability()
    _check_compiler_compatibility()

    # phase 4: setup the compile flags
    user_includes, system_includes = _setup_includes(
        extra_include_paths, with_cuda, with_cudnn
    )
    common_cflags = _setup_common_cflags(user_includes, system_includes)
    cuda_cflags = (
        _setup_cuda_cflags(common_cflags, extra_cuda_cflags) if with_cuda else None
    )
    ldflags = _setup_ldflags(extra_ldflags, with_cuda, with_cudnn)
    if IS_WINDOWS:
        cflags = common_cflags + COMMON_MSVC_FLAGS + extra_cflags
        cflags = _nt_quote_args(cflags)
    else:
        cflags = common_cflags + ["-fPIC", "-std=c++14"] + extra_cflags

    ldflags = _add_shared_flag(ldflags)
    if sys.platform.startswith("darwin"):
        ldflags.append("-undefined dynamic_lookup")
    elif IS_WINDOWS:
        ldflags += ["/link"]
        ldflags = _nt_quote_args(ldflags)

    baton = FileBaton(os.path.join(build_dir, "lock"))
    if baton.try_acquire():
        try:
            # phase 5: generate ninja build file
            objs = [_obj_file_path(src) for src in sources]
            name += ".dll" if IS_WINDOWS else ".so"

            build_file_path = os.path.join(build_dir, "build.ninja")
            if verbose:
                print("Emitting ninja build file {}".format(build_file_path))
            _dump_ninja_file(
                path=build_file_path,
                cflags=cflags,
                post_cflags=None,
                cuda_cflags=cuda_cflags,
                cuda_post_cflags=None,
                sources=sources,
                objects=objs,
                ldflags=ldflags,
                library_target=name,
                with_cuda=with_cuda,
            )

            # phase 6: build with ninja
            if verbose:
                print(
                    "Compiling and linking your custom op {}".format(
                        os.path.join(build_dir, name)
                    )
                )
            _build_with_ninja(build_dir, verbose, "compiling error")
        finally:
            baton.release()
    else:
        baton.wait()

    return os.path.join(build_dir, name)


def build_and_load(
    name: str,
    sources: Union[str, List[str]],
    extra_cflags: Union[str, List[str]] = [],
    extra_cuda_cflags: Union[str, List[str]] = [],
    extra_ldflags: Union[str, List[str]] = [],
    extra_include_paths: Union[str, List[str]] = [],
    with_cuda: Optional[bool] = None,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    abi_tag: Optional[int] = None,
) -> str:
    r"""Build and load a custom op with ninja just-in-time (JIT).

    Same as the function ``build()`` but also loads the built dynamic library.

    Args:
        same as ``build()``

    Returns:
        the compiled dynamic library path
    """
    lib_path = build(
        name,
        sources,
        extra_cflags,
        extra_cuda_cflags,
        extra_ldflags,
        extra_include_paths,
        with_cuda,
        build_dir,
        verbose,
        abi_tag,
    )
    if verbose:
        print("Load the compiled custom op {}".format(lib_path))
    load(lib_path)
    return lib_path
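
# -----------------------------------------------------------------------------
# Usage sketch (illustrative only: the op name, source files and flags below are
# hypothetical, and the exact import path of this module may differ between
# MegEngine versions):
#
#   lib_path = build_and_load(
#       "my_op",
#       sources=["my_op.cpp", "my_op_kernel.cu"],
#       extra_cflags=["-O3"],
#       verbose=True,
#   )
#
# CUDA sources are detected automatically from the file extensions. Set
# CUDA_ROOT_DIR (and CUDNN_ROOT_DIR if cudnn is linked) when the toolkits live
# in non-default locations, and MGE_CUSTOM_OP_DIR to relocate the build cache.
# -----------------------------------------------------------------------------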