GitOrigin-RevId: 3bb0b46311
tags/v1.8.0
@@ -1145,6 +1145,14 @@ if(TARGET _imperative_rt) | |||
COMMAND ${CMAKE_COMMAND} -E create_symlink | |||
${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py | |||
${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py | |||
COMMAND ${CMAKE_COMMAND} -E create_symlink | |||
${CMAKE_CURRENT_SOURCE_DIR}/src/custom/include | |||
${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/include | |||
COMMAND ${CMAKE_COMMAND} -E make_directory | |||
${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/lib | |||
COMMAND ${CMAKE_COMMAND} -E create_symlink | |||
${CMAKE_CURRENT_BINARY_DIR}/src/$<TARGET_FILE_NAME:${MGE_SHARED_LIB}> | |||
${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/lib/$<TARGET_FILE_NAME:${MGE_SHARED_LIB}> | |||
DEPENDS _imperative_rt | |||
VERBATIM | |||
) | |||
@@ -67,8 +67,11 @@ add_custom_command( | |||
COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/LICENSE ${PROJECT_SOURCE_DIR}/ACKNOWLEDGMENTS ${PROJECT_BINARY_DIR} | |||
COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/$<TARGET_FILE_NAME:${MODULE_NAME}> # clean develop | |||
COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/version.py # clean develop | |||
COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/include # clean develop | |||
COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/lib # clean develop | |||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine ${CMAKE_CURRENT_BINARY_DIR}/python/megengine | |||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/test ${CMAKE_CURRENT_BINARY_DIR}/python/test | |||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/src/custom/include ${CMAKE_CURRENT_BINARY_DIR}/python/megengine/core/include | |||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py ${CMAKE_CURRENT_BINARY_DIR}/python/setup.py | |||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/requires.txt ${CMAKE_CURRENT_BINARY_DIR}/python/requires.txt | |||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/requires-style.txt ${CMAKE_CURRENT_BINARY_DIR}/python/requires-style.txt | |||
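The two CMake hunks above stage the custom-op headers (src/custom/include) and the megengine shared library into megengine/core/include and megengine/core/lib inside the Python package, which is where the JIT build tool introduced below looks for them. A minimal sketch to confirm that layout from Python, assuming a MegEngine build that contains these changes:

```python
# Sketch: check that the packaged headers and library directories exist where
# custom_op_tools expects them (MGE_INC_PATH / MGE_LIB_PATH are defined below).
import os
from megengine.utils import custom_op_tools as cot

print("include dir:", cot.MGE_INC_PATH)  # <package>/megengine/core/include
print("library dir:", cot.MGE_LIB_PATH)  # <package>/megengine/core/lib
assert os.path.isdir(cot.MGE_INC_PATH)
assert os.path.isdir(cot.MGE_LIB_PATH)
```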
@@ -7,11 +7,14 @@ | |||
# software distributed under the License is distributed on an | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
import os | |||
from .._imperative_rt.ops._custom import ( | |||
_get_custom_op_list, | |||
_install, | |||
_make_custom_op, | |||
_uninstall, | |||
get_custom_op_abi_tag, | |||
) | |||
__all__ = ["load"] | |||
@@ -25,8 +28,16 @@ def _gen_custom_op_maker(custom_op_name): | |||
def load(lib_path): | |||
op_in_this_lib = _install(lib_path[0:-3], lib_path) | |||
lib_path = os.path.abspath(lib_path) | |||
lib_name = os.path.splitext(lib_path)[0] | |||
op_in_this_lib = _install(lib_name, lib_path) | |||
for op in op_in_this_lib: | |||
op_maker = _gen_custom_op_maker(op) | |||
globals()[op] = op_maker | |||
__all__.append(op) | |||
def unload(lib_path): | |||
lib_path = os.path.abspath(lib_path) | |||
lib_name = os.path.splitext(lib_path)[0] | |||
_uninstall(lib_name) |
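load() resolves the library path, registers every custom op found in the shared object, and re-exports each op as an attribute of this module; unload() removes them again. A minimal usage sketch (the library path is hypothetical):

```python
# Sketch: load a pre-built custom-op library, inspect the registered ops, unload it.
from megengine.core.ops import custom

custom.load("/path/to/libmy_op.so")      # hypothetical path to a built library
print(custom._get_custom_op_list())      # names of all currently registered custom ops
# every op in the library is now exposed as custom.<OpName>
custom.unload("/path/to/libmy_op.so")
```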
@@ -0,0 +1,909 @@ | |||
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
# | |||
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
# | |||
# Unless required by applicable law or agreed to in writing, | |||
# software distributed under the License is distributed on an | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
import collections | |||
import ctypes | |||
import glob | |||
import os | |||
import re | |||
import subprocess | |||
import sys | |||
import time | |||
from typing import List, Optional, Union | |||
from ..core.ops.custom import load | |||
from ..logger import get_logger | |||
def _get_win_folder_with_ctypes(csidl_name): | |||
csidl_const = { | |||
"CSIDL_APPDATA": 26, | |||
"CSIDL_COMMON_APPDATA": 35, | |||
"CSIDL_LOCAL_APPDATA": 28, | |||
}[csidl_name] | |||
buf = ctypes.create_unicode_buffer(1024) | |||
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) | |||
# Downgrade to the short path name if the path has high-bit chars. See | |||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>. | |||
has_high_char = False | |||
for c in buf: | |||
if ord(c) > 255: | |||
has_high_char = True | |||
break | |||
if has_high_char: | |||
buf2 = ctypes.create_unicode_buffer(1024) | |||
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): | |||
buf = buf2 | |||
return buf.value | |||
system = sys.platform | |||
if system == "win32": | |||
_get_win_folder = _get_win_folder_with_ctypes | |||
PLAT_TO_VCVARS = { | |||
"win-amd64": "x86_amd64", | |||
} | |||
logger = get_logger() | |||
# environment variable | |||
ev_custom_op_root_dir = "MGE_CUSTOM_OP_DIR" | |||
ev_cuda_root_dir = "CUDA_ROOT_DIR" | |||
ev_cudnn_root_dir = "CUDNN_ROOT_DIR" | |||
# operating system | |||
IS_WINDOWS = system == "win32" | |||
IS_LINUX = system == "linux" | |||
IS_MACOS = system == "darwin" | |||
MGE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | |||
MGE_INC_PATH = os.path.join(MGE_PATH, "core", "include") | |||
MGE_LIB_PATH = os.path.join(MGE_PATH, "core", "lib") | |||
MGE_ABI_VER = 0 | |||
# compile version | |||
MINIMUM_GCC_VERSION = (5, 0, 0) | |||
MINIMUM_CLANG_CL_VERSION = (12, 0, 1) | |||
# compile flags | |||
COMMON_MSVC_FLAGS = [ | |||
"/MD", | |||
"/wd4002", | |||
"/wd4819", | |||
"/EHsc", | |||
] | |||
MSVC_IGNORE_CUDAFE_WARNINGS = [ | |||
"field_without_dll_interface", | |||
] | |||
COMMON_NVCC_FLAGS = [] | |||
# Finds the CUDA install path | |||
def _find_cuda_root_dir() -> Optional[str]: | |||
cuda_root_dir = os.environ.get(ev_cuda_root_dir) | |||
if cuda_root_dir is None: | |||
try: | |||
which = "where" if IS_WINDOWS else "which" | |||
with open(os.devnull, "w") as devnull: | |||
nvcc = ( | |||
subprocess.check_output([which, "nvcc"], stderr=devnull) | |||
.decode() | |||
.rstrip("\r\n") | |||
) | |||
cuda_root_dir = os.path.dirname(os.path.dirname(nvcc)) | |||
except Exception: | |||
if IS_WINDOWS: | |||
cuda_root_dir = os.environ.get("CUDA_PATH", None) | |||
if cuda_root_dir == None: | |||
cuda_root_dirs = glob.glob( | |||
"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*" | |||
) | |||
if len(cuda_root_dirs) == 0: | |||
cuda_root_dir = "" | |||
else: | |||
cuda_root_dir = cuda_root_dirs[0] | |||
else: | |||
cuda_root_dir = "/usr/local/cuda" | |||
if not os.path.exists(cuda_root_dir): | |||
cuda_root_dir = None | |||
return cuda_root_dir | |||
def _find_cudnn_root_dir() -> Optional[str]: | |||
cudnn_root_dir = os.environ.get(ev_cudnn_root_dir) | |||
return cudnn_root_dir | |||
CUDA_ROOT_DIR = _find_cuda_root_dir() | |||
CUDNN_ROOT_DIR = _find_cudnn_root_dir() | |||
##################################################################### | |||
# Phase 1 | |||
##################################################################### | |||
def _is_cuda_file(path: str) -> bool: | |||
valid_ext = [".cu", ".cuh"] | |||
return os.path.splitext(path)[1] in valid_ext | |||
# Return full path to the user-specific cache dir for this application. | |||
# Typical user cache directories are: | |||
# Mac OS X: ~/Library/Caches/<AppName> | |||
# Unix: ~/.cache/<AppName> (XDG default) | |||
# Windows: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache | |||
def _get_user_cache_dir(appname=None, appauthor=None, version=None, opinion=True): | |||
if system == "win32": | |||
appauthor = appname if appauthor is None else appauthor | |||
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) | |||
if appname: | |||
if appauthor is not False: | |||
path = os.path.join(path, appauthor) | |||
else: | |||
path = os.path.join(path, appname) | |||
if opinion: | |||
path = os.path.join(path, "Cache") | |||
elif system == "darwin": | |||
path = os.path.expanduser("~/Library/Caches") | |||
if appname: | |||
path = os.path.join(path, appname) | |||
else: | |||
path = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache")) | |||
if appname: | |||
path = os.path.join(path, appname) | |||
if appname and version: | |||
path = os.path.join(path, version) | |||
return path | |||
# Returns the path to the root folder under which custom ops will be built. | |||
def _get_default_build_root() -> str: | |||
return os.path.realpath(_get_user_cache_dir(appname="mge_custom_op")) | |||
def _get_build_dir(name: str) -> str: | |||
custom_op_root_dir = os.environ.get(ev_custom_op_root_dir) | |||
if custom_op_root_dir is None: | |||
custom_op_root_dir = _get_default_build_root() | |||
build_dir = os.path.join(custom_op_root_dir, name) | |||
return build_dir | |||
##################################################################### | |||
# Phase 2 | |||
##################################################################### | |||
def update_hash(seed, value): | |||
# using boost::hash_combine | |||
# https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html | |||
return seed ^ (hash(value) + 0x9E3779B9 + (seed << 6) + (seed >> 2)) | |||
def hash_source_files(hash_value, source_files): | |||
for filename in source_files: | |||
with open(filename) as file: | |||
hash_value = update_hash(hash_value, file.read()) | |||
return hash_value | |||
def hash_build_args(hash_value, build_args): | |||
for group in build_args: | |||
for arg in group: | |||
hash_value = update_hash(hash_value, arg) | |||
return hash_value | |||
Entry = collections.namedtuple("Entry", "version, hash") | |||
class Versioner(object): | |||
def __init__(self): | |||
self.entries = {} | |||
def get_version(self, name): | |||
entry = self.entries.get(name) | |||
return None if entry is None else entry.version | |||
def bump_version_if_changed( | |||
self, name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag | |||
): | |||
hash_value = 0 | |||
hash_value = hash_source_files(hash_value, sources) | |||
hash_value = hash_build_args(hash_value, build_args) | |||
hash_value = update_hash(hash_value, build_dir) | |||
hash_value = update_hash(hash_value, with_cuda) | |||
hash_value = update_hash(hash_value, with_cudnn) | |||
hash_value = update_hash(hash_value, abi_tag) | |||
entry = self.entries.get(name) | |||
if entry is None: | |||
self.entries[name] = entry = Entry(0, hash_value) | |||
elif hash_value != entry.hash: | |||
self.entries[name] = entry = Entry(entry.version + 1, hash_value) | |||
return entry.version | |||
custom_op_versioner = Versioner() | |||
def version_check( | |||
name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag, | |||
): | |||
old_version = custom_op_versioner.get_version(name) | |||
version = custom_op_versioner.bump_version_if_changed( | |||
name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag, | |||
) | |||
return version, old_version | |||
##################################################################### | |||
# Phase 3 | |||
##################################################################### | |||
def _check_ninja_availability(): | |||
try: | |||
subprocess.check_output("ninja --version".split()) | |||
except Exception: | |||
raise RuntimeError( | |||
"Ninja is required to build custom op, please install ninja and update your PATH" | |||
) | |||
def _mge_is_built_from_src(): | |||
file_path = os.path.abspath(__file__) | |||
if "site-packages" in file_path: | |||
return False | |||
else: | |||
return True | |||
def _accepted_compilers_for_platform(): | |||
if IS_WINDOWS: | |||
return ["clang-cl"] | |||
if IS_MACOS: | |||
return ["clang++", "clang"] | |||
if IS_LINUX: | |||
return ["g++", "gcc", "gnu-c++", "gnu-cc"] | |||
# Verifies that the compiler is the expected one for the current platform. | |||
def _check_compiler_existed_for_platform(compiler: str) -> bool: | |||
# there is no `which`-like command on Windows, so we just try to run the compiler and treat success as existence | |||
if IS_WINDOWS: | |||
try: | |||
version_string = subprocess.check_output( | |||
["clang-cl", "--version"], stderr=subprocess.STDOUT | |||
).decode() | |||
return True | |||
except Exception: | |||
return False | |||
# use os.path.realpath to resolve any symlinks, in particular from "c++" to e.g. "g++". | |||
which = subprocess.check_output(["which", compiler], stderr=subprocess.STDOUT) | |||
compiler_path = os.path.realpath(which.decode().strip()) | |||
if any(name in compiler_path for name in _accepted_compilers_for_platform()): | |||
return True | |||
version_string = subprocess.check_output( | |||
[compiler, "-v"], stderr=subprocess.STDOUT | |||
).decode() | |||
if sys.platform.startswith("linux"): | |||
pattern = re.compile("^COLLECT_GCC=(.*)$", re.MULTILINE) | |||
results = re.findall(pattern, version_string) | |||
if len(results) != 1: | |||
return False | |||
compiler_path = os.path.realpath(results[0].strip()) | |||
return any(name in compiler_path for name in _accepted_compilers_for_platform()) | |||
if sys.platform.startswith("darwin"): | |||
return version_string.startswith("Apple clang") | |||
return False | |||
# Verifies that the given compiler is ABI-compatible with MegEngine. | |||
def _check_compiler_abi_compatibility(compiler: str): | |||
# we assume that if MegEngine is built from source, the user will use the same compiler to compile the custom op | |||
if _mge_is_built_from_src() or os.environ.get("MGE_CHECK_ABI", "1") == "0": | |||
return True | |||
# [TODO] There is no particular minimum version we need for clang, so we're good here. | |||
if sys.platform.startswith("darwin"): | |||
return True | |||
try: | |||
if sys.platform.startswith("linux"): | |||
minimum_required_version = MINIMUM_GCC_VERSION | |||
versionstr = subprocess.check_output( | |||
[compiler, "-dumpfullversion", "-dumpversion"] | |||
) | |||
version = versionstr.decode().strip().split(".") | |||
else: | |||
minimum_required_version = MINIMUM_CLANG_CL_VERSION | |||
compiler_info = subprocess.check_output( | |||
[compiler, "--version"], stderr=subprocess.STDOUT | |||
) | |||
match = re.search(r"(\d+)\.(\d+)\.(\d+)", compiler_info.decode().strip()) | |||
version = (0, 0, 0) if match is None else match.groups() | |||
except Exception: | |||
_, error, _ = sys.exc_info() | |||
logger.warning( | |||
"Error checking compiler version for {}: {}".format(compiler, error) | |||
) | |||
return False | |||
if tuple(map(int, version)) >= minimum_required_version: | |||
return True | |||
return False | |||
def _check_compiler_compatibility(): | |||
# we use clang-cl on windows, refer: https://clang.llvm.org/docs/UsersManual.html#clang-cl | |||
compiler = ( | |||
os.environ.get("CXX", "clang-cl") | |||
if IS_WINDOWS | |||
else os.environ.get("CXX", "c++") | |||
) | |||
existed = _check_compiler_existed_for_platform(compiler) | |||
if existed == False: | |||
log_str = ( | |||
"Cannot find compiler which is compatible with the compiler " | |||
"MegEngine was built with for this platform, which is {mge_compiler} on " | |||
"{platform}. Please use {mge_compiler} to to compile your extension. " | |||
"Alternatively, you may compile MegEngine from source using " | |||
"{user_compiler}, and then you can also use {user_compiler} to compile " | |||
"your extension." | |||
).format( | |||
user_compiler=compiler, | |||
mge_compiler=_accepted_compilers_for_platform()[0], | |||
platform=sys.platform, | |||
) | |||
logger.warning(log_str) | |||
return False | |||
compatible = _check_compiler_abi_compatibility(compiler) | |||
if compatible == False: | |||
log_str = ( | |||
"Your compiler version may be ABI-incompatible with MegEngine! " | |||
"Please use a compiler that is ABI-compatible with GCC 5.0 on Linux " | |||
"and LLVM/Clang 12.0 on Windows ." | |||
) | |||
logger.warning(log_str) | |||
return True | |||
##################################################################### | |||
# Phase 4 | |||
##################################################################### | |||
# Quote command-line arguments for DOS/Windows conventions. | |||
def _nt_quote_args(args: Optional[List[str]]) -> List[str]: | |||
# Cover None-type | |||
if not args: | |||
return [] | |||
return ['"{}"'.format(arg) if " " in arg else arg for arg in args] | |||
# For now, the user needs to specify the GPU arch explicitly. | |||
def _get_cuda_arch_flags(cflags: Optional[List[str]] = None) -> List[str]: | |||
return [] | |||
def _setup_sys_includes(with_cuda: bool, with_cudnn: bool): | |||
includes = [os.path.join(MGE_INC_PATH)] | |||
if with_cuda: | |||
includes.append(os.path.join(CUDA_ROOT_DIR, "include")) | |||
if with_cudnn: | |||
includes.append(os.path.join(CUDNN_ROOT_DIR, "include")) | |||
return includes | |||
def _setup_includes(extra_include_paths: List[str], with_cuda: bool, with_cudnn: bool): | |||
user_includes = [os.path.abspath(path) for path in extra_include_paths] | |||
system_includes = _setup_sys_includes(with_cuda, with_cudnn) | |||
if IS_WINDOWS: | |||
user_includes += system_includes | |||
system_includes.clear() | |||
return user_includes, system_includes | |||
def _setup_common_cflags(user_includes: List[str], system_includes: List[str]): | |||
common_cflags = [] | |||
common_cflags += ["-I{}".format(include) for include in user_includes] | |||
common_cflags += ["-isystem {}".format(include) for include in system_includes] | |||
if not IS_WINDOWS: | |||
common_cflags += ["-D_GLIBCXX_USE_CXX11_ABI={}".format(MGE_ABI_VER)] | |||
return common_cflags | |||
def _setup_cuda_cflags(cflags: List[str], extra_cuda_cflags: List[str]): | |||
cuda_flags = cflags + COMMON_NVCC_FLAGS + _get_cuda_arch_flags() | |||
if IS_WINDOWS: | |||
for flag in COMMON_MSVC_FLAGS: | |||
cuda_flags = ["-Xcompiler", flag] + cuda_flags | |||
for ignore_warning in MSVC_IGNORE_CUDAFE_WARNINGS: | |||
cuda_flags = ["-Xcudafe", "--diag_suppress=" + ignore_warning] + cuda_flags | |||
cuda_flags = _nt_quote_args(cuda_flags) | |||
cuda_flags += _nt_quote_args(extra_cuda_cflags) | |||
else: | |||
cuda_flags += ["--compiler-options", '"-fPIC"'] | |||
cuda_flags += extra_cuda_cflags | |||
if not any(flag.startswith("-std=") for flag in cuda_flags): | |||
cuda_flags.append("-std=c++14") | |||
if os.getenv("CC") is not None: | |||
cuda_flags = ["-ccbin", os.getenv("CC")] + cuda_flags | |||
return cuda_flags | |||
def _setup_ldflags( | |||
extra_ldflags: List[str], with_cuda: bool, with_cudnn: bool | |||
) -> List[str]: | |||
ldflags = extra_ldflags | |||
if IS_WINDOWS: | |||
ldflags.append(os.path.join(MGE_LIB_PATH, "megengine_shared.lib")) | |||
if with_cuda: | |||
ldflags.append(os.path.join(CUDA_ROOT_DIR, "lib", "x64", "cudart.lib")) | |||
if with_cudnn: | |||
ldflags.append(os.path.join(CUDNN_ROOT_DIR, "lib", "x64", "cudnn.lib")) | |||
else: | |||
ldflags.append("-lmegengine_shared -L{}".format(MGE_LIB_PATH)) | |||
ldflags.append("-Wl,-rpath,{}".format(MGE_LIB_PATH)) | |||
if with_cuda: | |||
ldflags.append("-lcudart") | |||
ldflags.append("-L{}".format(os.path.join(CUDA_ROOT_DIR, "lib64"))) | |||
ldflags.append("-Wl,-rpath,{}".format(os.path.join(CUDA_ROOT_DIR, "lib64"))) | |||
if with_cudnn: | |||
ldflags.append("-L{}".format(os.path.join(CUDNN_ROOT_DIR, "lib64"))) | |||
ldflags.append( | |||
"-Wl,-rpath,{}".format(os.path.join(CUDNN_ROOT_DIR, "lib64")) | |||
) | |||
return ldflags | |||
def _add_shared_flag(ldflags: List[str]): | |||
ldflags += ["/LD" if IS_WINDOWS else "-shared"] | |||
return ldflags | |||
##################################################################### | |||
# Phase 5 | |||
##################################################################### | |||
def _obj_file_path(src_file_path: str): | |||
file_name = os.path.splitext(os.path.basename(src_file_path))[0] | |||
if _is_cuda_file(src_file_path): | |||
target = "{}.cuda.o".format(file_name) | |||
else: | |||
target = "{}.o".format(file_name) | |||
return target | |||
def _dump_ninja_file( | |||
path, | |||
cflags, | |||
post_cflags, | |||
cuda_cflags, | |||
cuda_post_cflags, | |||
sources, | |||
objects, | |||
ldflags, | |||
library_target, | |||
with_cuda, | |||
): | |||
def sanitize_flags(flags): | |||
return [] if flags is None else [flag.strip() for flag in flags] | |||
cflags = sanitize_flags(cflags) | |||
post_cflags = sanitize_flags(post_cflags) | |||
cuda_cflags = sanitize_flags(cuda_cflags) | |||
cuda_post_cflags = sanitize_flags(cuda_post_cflags) | |||
ldflags = sanitize_flags(ldflags) | |||
assert len(sources) == len(objects) | |||
assert len(sources) > 0 | |||
if IS_WINDOWS: | |||
compiler = os.environ.get("CXX", "clang-cl") | |||
else: | |||
compiler = os.environ.get("CXX", "c++") | |||
# Version 1.3 is required for the `deps` directive. | |||
config = ["ninja_required_version = 1.3"] | |||
config.append("cxx = {}".format(compiler)) | |||
if with_cuda: | |||
nvcc = os.path.join(CUDA_ROOT_DIR, "bin", "nvcc") | |||
config.append("nvcc = {}".format(nvcc)) | |||
flags = ["cflags = {}".format(" ".join(cflags))] | |||
flags.append("post_cflags = {}".format(" ".join(post_cflags))) | |||
if with_cuda: | |||
flags.append("cuda_cflags = {}".format(" ".join(cuda_cflags))) | |||
flags.append("cuda_post_cflags = {}".format(" ".join(cuda_post_cflags))) | |||
flags.append("ldflags = {}".format(" ".join(ldflags))) | |||
# Turn into absolute paths so we can emit them into the ninja build | |||
# file wherever it is. | |||
sources = [os.path.abspath(file) for file in sources] | |||
# See https://ninja-build.org/build.ninja.html for reference. | |||
compile_rule = ["rule compile"] | |||
if IS_WINDOWS: | |||
compile_rule.append( | |||
" command = clang-cl /showIncludes $cflags -c $in /Fo$out $post_cflags" | |||
) | |||
compile_rule.append(" deps = msvc") | |||
else: | |||
compile_rule.append( | |||
" command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags" | |||
) | |||
compile_rule.append(" depfile = $out.d") | |||
compile_rule.append(" deps = gcc") | |||
if with_cuda: | |||
cuda_compile_rule = ["rule cuda_compile"] | |||
nvcc_gendeps = "" | |||
cuda_compile_rule.append( | |||
" command = $nvcc {} $cuda_cflags -c $in -o $out $cuda_post_cflags".format( | |||
nvcc_gendeps | |||
) | |||
) | |||
# Emit one build rule per source to enable incremental build. | |||
build = [] | |||
for source_file, object_file in zip(sources, objects): | |||
is_cuda_source = _is_cuda_file(source_file) and with_cuda | |||
rule = "cuda_compile" if is_cuda_source else "compile" | |||
if IS_WINDOWS: | |||
source_file = source_file.replace(":", "$:") | |||
object_file = object_file.replace(":", "$:") | |||
source_file = source_file.replace(" ", "$ ") | |||
object_file = object_file.replace(" ", "$ ") | |||
build.append("build {}: {} {}".format(object_file, rule, source_file)) | |||
if library_target is not None: | |||
link_rule = ["rule link"] | |||
if IS_WINDOWS: | |||
link_rule.append(" command = clang-cl $in /nologo $ldflags /out:$out") | |||
else: | |||
link_rule.append(" command = $cxx $in $ldflags -o $out") | |||
link = ["build {}: link {}".format(library_target, " ".join(objects))] | |||
default = ["default {}".format(library_target)] | |||
else: | |||
link_rule, link, default = [], [], [] | |||
# 'Blocks' should be separated by newlines, for visual benefit. | |||
blocks = [config, flags, compile_rule] | |||
if with_cuda: | |||
blocks.append(cuda_compile_rule) | |||
blocks += [link_rule, build, link, default] | |||
with open(path, "w") as build_file: | |||
for block in blocks: | |||
lines = "\n".join(block) | |||
build_file.write("{}\n\n".format(lines)) | |||
class FileBaton: | |||
def __init__(self, lock_file_path, wait_seconds=0.1): | |||
self.lock_file_path = lock_file_path | |||
self.wait_seconds = wait_seconds | |||
self.fd = None | |||
def try_acquire(self): | |||
try: | |||
self.fd = os.open(self.lock_file_path, os.O_CREAT | os.O_EXCL) | |||
return True | |||
except FileExistsError: | |||
return False | |||
def wait(self): | |||
while os.path.exists(self.lock_file_path): | |||
time.sleep(self.wait_seconds) | |||
def release(self): | |||
if self.fd is not None: | |||
os.close(self.fd) | |||
os.remove(self.lock_file_path) | |||
##################################################################### | |||
# Phase 6 | |||
##################################################################### | |||
def _build_with_ninja(build_dir: str, verbose: bool, error_prefix: str): | |||
command = ["ninja", "-v"] | |||
env = os.environ.copy() | |||
try: | |||
sys.stdout.flush() | |||
sys.stderr.flush() | |||
stdout_fileno = 1 | |||
subprocess.run( | |||
command, | |||
stdout=stdout_fileno if verbose else subprocess.PIPE, | |||
stderr=subprocess.STDOUT, | |||
cwd=build_dir, | |||
check=True, | |||
env=env, | |||
) | |||
except subprocess.CalledProcessError as e: | |||
with open(os.path.join(build_dir, "build.ninja")) as f: | |||
lines = f.readlines() | |||
print(lines) | |||
_, error, _ = sys.exc_info() | |||
message = error_prefix | |||
if hasattr(error, "output") and error.output: | |||
message += ": {}".format(error.output.decode()) | |||
raise RuntimeError(message) from e | |||
def build( | |||
name: str, | |||
sources: Union[str, List[str]], | |||
extra_cflags: Union[str, List[str]] = [], | |||
extra_cuda_cflags: Union[str, List[str]] = [], | |||
extra_ldflags: Union[str, List[str]] = [], | |||
extra_include_paths: Union[str, List[str]] = [], | |||
with_cuda: Optional[bool] = None, | |||
build_dir: Optional[str] = None, | |||
verbose: bool = False, | |||
abi_tag: Optional[int] = None, | |||
) -> str: | |||
r"""Build a Custom Op with ninja in the way of just-in-time (JIT). | |||
To build the custom op, a Ninja build file is emitted, which is used to | |||
compile the given sources into a dynamic library. | |||
By default, the directory to which the build file is emitted and into which the | |||
resulting library is compiled is ``<tmp>/mge_custom_op/<name>``, where | |||
``<tmp>`` is the user cache folder on the current platform and ``<name>`` | |||
the name of the custom op. This location can be overridden in two ways. | |||
First, if the ``MGE_CUSTOM_OP_DIR`` environment variable is set, it | |||
replaces ``<tmp>/mge_custom_op`` and all custom ops will be compiled | |||
into subfolders of this directory. Second, if the ``build_dir`` | |||
argument to this function is supplied, it overrides the entire path, i.e. | |||
the library will be compiled into that folder directly. | |||
To compile the sources, the default system compiler (``c++``) is used, | |||
which can be overridden by setting the ``CXX`` environment variable. To pass | |||
additional arguments to the compilation process, ``extra_cflags`` or | |||
``extra_ldflags`` can be provided. For example, to compile your custom op | |||
with optimizations, pass ``extra_cflags=['-O3']``. You can also use | |||
``extra_cflags`` to pass further include directories. | |||
CUDA support with mixed compilation is provided. Simply pass CUDA source | |||
files (``.cu`` or ``.cuh``) along with other sources. Such files will be | |||
detected and compiled with nvcc rather than the C++ compiler. This includes | |||
passing the CUDA lib64 directory as a library directory, and linking | |||
``cudart``. You can pass additional flags to nvcc via | |||
``extra_cuda_cflags``, just like with ``extra_cflags`` for C++. Various | |||
heuristics for finding the CUDA install directory are used, which usually | |||
work fine. If not, setting the ``CUDA_ROOT_DIR`` environment variable is the | |||
safest option. If you use cuDNN, please also set the ``CUDNN_ROOT_DIR`` | |||
environment variable. | |||
Args: | |||
name: The name of the custom op to build. | |||
sources: A list of relative or absolute paths to C++ source files. | |||
extra_cflags: optional list of compiler flags to forward to the build. | |||
extra_cuda_cflags: optional list of compiler flags to forward to nvcc | |||
when building CUDA sources. | |||
extra_ldflags: optional list of linker flags to forward to the build. | |||
extra_include_paths: optional list of include directories to forward | |||
to the build. | |||
with_cuda: Determines whether CUDA headers and libraries are added to | |||
the build. If set to ``None`` (default), this value is | |||
automatically determined based on the existence of ``.cu`` or | |||
``.cuh`` in ``sources``. Set it to ``True`` to force CUDA headers | |||
and libraries to be included. | |||
build_dir: optional path to use as build workspace. | |||
verbose: If ``True``, turns on verbose logging of load steps. | |||
abi_tag: Determines the value of the macro ``_GLIBCXX_USE_CXX11_ABI`` | |||
used by the GCC compiler; should be ``0`` or ``1``. | |||
Returns: | |||
the compiled dynamic library path | |||
""" | |||
# phase 1: prepare config | |||
if abi_tag != None: | |||
global MGE_ABI_VER | |||
MGE_ABI_VER = abi_tag | |||
def strlist(args, name): | |||
assert isinstance(args, str) or isinstance( | |||
args, list | |||
), "{} must be str or list[str]".format(name) | |||
if isinstance(args, str): | |||
return [args] | |||
for arg in args: | |||
assert isinstance(arg, str) | |||
args = [arg.strip() for arg in args] | |||
return args | |||
sources = strlist(sources, "sources") | |||
extra_cflags = strlist(extra_cflags, "extra_cflags") | |||
extra_cuda_cflags = strlist(extra_cuda_cflags, "extra_cuda_cflags") | |||
extra_ldflags = strlist(extra_ldflags, "extra_ldflags") | |||
extra_include_paths = strlist(extra_include_paths, "extra_include_paths") | |||
with_cuda = any(map(_is_cuda_file, sources)) if with_cuda is None else with_cuda | |||
with_cudnn = any(["cudnn" in f for f in extra_ldflags]) | |||
if CUDA_ROOT_DIR == None and with_cuda: | |||
print( | |||
"No CUDA runtime is found, using {}=/path/to/your/cuda_root_dir".format( | |||
ev_cuda_root_dir | |||
) | |||
) | |||
if CUDNN_ROOT_DIR == None and with_cudnn: | |||
print( | |||
"Cannot find the root directory of cudnn, using {}=/path/to/your/cudnn_root_dir".format( | |||
ev_cudnn_root_dir | |||
) | |||
) | |||
build_dir = os.path.abspath( | |||
_get_build_dir(name) if build_dir is None else build_dir | |||
) | |||
if not os.path.exists(build_dir): | |||
os.makedirs(build_dir, exist_ok=True) | |||
if verbose: | |||
print("Using {} to build megengine custom op".format(build_dir)) | |||
# phase 2: version check | |||
version, old_version = version_check( | |||
name, | |||
sources, | |||
[extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths], | |||
build_dir, | |||
with_cuda, | |||
with_cudnn, | |||
abi_tag, | |||
) | |||
if verbose: | |||
if version != old_version and old_version != None: | |||
print( | |||
"Input conditions of custom op {} have changed, bumping to version {}".format( | |||
name, version | |||
) | |||
) | |||
print("Building custom op {} with version {}".format(name, version)) | |||
if version == old_version: | |||
if verbose: | |||
print( | |||
"No modifications detected for {}, skipping build step...".format(name) | |||
) | |||
# the library from the previous build is still valid, so return its path | |||
return os.path.join(build_dir, "{}_v{}.{}".format(name, version, "dll" if IS_WINDOWS else "so")) | |||
name = "{}_v{}".format(name, version) | |||
# phase 3: compiler and ninja check | |||
_check_ninja_availability() | |||
_check_compiler_compatibility() | |||
# phase 4: setup the compile flags | |||
user_includes, system_includes = _setup_includes( | |||
extra_include_paths, with_cuda, with_cudnn | |||
) | |||
common_cflags = _setup_common_cflags(user_includes, system_includes) | |||
cuda_cflags = ( | |||
_setup_cuda_cflags(common_cflags, extra_cuda_cflags) if with_cuda else None | |||
) | |||
ldflags = _setup_ldflags(extra_ldflags, with_cuda, with_cudnn) | |||
if IS_WINDOWS: | |||
cflags = common_cflags + COMMON_MSVC_FLAGS + extra_cflags | |||
cflags = _nt_quote_args(cflags) | |||
else: | |||
cflags = common_cflags + ["-fPIC", "-std=c++14"] + extra_cflags | |||
ldflags = _add_shared_flag(ldflags) | |||
if sys.platform.startswith("darwin"): | |||
ldflags.append("-undefined dynamic_lookup") | |||
elif IS_WINDOWS: | |||
ldflags += ["/link"] | |||
ldflags = _nt_quote_args(ldflags) | |||
baton = FileBaton(os.path.join(build_dir, "lock")) | |||
if baton.try_acquire(): | |||
try: | |||
# phase 5: generate ninja build file | |||
objs = [_obj_file_path(src) for src in sources] | |||
name += ".dll" if IS_WINDOWS else ".so" | |||
build_file_path = os.path.join(build_dir, "build.ninja") | |||
if verbose: | |||
print("Emitting ninja build file {}".format(build_file_path)) | |||
_dump_ninja_file( | |||
path=build_file_path, | |||
cflags=cflags, | |||
post_cflags=None, | |||
cuda_cflags=cuda_cflags, | |||
cuda_post_cflags=None, | |||
sources=sources, | |||
objects=objs, | |||
ldflags=ldflags, | |||
library_target=name, | |||
with_cuda=with_cuda, | |||
) | |||
# phase 6: build with ninja | |||
if verbose: | |||
print( | |||
"Compiling and linking your custom op {}".format( | |||
os.path.join(build_dir, name) | |||
) | |||
) | |||
_build_with_ninja(build_dir, verbose, "compiling error") | |||
finally: | |||
baton.release() | |||
else: | |||
baton.wait() | |||
return os.path.join(build_dir, name) | |||
def build_and_load( | |||
name: str, | |||
sources: Union[str, List[str]], | |||
extra_cflags: Union[str, List[str]] = [], | |||
extra_cuda_cflags: Union[str, List[str]] = [], | |||
extra_ldflags: Union[str, List[str]] = [], | |||
extra_include_paths: Union[str, List[str]] = [], | |||
with_cuda: Optional[bool] = None, | |||
build_dir: Optional[str] = None, | |||
verbose: bool = False, | |||
abi_tag: Optional[int] = None, | |||
) -> str: | |||
r"""Build and Load a Custom Op with ninja in the way of just-in-time (JIT). | |||
Same as the function ``build()`` but load the built dynamic library. | |||
Args: | |||
same as ``build()`` | |||
Returns: | |||
the compiled dynamic library path | |||
""" | |||
lib_path = build( | |||
name, | |||
sources, | |||
extra_cflags, | |||
extra_cuda_cflags, | |||
extra_ldflags, | |||
extra_include_paths, | |||
with_cuda, | |||
build_dir, | |||
verbose, | |||
abi_tag, | |||
) | |||
if verbose: | |||
print("Load the compiled custom op {}".format(lib_path)) | |||
load(lib_path) | |||
return lib_path |
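Putting it together: a typical call hands build_and_load() the source files, optional flags, and the ABI tag exposed by the binding added in the next hunk; it compiles them into the cache (or a caller-supplied build_dir) and loads the result. A usage sketch with hypothetical source file names:

```python
# Sketch: JIT-build a custom op and load it in one step (source paths are hypothetical).
from megengine.core.ops import custom
from megengine.utils import custom_op_tools

lib_path = custom_op_tools.build_and_load(
    "my_op",
    ["my_op.cpp", "my_op.cu"],               # .cu sources are detected and compiled with nvcc
    extra_cflags=["-O3"],
    extra_include_paths=["include"],
    verbose=True,
    abi_tag=custom.get_custom_op_abi_tag(),  # match MegEngine's _GLIBCXX_USE_CXX11_ABI
)
print("built and loaded:", lib_path)
custom.unload(lib_path)                      # unregister the ops when done
```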
@@ -766,6 +766,13 @@ void init_custom(pybind11::module m) { | |||
m.def("_install", &install_custom); | |||
m.def("_uninstall", &uninstall_custom); | |||
m.def("_get_custom_op_list", &get_custom_op_list); | |||
m.def("get_custom_op_abi_tag", [](void) -> int { | |||
int ret = 0; | |||
#ifdef _GLIBCXX_USE_CXX11_ABI | |||
ret = _GLIBCXX_USE_CXX11_ABI; | |||
#endif | |||
return ret; | |||
}); | |||
static PyMethodDef method_def = { | |||
#ifdef METH_FASTCALL | |||
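The binding above lets Python code read the _GLIBCXX_USE_CXX11_ABI value MegEngine itself was compiled with, so a JIT-built op can be compiled with a matching C++ ABI. A minimal check:

```python
# Sketch: query the C++ ABI tag MegEngine was built with (0 or 1 with GCC/libstdc++).
from megengine.core.ops import custom

abi = custom.get_custom_op_abi_tag()
print("_GLIBCXX_USE_CXX11_ABI =", abi)  # pass as abi_tag= to custom_op_tools.build_and_load()
```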
@@ -0,0 +1,140 @@ | |||
/** | |||
* \file imperative/python/test/unit/core/custom_opsrc/elem_add.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "megbrain/custom/custom.h" | |||
CUSTOM_OP_REG_BEGIN(ElemAddSmooth) | |||
void forward_device_infer( | |||
const std::vector<Device>& inputs, const Param& params, | |||
std::vector<Device>& outputs) { | |||
outputs[0] = inputs[0]; | |||
} | |||
void forward_shape_infer( | |||
const std::vector<Shape>& inputs, const Param& params, | |||
std::vector<Shape>& outputs) { | |||
outputs[0] = inputs[0]; | |||
} | |||
void forward_dtype_infer( | |||
const std::vector<DType>& inputs, const Param& params, | |||
std::vector<DType>& outputs) { | |||
outputs[0] = inputs[0]; | |||
} | |||
void forward_format_infer( | |||
const std::vector<Format>& inputs, const Param& params, | |||
std::vector<Format>& outputs) { | |||
outputs[0] = inputs[0]; | |||
} | |||
template <typename scalar_t> | |||
void forward_kernel( | |||
const scalar_t* input0, const scalar_t* input1, scalar_t* output, size_t len, | |||
float smooth) { | |||
for (size_t i = 0; i < len; ++i) { | |||
output[i] = input0[i] + input1[i]; | |||
if (output[i] < 0) | |||
output[i] += smooth; | |||
else | |||
output[i] -= smooth; | |||
} | |||
} | |||
void forward_compute( | |||
const std::vector<Tensor>& inputs, const Param& params, | |||
std::vector<Tensor>& outputs) { | |||
DISPATCH_SIGN_INT_AND_FLOAT_TYPES( | |||
outputs[0].dtype(), "forward_compute", ([&]() { | |||
forward_kernel<scalar_t>( | |||
inputs[0].data<scalar_t>(), inputs[1].data<scalar_t>(), | |||
outputs[0].data<scalar_t>(), outputs[0].size(), | |||
params["smooth"].as<float>()); | |||
})); | |||
} | |||
CUSTOM_OP_REG(ElemAddSmoothForward) | |||
.set_description( | |||
"Custom ElemAdd Operator With a Smooth Parameter, " | |||
"which is used to verify the CPU kernel") | |||
.add_input("lhs") | |||
.add_input("rhs") | |||
.add_output("output") | |||
.add_param("smooth", 0.f) | |||
.set_device_infer(forward_device_infer) | |||
.set_shape_infer(forward_shape_infer) | |||
.set_dtype_infer(forward_dtype_infer) | |||
.set_format_infer(forward_format_infer) | |||
.set_compute(forward_compute); | |||
void backward_device_infer( | |||
const std::vector<Device>& ograds, const Param& params, | |||
std::vector<Device>& igrads) { | |||
igrads[0] = ograds[0]; | |||
igrads[1] = ograds[0]; | |||
} | |||
void backward_shape_infer( | |||
const std::vector<Shape>& ograds, const Param& params, | |||
std::vector<Shape>& igrads) { | |||
igrads[0] = ograds[0]; | |||
igrads[1] = ograds[0]; | |||
} | |||
void backward_dtype_infer( | |||
const std::vector<DType>& ograds, const Param& params, | |||
std::vector<DType>& igrads) { | |||
igrads[0] = ograds[0]; | |||
igrads[1] = ograds[0]; | |||
} | |||
void backward_format_infer( | |||
const std::vector<Format>& ograds, const Param& params, | |||
std::vector<Format>& igrads) { | |||
igrads[0] = ograds[0]; | |||
igrads[1] = ograds[0]; | |||
} | |||
template <typename scalar_t> | |||
void backward_kernel( | |||
const scalar_t* ograd, scalar_t* igrad0, scalar_t* igrad1, size_t len) { | |||
for (size_t i = 0; i < len; ++i) { | |||
igrad0[i] = ograd[i]; | |||
igrad1[i] = ograd[i]; | |||
} | |||
} | |||
void backward_compute( | |||
const std::vector<Tensor>& ograds, const Param& params, | |||
std::vector<Tensor>& igrads) { | |||
DISPATCH_SIGN_INT_AND_FLOAT_TYPES( | |||
igrads[0].dtype(), "backward_compute", ([&]() { | |||
backward_kernel<scalar_t>( | |||
ograds[0].data<scalar_t>(), igrads[0].data<scalar_t>(), | |||
igrads[1].data<scalar_t>(), igrads[0].size()); | |||
})); | |||
} | |||
CUSTOM_OP_REG(ElemAddSmoothBackward) | |||
.set_description( | |||
"Custom ElemAdd Operator With a Smooth Parameter, " | |||
"which is used to verify the CPU kernel") | |||
.add_input("ograd") | |||
.add_output("igrad_lhs") | |||
.add_output("igrad_rhs") | |||
.set_device_infer(backward_device_infer) | |||
.set_shape_infer(backward_shape_infer) | |||
.set_dtype_infer(backward_dtype_infer) | |||
.set_format_infer(backward_format_infer) | |||
.set_compute(backward_compute); | |||
CUSTOM_OP_REG_END(ElemAddSmooth) |
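For reference, the forward kernel above adds the two inputs element-wise and then shrinks each result toward zero by smooth, while the backward pass simply propagates the output gradient to both inputs. The same math in NumPy, useful as a test oracle (illustrative only, not part of the patch):

```python
# NumPy reference for ElemAddSmoothForward / ElemAddSmoothBackward.
import numpy as np

def elem_add_smooth_forward(lhs, rhs, smooth=0.0):
    out = lhs + rhs
    return np.where(out < 0, out + smooth, out - smooth)

def elem_add_smooth_backward(ograd):
    # the op is an element-wise add w.r.t. both inputs, so both gradients equal ograd
    return ograd.copy(), ograd.copy()
```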
@@ -0,0 +1,65 @@ | |||
/** | |||
* \file imperative/python/test/unit/core/custom_opsrc/matmul_scale.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "./matmul_scale.h" | |||
#include "megbrain/custom/custom.h" | |||
CUSTOM_OP_REG_BEGIN(MatMulScale) | |||
void forward_shape_infer( | |||
const std::vector<Shape>& inputs, const Param& params, | |||
std::vector<Shape>& outputs) { | |||
outputs[0] = {inputs[0][0], inputs[1][1]}; | |||
} | |||
void forward_compute( | |||
const std::vector<Tensor>& inputs, const Param& params, | |||
std::vector<Tensor>& outputs) { | |||
matmul_forward_helper( | |||
inputs[0], inputs[1], outputs[0], inputs[0].shape()[0], | |||
inputs[0].shape()[1], inputs[1].shape()[1], params["scale"].as<float>()); | |||
} | |||
CUSTOM_OP_REG(MatMulScaleForward) | |||
.add_inputs(2) | |||
.add_outputs(1) | |||
.add_param("scale", 1.0f) | |||
.set_shape_infer(forward_shape_infer) | |||
.set_compute("cuda", forward_compute); | |||
void backward_shape_infer( | |||
const std::vector<Shape>& ograd_and_inputs, const Param& params, | |||
std::vector<Shape>& outputs) { | |||
outputs[0] = ograd_and_inputs[1]; | |||
outputs[1] = ograd_and_inputs[2]; | |||
} | |||
void backward_compute( | |||
const std::vector<Tensor>& ograd_and_inputs, const Param& params, | |||
std::vector<Tensor>& igrads) { | |||
matmul_backward_lhs_helper( | |||
ograd_and_inputs[2], ograd_and_inputs[0], igrads[0], | |||
ograd_and_inputs[1].shape()[0], ograd_and_inputs[1].shape()[1], | |||
ograd_and_inputs[2].shape()[1], params["scale"].as<float>()); | |||
matmul_backward_rhs_helper( | |||
ograd_and_inputs[1], ograd_and_inputs[0], igrads[1], | |||
ograd_and_inputs[1].shape()[0], ograd_and_inputs[1].shape()[1], | |||
ograd_and_inputs[2].shape()[1], params["scale"].as<float>()); | |||
} | |||
CUSTOM_OP_REG(MatMulScaleBackward) | |||
.add_inputs(3) | |||
.add_outputs(2) | |||
.add_param("scale", 1.0f) | |||
.set_shape_infer(backward_shape_infer) | |||
.set_compute("cuda", backward_compute); | |||
CUSTOM_OP_REG_END(MatMulScale) |
@@ -0,0 +1,97 @@ | |||
/** | |||
* \file imperative/python/test/unit/core/custom_opsrc/matmul_scale.cu | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include <cuda.h> | |||
#include <cuda_runtime.h> | |||
#include <stdio.h> | |||
#include "./matmul_scale.h" | |||
using namespace custom; | |||
// matmul_forward for Mat_mxk * Mat_kxn | |||
template <typename T> | |||
__global__ void matmul_forward_naive( | |||
const T* lhs, const T* rhs, T* res, size_t M, size_t K, size_t N, float scale) { | |||
int row = blockIdx.y * blockDim.y + threadIdx.y; | |||
int col = blockIdx.x * blockDim.x + threadIdx.x; | |||
T acc = 0; | |||
for (int i = 0; i < K; ++i) | |||
acc += lhs[row * K + i] * rhs[i * N + col]; | |||
res[row * N + col] = acc * scale; | |||
} | |||
// matmul_backward_lhs for Mat_mxk * Mat_kxn = Mat_mxn | |||
// that is Mat_mxn * Mat_nxk | |||
template <typename T> | |||
__global__ void matmul_backward_lhs_naive( | |||
const T* rhs, const T* ograd, T* lhs_grad, size_t M, size_t K, size_t N, | |||
float scale) { | |||
int row = blockIdx.y * blockDim.y + threadIdx.y; | |||
int col = blockIdx.x * blockDim.x + threadIdx.x; | |||
T acc = 0; | |||
for (int i = 0; i < N; ++i) | |||
acc += ograd[row * N + i] * rhs[col * N + i]; | |||
lhs_grad[row * K + col] = acc / scale; | |||
} | |||
// matmul_backward_rhs for Mat_mxk * Mat_kxn = Mat_mxn | |||
// that is Mat_kxm * Mat_mxn | |||
template <typename T> | |||
__global__ void matmul_backward_rhs_naive( | |||
const T* lhs, const T* ograd, T* rhs_grad, size_t M, size_t K, size_t N, | |||
float scale) { | |||
int row = blockIdx.y * blockDim.y + threadIdx.y; | |||
int col = blockIdx.x * blockDim.x + threadIdx.x; | |||
T acc = 0; | |||
for (int i = 0; i < M; ++i) | |||
acc += lhs[i * K + row] * ograd[i * N + col]; | |||
rhs_grad[row * N + col] = acc / scale; | |||
} | |||
void matmul_forward_helper( | |||
const Tensor& lhs, const Tensor& rhs, Tensor& res, size_t M, size_t K, size_t N, | |||
float scale) { | |||
dim3 block(1, 1); | |||
dim3 grid(N / block.x, M / block.y); | |||
DISPATCH_INT_AND_FLOAT_TYPES(res.dtype(), "matmul_forward", ([&]() { | |||
matmul_forward_naive<scalar_t><<<grid, block>>>( | |||
lhs.data<scalar_t>(), rhs.data<scalar_t>(), | |||
res.data<scalar_t>(), M, K, N, scale); | |||
})); | |||
} | |||
void matmul_backward_lhs_helper( | |||
const Tensor& rhs, const Tensor& ograd, Tensor& lhs_grad, size_t M, size_t K, | |||
size_t N, float scale) { | |||
dim3 block(1, 1); | |||
dim3 grid(K / block.x, M / block.y); | |||
DISPATCH_INT_AND_FLOAT_TYPES( | |||
lhs_grad.dtype(), "matmul_backward_lhs", ([&]() { | |||
matmul_backward_lhs_naive<scalar_t><<<grid, block>>>( | |||
rhs.data<scalar_t>(), ograd.data<scalar_t>(), | |||
lhs_grad.data<scalar_t>(), M, K, N, scale); | |||
})); | |||
} | |||
void matmul_backward_rhs_helper( | |||
const Tensor& lhs, const Tensor& ograd, Tensor& rhs_grad, size_t M, size_t K, | |||
size_t N, float scale) { | |||
dim3 block(1, 1); | |||
dim3 grid(N / block.x, K / block.y); | |||
DISPATCH_INT_AND_FLOAT_TYPES( | |||
rhs_grad.dtype(), "matmul_backward_rhs", ([&]() { | |||
matmul_backward_rhs_naive<scalar_t><<<grid, block>>>( | |||
lhs.data<scalar_t>(), ograd.data<scalar_t>(), | |||
rhs_grad.data<scalar_t>(), M, K, N, scale); | |||
})); | |||
} |
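The naive CUDA kernels above compute a scaled matmul and its gradients with one thread per output element (the deliberately trivial 1x1 thread block keeps them simple). Expressed in NumPy, the kernels compute the following; note that the backward kernels divide by scale rather than multiply (illustrative only, not part of the patch):

```python
# NumPy reference for the MatMulScale CUDA kernels above.
import numpy as np

def matmul_scale_forward(lhs, rhs, scale=1.0):
    return (lhs @ rhs) * scale              # matmul_forward_naive

def matmul_scale_backward(lhs, rhs, ograd, scale=1.0):
    lhs_grad = (ograd @ rhs.T) / scale      # matmul_backward_lhs_naive
    rhs_grad = (lhs.T @ ograd) / scale      # matmul_backward_rhs_naive
    return lhs_grad, rhs_grad
```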
@@ -0,0 +1,24 @@ | |||
/** | |||
* \file imperative/python/test/unit/core/custom_opsrc/matmul_scale.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "megbrain/custom/custom.h" | |||
using Tensor = custom::Tensor; | |||
void matmul_forward_helper( | |||
const Tensor& lhs, const Tensor& rhs, Tensor& res, size_t M, size_t K, size_t N, | |||
float scale); | |||
void matmul_backward_lhs_helper( | |||
const Tensor& rhs, const Tensor& ograd, Tensor& lhs_grad, size_t M, size_t K, | |||
size_t N, float scale); | |||
void matmul_backward_rhs_helper( | |||
const Tensor& lhs, const Tensor& ograd, Tensor& rhs_grad, size_t M, size_t K, | |||
size_t N, float scale); |
@@ -0,0 +1,111 @@ | |||
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
# | |||
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
# | |||
# Unless required by applicable law or agreed to in writing, | |||
# software distributed under the License is distributed on an | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
import os | |||
import platform | |||
import shutil | |||
import sys | |||
import numpy as np | |||
import pytest | |||
import megengine | |||
import megengine.functional as F | |||
import megengine.optimizer as optim | |||
from megengine import jit | |||
from megengine.autodiff import Function, GradManager | |||
from megengine.core._imperative_rt.core2 import apply | |||
from megengine.core.ops import custom | |||
from megengine.device import get_device_count | |||
from megengine.module import Conv2d, Linear, Module | |||
from megengine.random import normal | |||
from megengine.tensor import Parameter, Tensor | |||
from megengine.utils import custom_op_tools | |||
def compare(ref, real): | |||
if ref.shape != real.shape: | |||
real = real.T | |||
np.testing.assert_allclose(ref, real, rtol=1e-3, atol=1e-5) | |||
def build_and_clean(test_func): | |||
def wrapper(): | |||
cur_dir_path = os.path.dirname(os.path.abspath(__file__)) | |||
build_path = os.path.join(cur_dir_path, "custom_opsrc", "build") | |||
mgb_root_path = os.path.dirname( | |||
os.path.dirname( | |||
os.path.dirname(os.path.dirname(os.path.dirname(cur_dir_path))) | |||
) | |||
) | |||
extra_include_paths = [os.path.join(mgb_root_path, "src", "custom", "include")] | |||
extra_ld_flags = [] | |||
if sys.platform != "win32": | |||
ld_path = os.environ.get("LD_LIBRARY_PATH") | |||
if ld_path != None: | |||
ld_dirs = ld_path.split(":") | |||
for ld_dir in ld_dirs: | |||
if os.path.exists(ld_dir) and os.path.isdir(ld_dir): | |||
for lib in os.listdir(ld_dir): | |||
if "megengine_shared" in lib: | |||
extra_ld_flags += [ | |||
"-L{} -Wl,-rpath,{}".format(ld_dir, ld_dir) | |||
] | |||
break | |||
if get_device_count("gpu") > 0: | |||
custom_opsrc = [ | |||
os.path.join(cur_dir_path, "custom_opsrc", "matmul_scale.cpp"), | |||
os.path.join(cur_dir_path, "custom_opsrc", "matmul_scale.cu"), | |||
] | |||
else: | |||
custom_opsrc = [os.path.join(cur_dir_path, "custom_opsrc", "elem_add.cpp")] | |||
lib_path = custom_op_tools.build_and_load( | |||
"test_op", | |||
custom_opsrc, | |||
extra_include_paths=extra_include_paths, | |||
extra_ldflags=extra_ld_flags, | |||
build_dir=build_path, | |||
verbose=False, | |||
abi_tag=custom.get_custom_op_abi_tag(), | |||
) | |||
test_func() | |||
custom.unload(lib_path) | |||
if os.path.exists(build_path): | |||
shutil.rmtree(build_path) | |||
return wrapper | |||
@pytest.mark.skipif( | |||
get_device_count("gpu") > 0, reason="elem_add operator is only supported on CPU" | |||
) | |||
@build_and_clean | |||
def test_custom_op_cpu_build(): | |||
assert "ElemAddSmoothForward" in custom._get_custom_op_list() | |||
assert "ElemAddSmoothBackward" in custom._get_custom_op_list() | |||
assert hasattr(custom, "ElemAddSmoothForward") | |||
assert hasattr(custom, "ElemAddSmoothBackward") | |||
@pytest.mark.skipif( | |||
platform.system() == "Darwin", | |||
reason="GPU kernel is only support on Linux and Windows", | |||
) | |||
@pytest.mark.skipif( | |||
get_device_count("gpu") < 1, reason="matmul scale operator is only supported on GPU" | |||
) | |||
@build_and_clean | |||
def test_custom_op_gpu_build(): | |||
assert "MatMulScaleForward" in custom._get_custom_op_list() | |||
assert "MatMulScaleBackward" in custom._get_custom_op_list() | |||
assert hasattr(custom, "MatMulScaleForward") | |||
assert hasattr(custom, "MatMulScaleBackward") |
@@ -171,6 +171,7 @@ function do_build() { | |||
mkdir -p staging | |||
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cp -a ${SRC_DIR}/src/custom/include staging/megengine/core/include/ | |||
cd ${BUILD_DIR}/staging/megengine/core | |||
rt_file=`ls _imperative_rt.*.so` | |||
echo "rt file is: ${rt_file}" | |||
@@ -151,6 +151,7 @@ do | |||
rm -rf staging | |||
mkdir -p staging | |||
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cp -a ${SRC_DIR}/src/custom/include/megbrain staging/megengine/core/include | |||
cd ${BUILD_DIR}/staging/megengine/core | |||
mkdir -p lib/ucx | |||
@@ -77,11 +77,13 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6 | |||
CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | |||
CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | |||
CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | |||
MGE_EXPORT_LIB="${SRC_DIR}/build_dir/host/build/src/megengine_shared.dll" | |||
MGE_EXPORT_DLL="${SRC_DIR}/build_dir/host/build/src/megengine_shared.dll" | |||
MGE_EXPORT_LIB="${SRC_DIR}/build_dir/host/build/src/megengine_shared.lib" | |||
function depend_real_copy() { | |||
REAL_DST=$1 | |||
echo "real copy lib to $1" | |||
cp "${MGE_EXPORT_DLL}" ${REAL_DST} | |||
cp "${MGE_EXPORT_LIB}" ${REAL_DST} | |||
if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then | |||
@@ -190,6 +192,7 @@ function do_build() { | |||
rm -rf staging | |||
mkdir -p staging | |||
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
cp -a ${SRC_DIR}/src/custom/include/megbrain staging/megengine/core/include/ | |||
cd ${BUILD_DIR}/staging/megengine/core | |||
rt_file=`ls _imperative_rt.*.pyd` | |||
echo "rt file is: ${rt_file}" | |||
@@ -1,3 +1,8 @@ | |||
# force define a SHARED target for the whl build: when building for APPLE | |||
# we force BUILD_SHARED_LIBS=OFF as required by Xcode | |||
set(MGE_SHARED_LIB megengine_shared) | |||
set(MGE_SHARED_LIB ${MGE_SHARED_LIB} PARENT_SCOPE) | |||
if(MGE_WITH_JIT_MLIR) | |||
add_subdirectory(jit/include/megbrain/jit/mlir/ir) | |||
endif() | |||
@@ -206,32 +211,30 @@ set (_VER_FILE ${PROJECT_SOURCE_DIR}/src/version.ld) | |||
# Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF | |||
add_library(megengine) | |||
# force define a SHARED target for the whl build: when building for APPLE | |||
# we force BUILD_SHARED_LIBS=OFF as required by Xcode | |||
add_library(megengine_shared SHARED) | |||
add_library(${MGE_SHARED_LIB} SHARED) | |||
target_link_libraries(megengine PRIVATE ${MGE_CUDA_LIBS}) | |||
target_link_libraries(megengine PUBLIC megbrain megdnn) | |||
target_link_libraries(megengine_shared PUBLIC megbrain megdnn) | |||
target_link_libraries(megengine_shared PRIVATE ${MGE_CUDA_LIBS}) | |||
target_link_libraries(${MGE_SHARED_LIB} PUBLIC megbrain megdnn) | |||
target_link_libraries(${MGE_SHARED_LIB} PRIVATE ${MGE_CUDA_LIBS}) | |||
if (UNIX AND NOT APPLE) | |||
target_link_options(megengine PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) | |||
set_target_properties(megengine PROPERTIES LINK_DEPENDS ${_VER_FILE}) | |||
target_link_options(megengine_shared PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) | |||
set_target_properties(megengine_shared PROPERTIES LINK_DEPENDS ${_VER_FILE}) | |||
target_link_options(${MGE_SHARED_LIB} PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) | |||
set_target_properties(${MGE_SHARED_LIB} PROPERTIES LINK_DEPENDS ${_VER_FILE}) | |||
endif() | |||
if(WIN32 OR MSVC) | |||
target_compile_definitions(megbrain PRIVATE MGE_DLL_EXPORT) | |||
target_compile_definitions(megdnn PRIVATE MGE_DLL_EXPORT) | |||
target_compile_definitions(megengine PRIVATE MGE_DLL_EXPORT) | |||
target_compile_definitions(megengine_shared PRIVATE MGE_DLL_EXPORT) | |||
target_compile_definitions(${MGE_SHARED_LIB} PRIVATE MGE_DLL_EXPORT) | |||
# please do not use WINDOWS_EXPORT_ALL_SYMBOLS, as the number of symbols exceeds 65535 when building with CUDA | |||
#set_target_properties(megengine PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | |||
#set_target_properties(megengine_shared PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | |||
#set_target_properties(${MGE_SHARED_LIB} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | |||
endif() | |||
if (MGE_WITH_DISTRIBUTED) | |||
message(VERBOSE "megengine configured to link megray") | |||
target_link_libraries(megengine PUBLIC megray) | |||
target_link_libraries(megengine_shared PUBLIC megray) | |||
target_link_libraries(${MGE_SHARED_LIB} PUBLIC megray) | |||
endif() | |||
# Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready | |||
# for this. | |||
@@ -18,12 +18,31 @@ | |||
#ifndef _WIN32 | |||
#include <dlfcn.h> | |||
#else | |||
#include <windows.h> | |||
#endif | |||
using namespace mgb; | |||
namespace custom { | |||
#ifdef _WIN32 | |||
#define RTLD_LAZY 0 | |||
void* dlopen(const char* file, int) { | |||
return static_cast<void*>(LoadLibrary(file)); | |||
} | |||
int dlclose(void* handle) { | |||
return static_cast<int>(FreeLibrary(static_cast<HMODULE>(handle))); | |||
} | |||
const char* dlerror(void) { | |||
static char win_err_info[] = "no dlerror info in windows"; | |||
return win_err_info; | |||
} | |||
#endif | |||
CustomOpManager* CustomOpManager::inst(void) { | |||
static CustomOpManager op_manager; | |||
return &op_manager; | |||
@@ -127,7 +146,6 @@ std::vector<RunTimeId> CustomOpManager::op_id_list(void) { | |||
return ret; | |||
} | |||
#ifndef _WIN32 | |||
CustomLib::CustomLib(const std::string& path, int mode = RTLD_LAZY) | |||
: m_handle(nullptr, [](void* handle) { dlclose(handle); }) { | |||
auto op_list_before_load = CustomOpManager::inst()->op_name_list(); | |||
@@ -146,12 +164,6 @@ CustomLib::CustomLib(const std::string& path, int mode = RTLD_LAZY) | |||
} | |||
} | |||
} | |||
#else | |||
CustomLib::CustomLib(const std::string& path, int mode = 0) | |||
: m_handle(nullptr, [](void* handle) {}) { | |||
mgb_assert(false, "custom op is only supported on Linux now"); | |||
} | |||
#endif | |||
const std::vector<std::string>& CustomLib::ops_in_lib(void) const { | |||
return m_ops; | |||
@@ -16,7 +16,8 @@ | |||
#include "tensor.h" | |||
namespace custom { | |||
std::shared_ptr<CustomOp> op_insert(std::string opname, uint32_t version); | |||
MGE_WIN_DECLSPEC_FUC std::shared_ptr<CustomOp> op_insert( | |||
std::string opname, uint32_t version); | |||
} | |||
#define CUSTOM_OP_REG(OpName) \ | |||
@@ -32,27 +32,26 @@ namespace custom { | |||
using RunTimeId = uint64_t; | |||
class ArgInfo { | |||
class MGE_WIN_DECLSPEC_FUC ArgInfo { | |||
CUSTOM_PIMPL_CLS_DECL(ArgInfo); | |||
MGE_WIN_DECLSPEC_FUC ArgInfo( | |||
const std::string& name, const std::string& desc, | |||
ArgInfo(const std::string& name, const std::string& desc, | |||
const std::unordered_set<std::string>& dtypes, const int& ndim, | |||
const std::string& mem_stgy); | |||
MGE_WIN_DECLSPEC_FUC const std::string& name(void) const; | |||
MGE_WIN_DECLSPEC_FUC const std::string& desc(void) const; | |||
MGE_WIN_DECLSPEC_FUC const std::unordered_set<std::string>& dtypes(void) const; | |||
MGE_WIN_DECLSPEC_FUC int ndim(void) const; | |||
MGE_WIN_DECLSPEC_FUC const std::string& mem_strategy(void) const; | |||
const std::string& name(void) const; | |||
const std::string& desc(void) const; | |||
const std::unordered_set<std::string>& dtypes(void) const; | |||
int ndim(void) const; | |||
const std::string& mem_strategy(void) const; | |||
MGE_WIN_DECLSPEC_FUC std::string str() const; | |||
std::string str() const; | |||
}; | |||
class CustomOp { | |||
class MGE_WIN_DECLSPEC_FUC CustomOp { | |||
std::unique_ptr<void, void_deleter> m_impl; | |||
public: | |||
MGE_WIN_DECLSPEC_FUC CustomOp(const std::string& op_type, uint32_t version); | |||
CustomOp(const std::string& op_type, uint32_t version); | |||
PREVENT_COPY_AND_ASSIGN(CustomOp); | |||
using DeviceInferFuncPtr = | |||
@@ -71,70 +70,65 @@ public: | |||
void (*)(const std::vector<Tensor>&, const Param&, std::vector<Tensor>&); | |||
// write for forward | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_device_infer(DeviceInferFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_shape_infer(ShapeInferFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_dtype_infer(DTypeInferFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_format_infer(FormatInferFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_preprocess(PreprocessFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_preprocess( | |||
const std::string& device, PreprocessFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_postprocess(PostprocessFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_postprocess( | |||
const std::string& device, PostprocessFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_compute(ComputeFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_compute( | |||
const std::string& device, ComputeFuncPtr func); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& set_description(const std::string& op_desc); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& add_input( | |||
CustomOp& set_device_infer(DeviceInferFuncPtr func); | |||
CustomOp& set_shape_infer(ShapeInferFuncPtr func); | |||
CustomOp& set_dtype_infer(DTypeInferFuncPtr func); | |||
CustomOp& set_format_infer(FormatInferFuncPtr func); | |||
CustomOp& set_preprocess(PreprocessFuncPtr func); | |||
CustomOp& set_preprocess(const std::string& device, PreprocessFuncPtr func); | |||
CustomOp& set_postprocess(PostprocessFuncPtr func); | |||
CustomOp& set_postprocess(const std::string& device, PostprocessFuncPtr func); | |||
CustomOp& set_compute(ComputeFuncPtr func); | |||
CustomOp& set_compute(const std::string& device, ComputeFuncPtr func); | |||
CustomOp& set_description(const std::string& op_desc); | |||
CustomOp& add_input( | |||
const std::string& name, const std::string& desc, | |||
const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | |||
int dims = -1, const std::string& mem_stgy = "default"); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& add_output( | |||
CustomOp& add_output( | |||
const std::string& name, const std::string& desc, | |||
const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | |||
int dims = -1, const std::string& mem_stgy = "default"); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& add_input( | |||
CustomOp& add_input( | |||
const std::string& name, | |||
const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | |||
int dims = -1, const std::string& mem_stgy = "default"); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& add_output( | |||
CustomOp& add_output( | |||
const std::string& name, | |||
const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | |||
int dims = -1, const std::string& mem_stgy = "default"); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& add_inputs(const size_t& input_num); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& add_outputs(const size_t& output_num); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& add_param( | |||
const std::string& name, const ParamVal& default_val); | |||
MGE_WIN_DECLSPEC_FUC CustomOp& add_param( | |||
CustomOp& add_inputs(const size_t& input_num); | |||
CustomOp& add_outputs(const size_t& output_num); | |||
CustomOp& add_param(const std::string& name, const ParamVal& default_val); | |||
CustomOp& add_param( | |||
const std::string& name, const std::string& desc, | |||
const ParamVal& default_val); | |||
// read | |||
MGE_WIN_DECLSPEC_FUC std::string op_type(void) const; | |||
MGE_WIN_DECLSPEC_FUC std::string op_desc(void) const; | |||
MGE_WIN_DECLSPEC_FUC RunTimeId runtime_id(void) const; | |||
MGE_WIN_DECLSPEC_FUC size_t input_num(void) const; | |||
MGE_WIN_DECLSPEC_FUC size_t output_num(void) const; | |||
MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||
MGE_WIN_DECLSPEC_FUC const ParamInfo& param_info(void) const; | |||
MGE_WIN_DECLSPEC_FUC ArgInfo input_info(size_t idx) const; | |||
MGE_WIN_DECLSPEC_FUC ArgInfo output_info(size_t idx) const; | |||
MGE_WIN_DECLSPEC_FUC const std::vector<ArgInfo>& inputs_info(void) const; | |||
MGE_WIN_DECLSPEC_FUC const std::vector<ArgInfo>& outputs_info(void) const; | |||
std::string op_type(void) const; | |||
std::string op_desc(void) const; | |||
RunTimeId runtime_id(void) const; | |||
size_t input_num(void) const; | |||
size_t output_num(void) const; | |||
std::string str(void) const; | |||
const ParamInfo& param_info(void) const; | |||
ArgInfo input_info(size_t idx) const; | |||
ArgInfo output_info(size_t idx) const; | |||
const std::vector<ArgInfo>& inputs_info(void) const; | |||
const std::vector<ArgInfo>& outputs_info(void) const; | |||
// use | |||
MGE_WIN_DECLSPEC_FUC std::vector<Device> infer_output_device( | |||
std::vector<Device> infer_output_device( | |||
const std::vector<Device>&, const Param&) const; | |||
MGE_WIN_DECLSPEC_FUC std::vector<Shape> infer_output_shape( | |||
std::vector<Shape> infer_output_shape( | |||
const std::vector<Shape>&, const Param&) const; | |||
MGE_WIN_DECLSPEC_FUC std::vector<DType> infer_output_dtype( | |||
std::vector<DType> infer_output_dtype( | |||
const std::vector<DType>&, const Param&) const; | |||
MGE_WIN_DECLSPEC_FUC std::vector<Format> infer_output_format( | |||
std::vector<Format> infer_output_format( | |||
const std::vector<Format>&, const Param&) const; | |||
MGE_WIN_DECLSPEC_FUC void compute( | |||
const std::vector<Tensor>&, const Param&, std::vector<Tensor>&) const; | |||
void compute(const std::vector<Tensor>&, const Param&, std::vector<Tensor>&) const; | |||
}; | |||
} // namespace custom |
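Taken together, the builder methods above form the surface a user-defined operator library compiles against. A rough sketch of registering a hypothetical op through op_insert (the op name, parameter, and kernel are made up for illustration; real libraries usually go through the CUSTOM_OP_REG macro shown earlier, which presumably wraps this call):

// Illustrative registration of a made-up "scale_add" op via the builder API
// declared above; the compute body is left empty.
void scale_add_kernel(
        const std::vector<custom::Tensor>& inputs, const custom::Param& params,
        std::vector<custom::Tensor>& outputs) {
    // element-wise kernel would go here
}

void register_scale_add() {
    std::shared_ptr<custom::CustomOp> op = custom::op_insert("scale_add", 1);
    op->set_description("out = lhs + rhs * scale")
            .add_input("lhs", {"float32"})
            .add_input("rhs", {"float32"})
            .add_output("out", {"float32"})
            .add_param("scale", 1.0f)
            .set_compute(scale_add_kernel);
}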
@@ -23,7 +23,7 @@ class ParamInfoImpl; | |||
class ParamImpl; | |||
// Schema of a param element | |||
class ParamSchema { | |||
class MGE_WIN_DECLSPEC_FUC ParamSchema { | |||
CUSTOM_PIMPL_CLS_DECL(ParamSchema); | |||
ParamSchema( | |||
const std::string& name, const ParamVal& value, | |||
@@ -36,7 +36,7 @@ class ParamSchema { | |||
std::string str(void) const; | |||
}; | |||
class ParamInfo { | |||
class MGE_WIN_DECLSPEC_FUC ParamInfo { | |||
CUSTOM_PIMPL_CLS_DECL(ParamInfo); | |||
void set_tag(const std::string&); | |||
@@ -46,16 +46,16 @@ class ParamInfo { | |||
const std::vector<ParamSchema>& meta(void) const; | |||
}; | |||
class Param { | |||
class MGE_WIN_DECLSPEC_FUC Param { | |||
CUSTOM_PIMPL_CLS_DECL(Param); | |||
MGE_WIN_DECLSPEC_FUC Param(const ParamInfo&); | |||
MGE_WIN_DECLSPEC_FUC ParamVal& operator[](const std::string&); | |||
MGE_WIN_DECLSPEC_FUC const ParamVal& operator[](const std::string&) const; | |||
MGE_WIN_DECLSPEC_FUC const std::unordered_map<std::string, ParamVal>& raw() const; | |||
MGE_WIN_DECLSPEC_FUC bool exist(const std::string& name) const; | |||
MGE_WIN_DECLSPEC_FUC std::string to_bytes(void) const; | |||
MGE_WIN_DECLSPEC_FUC void from_bytes(const std::string&); | |||
Param(const ParamInfo&); | |||
ParamVal& operator[](const std::string&); | |||
const ParamVal& operator[](const std::string&) const; | |||
const std::unordered_map<std::string, ParamVal>& raw() const; | |||
bool exist(const std::string& name) const; | |||
std::string to_bytes(void) const; | |||
void from_bytes(const std::string&); | |||
}; | |||
MGE_WIN_DECLSPEC_FUC bool operator==(const Param&, const Param&); | |||
@@ -169,21 +169,21 @@ std::string vec2str(const std::vector<T>& vec) { | |||
 * Con1: users need to set the type explicitly when instantiating the class template | |||
 * Con2: ParamVal<int> cannot be assigned to ParamVal<double> | |||
*/ | |||
class ParamVal { | |||
class MGE_WIN_DECLSPEC_FUC ParamVal { | |||
std::unique_ptr<void, void_deleter> m_ptr; | |||
ParamDynType m_type; | |||
public: | |||
template <typename T> | |||
MGE_WIN_DECLSPEC_FUC ParamVal(const T& val); | |||
ParamVal(const T& val); | |||
template <typename T> | |||
MGE_WIN_DECLSPEC_FUC ParamVal(const std::initializer_list<T>& val); | |||
ParamVal(const std::initializer_list<T>& val); | |||
MGE_WIN_DECLSPEC_FUC ParamVal(); | |||
MGE_WIN_DECLSPEC_FUC ParamVal(const char* str); | |||
MGE_WIN_DECLSPEC_FUC ParamVal(const std::initializer_list<const char*>& strs); | |||
MGE_WIN_DECLSPEC_FUC ParamVal(const std::vector<const char*>& strs); | |||
MGE_WIN_DECLSPEC_FUC ParamVal(const ParamVal& rhs); | |||
ParamVal(); | |||
ParamVal(const char* str); | |||
ParamVal(const std::initializer_list<const char*>& strs); | |||
ParamVal(const std::vector<const char*>& strs); | |||
ParamVal(const ParamVal& rhs); | |||
template <typename T> | |||
ParamVal& operator=(const T& rhs); | |||
@@ -196,30 +196,39 @@ public: | |||
ParamVal& operator=(const ParamVal& rhs); | |||
template <typename T> | |||
MGE_WIN_DECLSPEC_FUC const T& as(void) const; | |||
const T& as(void) const; | |||
template <typename T> | |||
MGE_WIN_DECLSPEC_FUC T& as(void); | |||
MGE_WIN_DECLSPEC_FUC const void* raw_ptr(void) const; | |||
MGE_WIN_DECLSPEC_FUC void* raw_ptr(void); | |||
MGE_WIN_DECLSPEC_FUC ParamDynType type(void) const; | |||
MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||
MGE_WIN_DECLSPEC_FUC size_t size(void) const; | |||
MGE_WIN_DECLSPEC_FUC static std::string to_bytes(const ParamVal& value); | |||
MGE_WIN_DECLSPEC_FUC static ParamVal from_bytes( | |||
const std::string& bytes, size_t& offset); | |||
friend ParamVal operator+(const ParamVal& lhs, const ParamVal& rhs); | |||
friend ParamVal operator-(const ParamVal& lhs, const ParamVal& rhs); | |||
friend ParamVal operator*(const ParamVal& lhs, const ParamVal& rhs); | |||
friend ParamVal operator/(const ParamVal& lhs, const ParamVal& rhs); | |||
friend bool operator==(const ParamVal& lhs, const ParamVal& rhs); | |||
friend bool operator!=(const ParamVal& lhs, const ParamVal& rhs); | |||
friend bool operator>(const ParamVal& lhs, const ParamVal& rhs); | |||
friend bool operator<(const ParamVal& lhs, const ParamVal& rhs); | |||
friend bool operator>=(const ParamVal& lhs, const ParamVal& rhs); | |||
friend bool operator<=(const ParamVal& lhs, const ParamVal& rhs); | |||
T& as(void); | |||
const void* raw_ptr(void) const; | |||
void* raw_ptr(void); | |||
ParamDynType type(void) const; | |||
std::string str(void) const; | |||
size_t size(void) const; | |||
static std::string to_bytes(const ParamVal& value); | |||
static ParamVal from_bytes(const std::string& bytes, size_t& offset); | |||
MGE_WIN_DECLSPEC_FUC friend ParamVal operator+( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend ParamVal operator-( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend ParamVal operator*( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend ParamVal operator/( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator==( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator!=( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator>( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator<( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator>=( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator<=( | |||
const ParamVal& lhs, const ParamVal& rhs); | |||
}; | |||
ParamVal operator+(const ParamVal& lhs, const ParamVal& rhs); | |||
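ParamVal is thus a type-erased value (avoiding the two template drawbacks listed in the comment above), and Param maps parameter names to such values. A hedged sketch of reading parameters back inside an op, with made-up parameter names:

// Illustrative parameter access; "scale" and "axis" are made-up names.
void read_params(const custom::Param& param) {
    float scale = param["scale"].as<float>();  // typed view of the erased value
    int32_t axis = param.exist("axis") ? param["axis"].as<int32_t>() : 0;
    std::string packed = param.to_bytes();     // serialize every value
    (void)scale;
    (void)axis;
    (void)packed;
}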
@@ -30,9 +30,9 @@ namespace custom { | |||
#define CUSTOM_DEVICE_TYPE_ENUM_DECL(custom_type, builtin_type, builtin_str) \ | |||
custom_type, | |||
class Device { | |||
MGE_WIN_DECLSPEC_FUC const void* impl() const; | |||
MGE_WIN_DECLSPEC_FUC Device(const void* impl); | |||
class MGE_WIN_DECLSPEC_FUC Device { | |||
const void* impl() const; | |||
Device(const void* impl); | |||
CUSTOM_PIMPL_CLS_DECL(Device); | |||
public: | |||
@@ -40,19 +40,19 @@ public: | |||
CUSTOM_FOR_EACH_DEVICE_TYPE(CUSTOM_DEVICE_TYPE_ENUM_DECL) | |||
}; | |||
MGE_WIN_DECLSPEC_FUC Device(const std::string& device); | |||
MGE_WIN_DECLSPEC_FUC Device(const char* device); | |||
MGE_WIN_DECLSPEC_FUC Device(DeviceEnum device); | |||
Device(const std::string& device); | |||
Device(const char* device); | |||
Device(DeviceEnum device); | |||
MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||
MGE_WIN_DECLSPEC_FUC DeviceEnum enumv(void) const; | |||
std::string str(void) const; | |||
DeviceEnum enumv(void) const; | |||
MGE_WIN_DECLSPEC_FUC static bool is_legal(const std::string& device); | |||
MGE_WIN_DECLSPEC_FUC static bool is_legal(DeviceEnum device); | |||
MGE_WIN_DECLSPEC_FUC static std::vector<std::string> legal_devices(void); | |||
static bool is_legal(const std::string& device); | |||
static bool is_legal(DeviceEnum device); | |||
static std::vector<std::string> legal_devices(void); | |||
friend class Tensor; | |||
friend bool operator==(const Device& lhs, const Device& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator==(const Device& lhs, const Device& rhs); | |||
CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | |||
}; | |||
@@ -60,23 +60,23 @@ using DeviceEnum = Device::DeviceEnum; | |||
bool operator==(const Device& lhs, const Device& rhs); | |||
class Shape { | |||
MGE_WIN_DECLSPEC_FUC const void* impl() const; | |||
MGE_WIN_DECLSPEC_FUC Shape(const void* impl); | |||
class MGE_WIN_DECLSPEC_FUC Shape { | |||
const void* impl() const; | |||
Shape(const void* impl); | |||
CUSTOM_PIMPL_CLS_DECL(Shape); | |||
public: | |||
MGE_WIN_DECLSPEC_FUC Shape(const std::vector<size_t>& rhs); | |||
MGE_WIN_DECLSPEC_FUC Shape(const std::initializer_list<size_t>& rhs); | |||
Shape(const std::vector<size_t>& rhs); | |||
Shape(const std::initializer_list<size_t>& rhs); | |||
size_t& operator[](size_t idx); | |||
size_t operator[](size_t idx) const; | |||
MGE_WIN_DECLSPEC_FUC void ndim(size_t dim); | |||
MGE_WIN_DECLSPEC_FUC size_t ndim(void) const; | |||
void ndim(size_t dim); | |||
size_t ndim(void) const; | |||
friend class Tensor; | |||
friend bool operator==(const Shape& lhs, const Shape& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator==(const Shape& lhs, const Shape& rhs); | |||
CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | |||
}; | |||
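Shape is the lightweight shape view handed to shape-inference callbacks. As an illustration, a callback that simply propagates the first input's shape might look like the following; the callback signature is an assumption inferred from infer_output_shape above, not quoted from the header:

// Assumed-signature sketch of a shape-inference callback that makes the
// single output follow the first input's shape.
std::vector<custom::Shape> same_shape_infer(
        const std::vector<custom::Shape>& inputs, const custom::Param&) {
    return {inputs[0]};
}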
@@ -104,9 +104,9 @@ using bfloat16_t = uint16_t; | |||
#define CUSTOM_DTYPE_ENUM_DECL(custom_type, builtin_type, ctype) custom_type, | |||
class DType { | |||
MGE_WIN_DECLSPEC_FUC const void* impl() const; | |||
MGE_WIN_DECLSPEC_FUC DType(const void* impl); | |||
class MGE_WIN_DECLSPEC_FUC DType { | |||
const void* impl() const; | |||
DType(const void* impl); | |||
CUSTOM_PIMPL_CLS_DECL(DType); | |||
public: | |||
@@ -114,27 +114,33 @@ public: | |||
CUSTOM_FOR_EACH_TENSOR_DATA_TYPE(CUSTOM_DTYPE_ENUM_DECL) | |||
}; | |||
MGE_WIN_DECLSPEC_FUC DType(const std::string& dtype); | |||
MGE_WIN_DECLSPEC_FUC DType(const char* dtype); | |||
MGE_WIN_DECLSPEC_FUC DType( | |||
const std::string& dtype, float scale, uint8_t zero_point = 0); | |||
MGE_WIN_DECLSPEC_FUC DType(const char* dtype, float scale, uint8_t zero_point = 0); | |||
MGE_WIN_DECLSPEC_FUC DType(DTypeEnum dtype); | |||
MGE_WIN_DECLSPEC_FUC DType(DTypeEnum dtype, float scale, uint8_t zero_point = 0); | |||
MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||
MGE_WIN_DECLSPEC_FUC DTypeEnum enumv() const; | |||
MGE_WIN_DECLSPEC_FUC float scale(void) const; | |||
MGE_WIN_DECLSPEC_FUC uint8_t zero_point(void) const; | |||
DType(const std::string& dtype); | |||
DType(const char* dtype); | |||
DType(const std::string& dtype, float scale, uint8_t zero_point = 0); | |||
DType(const char* dtype, float scale, uint8_t zero_point = 0); | |||
DType(DTypeEnum dtype); | |||
DType(DTypeEnum dtype, float scale, uint8_t zero_point = 0); | |||
std::string str(void) const; | |||
DTypeEnum enumv() const; | |||
float scale(void) const; | |||
uint8_t zero_point(void) const; | |||
template <typename T> | |||
MGE_WIN_DECLSPEC_FUC bool is_compatible(void) const; | |||
bool is_compatible(void) const; | |||
MGE_WIN_DECLSPEC_FUC static bool is_legal(const std::string& dtype); | |||
MGE_WIN_DECLSPEC_FUC static bool is_legal(const DTypeEnum& dtype); | |||
MGE_WIN_DECLSPEC_FUC static std::vector<std::string> legal_dtypes(void); | |||
static bool is_legal(const std::string& dtype); | |||
static bool is_legal(const DTypeEnum& dtype); | |||
static std::vector<std::string> legal_dtypes(void); | |||
friend class Tensor; | |||
friend bool operator==(const DType& lhs, const DType& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator==(const DType& lhs, const DType& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator==( | |||
const DType& lhs, const std::string& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator==(const DType& lhs, const char* rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator==( | |||
const std::string& lhs, const DType& rhs); | |||
MGE_WIN_DECLSPEC_FUC friend bool operator==(const char* lhs, const DType& rhs); | |||
CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | |||
}; | |||
@@ -180,45 +186,45 @@ bool operator==(const DType& lhs, const char* rhs); | |||
bool operator==(const std::string& lhs, const DType& rhs); | |||
bool operator==(const char* lhs, const DType& rhs); | |||
class Format { | |||
MGE_WIN_DECLSPEC_FUC const void* impl() const; | |||
MGE_WIN_DECLSPEC_FUC Format(const void* impl); | |||
class MGE_WIN_DECLSPEC_FUC Format { | |||
const void* impl() const; | |||
Format(const void* impl); | |||
CUSTOM_PIMPL_CLS_DECL(Format); | |||
public: | |||
MGE_WIN_DECLSPEC_FUC Format(const std::string& format); | |||
MGE_WIN_DECLSPEC_FUC Format(const char* format); | |||
Format(const std::string& format); | |||
Format(const char* format); | |||
MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||
MGE_WIN_DECLSPEC_FUC bool is_default(void) const; | |||
std::string str(void) const; | |||
bool is_default(void) const; | |||
friend class Tensor; | |||
CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | |||
}; | |||
class Tensor { | |||
class MGE_WIN_DECLSPEC_FUC Tensor { | |||
void* m_tensor; | |||
MGE_WIN_DECLSPEC_FUC const void* impl(void) const; | |||
MGE_WIN_DECLSPEC_FUC Tensor(const void* impl); | |||
const void* impl(void) const; | |||
Tensor(const void* impl); | |||
MGE_WIN_DECLSPEC_FUC const size_t* shapes_raw(void) const; | |||
MGE_WIN_DECLSPEC_FUC const ptrdiff_t* strides_raw(void) const; | |||
const size_t* shapes_raw(void) const; | |||
const ptrdiff_t* strides_raw(void) const; | |||
public: | |||
Tensor() = delete; | |||
MGE_WIN_DECLSPEC_FUC Tensor(const Tensor& rhs); | |||
MGE_WIN_DECLSPEC_FUC Tensor& operator=(const Tensor& rhs); | |||
MGE_WIN_DECLSPEC_FUC Shape shape(void) const; | |||
MGE_WIN_DECLSPEC_FUC DType dtype(void) const; | |||
MGE_WIN_DECLSPEC_FUC Format format(void) const; | |||
MGE_WIN_DECLSPEC_FUC Device device(void) const; | |||
MGE_WIN_DECLSPEC_FUC size_t size(void) const; | |||
MGE_WIN_DECLSPEC_FUC std::vector<ptrdiff_t> stride(void) const; | |||
MGE_WIN_DECLSPEC_FUC float scale(void) const; | |||
MGE_WIN_DECLSPEC_FUC uint8_t zero_point(void) const; | |||
Tensor(const Tensor& rhs); | |||
Tensor& operator=(const Tensor& rhs); | |||
Shape shape(void) const; | |||
DType dtype(void) const; | |||
Format format(void) const; | |||
Device device(void) const; | |||
size_t size(void) const; | |||
std::vector<ptrdiff_t> stride(void) const; | |||
float scale(void) const; | |||
uint8_t zero_point(void) const; | |||
void* data(void); | |||
const void* data(void) const; | |||
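With data(), size(), and the metadata accessors above, a CPU compute body has everything it needs. A hedged sketch that copies one tensor into another, assuming contiguous float32 data:

// Illustrative compute body: element-wise copy, assuming float32 and a
// contiguous layout; a real kernel should dispatch on dtype()/device().
void copy_compute(
        const std::vector<custom::Tensor>& inputs, const custom::Param&,
        std::vector<custom::Tensor>& outputs) {
    const float* src = static_cast<const float*>(inputs[0].data());
    float* dst = static_cast<float*>(outputs[0].data());
    for (size_t i = 0; i < inputs[0].size(); ++i)
        dst[i] = src[i];
}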
@@ -19,10 +19,19 @@ | |||
namespace custom { | |||
void assert_failed_log( | |||
#ifndef MGE_WIN_DECLSPEC_FUC | |||
#ifdef _WIN32 | |||
#define MGE_WIN_DECLSPEC_FUC __declspec(dllexport) | |||
#else | |||
#define MGE_WIN_DECLSPEC_FUC | |||
#endif | |||
#endif | |||
MGE_WIN_DECLSPEC_FUC void assert_failed_log( | |||
const char* file, int line, const char* func, const char* expr, | |||
const char* msg_fmt, ...); | |||
#ifndef _WIN32 | |||
#define custom_expect(expr, msg...) \ | |||
if (!(expr)) { \ | |||
assert_failed_log(__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, ##msg); \ | |||
@@ -33,8 +42,22 @@ void assert_failed_log( | |||
assert_failed_log(__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, ##msg); \ | |||
} \ | |||
assert((expr)) | |||
#else | |||
#define custom_expect(expr, ...) \ | |||
if (!(expr)) { \ | |||
assert_failed_log( \ | |||
__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, __VA_ARGS__); \ | |||
} | |||
#define custom_assert(expr, ...) \ | |||
if (!(expr)) { \ | |||
assert_failed_log( \ | |||
__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, __VA_ARGS__); \ | |||
} \ | |||
assert((expr)) | |||
#endif | |||
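The Windows branch rewrites both checking macros with __VA_ARGS__, presumably because the `msg...`/`##msg` named-variadic form used in the original definitions is a GNU extension that MSVC does not accept; call sites stay the same on both platforms, for example:

// Illustrative check; the condition and message are made up.
custom_assert(
        inputs.size() == 2, "expected 2 inputs, got %zu", inputs.size());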
class UnImpleWarnLog { | |||
class MGE_WIN_DECLSPEC_FUC UnImpleWarnLog { | |||
public: | |||
UnImpleWarnLog( | |||
const std::string& func, const std::string& attr, const std::string& val); | |||
@@ -54,9 +77,9 @@ void impl_deleter(void* ptr) { | |||
std::unique_ptr<void, void_deleter> m_impl; \ | |||
\ | |||
public: \ | |||
MGE_WIN_DECLSPEC_FUC Cls(); \ | |||
MGE_WIN_DECLSPEC_FUC Cls(const Cls& rhs); \ | |||
MGE_WIN_DECLSPEC_FUC Cls& operator=(const Cls& rhs) | |||
Cls(); \ | |||
Cls(const Cls& rhs); \ | |||
Cls& operator=(const Cls& rhs) | |||
#define CUSTOM_PIMPL_CLS_DEFINE(Cls) \ | |||
Cls::Cls() : m_impl(new Cls##Impl(), impl_deleter<Cls##Impl>) {} \ | |||