
fix(mgb): change the check method of cuda sm code

GitOrigin-RevId: 23dbc9b574
Branch: HuaHua404-patch-4
Author: Megvii Engine Team, 2 years ago
Parent commit: 32b31fd578
2 changed files with 62 additions and 21 deletions
  1. +0 -21   imperative/python/megengine/__init__.py
  2. +62 -0   src/core/impl/comp_node/cuda/comp_node.cpp

+ 0  - 21   imperative/python/megengine/__init__.py

@@ -96,27 +96,6 @@ from .utils.persistent_cache import PersistentCacheOnServer as _PersistentCacheO
from .version import __version__


def _check_sm_version():
    cur_logger = get_logger(__name__)
    ngpus = get_device_count("gpu")
    supported_sm_versions = re.findall(r"sm_(\d+)", _get_supported_sm_versions())
    for idx in range(ngpus):
        prop = get_cuda_device_property(idx)
        cur_sm = str(prop.major * 10 + prop.minor)
        if not cur_sm in supported_sm_versions:
            cur_logger.warning(
                "{} with CUDA capability sm_{} is not compatible with the current MegEngine installation. The current MegEngine install supports CUDA {} {}. If you want to use the {} with MegEngine, please check the instructions at https://github.com/MegEngine/MegEngine/blob/master/scripts/cmake-build/BUILD_README.md".format(
                    prop.name,
                    cur_sm,
                    "capabilities" if len(supported_sm_versions) > 1 else "capability",
                    " ".join(["sm_" + v for v in supported_sm_versions]),
                    prop.name,
                )
            )


_check_sm_version()

_exit_handlers = []




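The Python-side check removed above is re-implemented in the CUDA comp node below; its core step is extracting every sm_XX code the binary was built for from a gencode string. A minimal standalone sketch of that extraction (the gencode literal here is a placeholder for illustration, not the real MGE_CUDA_GENCODE value):

#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {
    // Placeholder gencode string; the real value comes from the
    // MGE_CUDA_GENCODE macro generated in cuda_sm_gen.h at build time.
    const std::string gencode =
            "-gencode arch=compute_70,code=sm_70 "
            "-gencode arch=compute_75,code=sm_75 "
            "-gencode arch=compute_80,code=sm_80";

    // With the default submatch index, sregex_token_iterator yields the
    // whole match for each occurrence, i.e. "sm_70", "sm_75", "sm_80".
    std::regex re("sm_([0-9]+)");
    std::vector<std::string> build_sm(
            std::sregex_token_iterator(gencode.begin(), gencode.end(), re),
            std::sregex_token_iterator());

    for (auto&& s : build_sm)
        std::cout << s << "\n";
    return 0;
}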
+ 62  - 0   src/core/impl/comp_node/cuda/comp_node.cpp

@@ -8,10 +8,15 @@ using namespace mgb;

#if MGB_CUDA

#if MEGDNN_WITH_CUDA
#include "cuda_sm_gen.h"
#endif

#include "megbrain/comp_node/alloc.h"

#include <cctype>
#include <cstdio>
#include <regex>

#include <thread>

@@ -417,6 +422,63 @@ void CudaCompNodeImpl::init(const Locator& locator, const Locator& locator_logic

    m_env.init_cuda_async(
            locator.device, make_comp_node_from_impl(this), {on_succ, on_error});
#if MEGDNN_WITH_CUDA
    auto cur_prop = CudaCompNode::get_device_prop(locator.device);
    auto cur_sm =
            std::string("sm_") + std::to_string(cur_prop.major * 10 + cur_prop.minor);
    const std::string mge_gen_code = MGE_CUDA_GENCODE;
    std::regex re("sm_([0-9]+)");
    std::vector<std::string> build_sm(
            std::sregex_token_iterator(mge_gen_code.begin(), mge_gen_code.end(), re),
            std::sregex_token_iterator());

    if (std::find(build_sm.begin(), build_sm.end(), cur_sm) == build_sm.end()) {
        std::string build_sm_info = "";
        for (auto&& s : build_sm) {
            build_sm_info += std::string(" ") + s;
        }

        std::vector<int> support_gpu;
        for (int i = 0; i < get_device_count(); i++) {
            auto prop = CudaCompNode::get_device_prop(i);
            auto sm = std::string("sm_") + std::to_string(prop.major * 10 + prop.minor);
            if (std::find(build_sm.begin(), build_sm.end(), sm) != build_sm.end()) {
                support_gpu.emplace_back(i);
            }
        }

        if (support_gpu.size() == 0) {
            mgb_throw(
                    MegBrainError,
                    "%s(gpu%d) with CUDA capability %s is not compatible with the "
                    "current MegEngine installation. The current MegEngine install "
                    "supports CUDA capabilities%s. If you want to use the %s(gpu%d) "
                    "with MegEngine, please check the instructions at "
                    "https://github.com/MegEngine/MegEngine/blob/master/scripts/"
                    "cmake-build/BUILD_README.md",
                    cur_prop.name.c_str(), locator.device, cur_sm.c_str(),
                    build_sm_info.c_str(), cur_prop.name.c_str(), locator.device);
        } else {
            std::string support_gpu_info = "";
            for (auto&& g : support_gpu) {
                support_gpu_info += std::string(" gpu") + std::to_string(g);
            }
            mgb_throw(
                    MegBrainError,
                    "%s(gpu%d) with CUDA capability %s is not compatible with the "
                    "current MegEngine installation. The current MegEngine install "
                    "supports CUDA capabilities%s. You can try to use%s instead or "
                    "configure CUDA_VISIBLE_DEVICES to choose another CUDA card. If "
                    "you really want to use the %s(gpu%d) with MegEngine, please "
                    "check the instructions at "
                    "https://github.com/MegEngine/MegEngine/blob/master/scripts/"
                    "cmake-build/BUILD_README.md",
                    cur_prop.name.c_str(), locator.device, cur_sm.c_str(),
                    build_sm_info.c_str(), support_gpu_info.c_str(),
                    cur_prop.name.c_str(), locator.device);
        }
    }
#endif
}
}

void CudaCompNodeImpl::fini() {

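For reference, a hedged sketch of the per-device decision the new code makes: build the device's sm_XX string from its compute capability (major, minor) and look it up in the build-time list. Device properties are hard-coded here; the real code reads them via CudaCompNode::get_device_prop() and throws MegBrainError on a mismatch.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Returns true when a device with the given compute capability is covered
// by one of the sm_XX targets the binary was built with.
static bool sm_supported(int major, int minor, const std::vector<std::string>& build_sm) {
    auto cur_sm = std::string("sm_") + std::to_string(major * 10 + minor);
    return std::find(build_sm.begin(), build_sm.end(), cur_sm) != build_sm.end();
}

int main() {
    // As if parsed from the gencode string in the sketch above.
    std::vector<std::string> build_sm{"sm_70", "sm_75", "sm_80"};

    std::cout << sm_supported(8, 0, build_sm) << "\n";  // 1: sm_80 is in the build list
    std::cout << sm_supported(8, 6, build_sm) << "\n";  // 0: sm_86 is not; the real code throws here
    return 0;
}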
