
fix(mgb): change the check method of cuda sm code

GitOrigin-RevId: 23dbc9b574
Branch: HuaHua404-patch-4
Author: Megvii Engine Team, 2 years ago
Parent commit: 32b31fd578
2 changed files with 62 additions and 21 deletions
  1. +0 -21   imperative/python/megengine/__init__.py
  2. +62 -0   src/core/impl/comp_node/cuda/comp_node.cpp

+ 0  - 21   imperative/python/megengine/__init__.py

@@ -96,27 +96,6 @@ from .utils.persistent_cache import PersistentCacheOnServer as _PersistentCacheO
from .version import __version__


def _check_sm_version():
    cur_logger = get_logger(__name__)
    ngpus = get_device_count("gpu")
    supported_sm_versions = re.findall(r"sm_(\d+)", _get_supported_sm_versions())
    for idx in range(ngpus):
        prop = get_cuda_device_property(idx)
        cur_sm = str(prop.major * 10 + prop.minor)
        if not cur_sm in supported_sm_versions:
            cur_logger.warning(
                "{} with CUDA capability sm_{} is not compatible with the current MegEngine installation. The current MegEngine install supports CUDA {} {}. If you want to use the {} with MegEngine, please check the instructions at https://github.com/MegEngine/MegEngine/blob/master/scripts/cmake-build/BUILD_README.md".format(
                    prop.name,
                    cur_sm,
                    "capabilities" if len(supported_sm_versions) > 1 else "capability",
                    " ".join(["sm_" + v for v in supported_sm_versions]),
                    prop.name,
                )
            )


_check_sm_version()

_exit_handlers = []




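The Python-side check removed above is re-implemented in the CUDA comp node below; its core step is extracting every sm_XX code the binary was built for from a gencode string. A minimal standalone sketch of that extraction (the gencode literal here is a placeholder for illustration, not the real MGE_CUDA_GENCODE value):

#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {
    // Placeholder gencode string; the real value comes from the
    // MGE_CUDA_GENCODE macro generated in cuda_sm_gen.h at build time.
    const std::string gencode =
            "-gencode arch=compute_70,code=sm_70 "
            "-gencode arch=compute_75,code=sm_75 "
            "-gencode arch=compute_80,code=sm_80";

    // With the default submatch index, sregex_token_iterator yields the
    // whole match for each occurrence, i.e. "sm_70", "sm_75", "sm_80".
    std::regex re("sm_([0-9]+)");
    std::vector<std::string> build_sm(
            std::sregex_token_iterator(gencode.begin(), gencode.end(), re),
            std::sregex_token_iterator());

    for (auto&& s : build_sm)
        std::cout << s << "\n";
    return 0;
}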
+ 62  - 0   src/core/impl/comp_node/cuda/comp_node.cpp

@@ -8,10 +8,15 @@ using namespace mgb;

#if MGB_CUDA

#if MEGDNN_WITH_CUDA
#include "cuda_sm_gen.h"
#endif

#include "megbrain/comp_node/alloc.h"

#include <cctype>
#include <cstdio>
#include <regex>

#include <thread>

@@ -417,6 +422,63 @@ void CudaCompNodeImpl::init(const Locator& locator, const Locator& locator_logic

    m_env.init_cuda_async(
            locator.device, make_comp_node_from_impl(this), {on_succ, on_error});
#if MEGDNN_WITH_CUDA
    auto cur_prop = CudaCompNode::get_device_prop(locator.device);
    auto cur_sm =
            std::string("sm_") + std::to_string(cur_prop.major * 10 + cur_prop.minor);
    const std::string mge_gen_code = MGE_CUDA_GENCODE;
    std::regex re("sm_([0-9]+)");
    std::vector<std::string> build_sm(
            std::sregex_token_iterator(mge_gen_code.begin(), mge_gen_code.end(), re),
            std::sregex_token_iterator());

    if (std::find(build_sm.begin(), build_sm.end(), cur_sm) == build_sm.end()) {
        std::string build_sm_info = "";
        for (auto&& s : build_sm) {
            build_sm_info += std::string(" ") + s;
        }

        std::vector<int> support_gpu;
        for (int i = 0; i < get_device_count(); i++) {
            auto prop = CudaCompNode::get_device_prop(i);
            auto sm = std::string("sm_") + std::to_string(prop.major * 10 + prop.minor);
            if (std::find(build_sm.begin(), build_sm.end(), sm) != build_sm.end()) {
                support_gpu.emplace_back(i);
            }
        }

        if (support_gpu.size() == 0) {
            mgb_throw(
                    MegBrainError,
                    "%s(gpu%d) with CUDA capability %s is not compatible with the "
                    "current MegEngine installation. The current MegEngine install "
                    "supports CUDA capabilities%s. If you want to use the %s(gpu%d) "
                    "with MegEngine, please check the instructions at "
                    "https://github.com/MegEngine/MegEngine/blob/master/scripts/"
                    "cmake-build/BUILD_README.md",
                    cur_prop.name.c_str(), locator.device, cur_sm.c_str(),
                    build_sm_info.c_str(), cur_prop.name.c_str(), locator.device);
        } else {
            std::string support_gpu_info = "";
            for (auto&& g : support_gpu) {
                support_gpu_info += std::string(" gpu") + std::to_string(g);
            }
            mgb_throw(
                    MegBrainError,
                    "%s(gpu%d) with CUDA capability %s is not compatible with the "
                    "current MegEngine installation. The current MegEngine install "
                    "supports CUDA capabilities%s. You can try to use%s instead or "
                    "configure CUDA_VISIBLE_DEVICES to choose another CUDA card. If "
                    "you really want to use the %s(gpu%d) with MegEngine, please "
                    "check the instructions at "
                    "https://github.com/MegEngine/MegEngine/blob/master/scripts/"
                    "cmake-build/BUILD_README.md",
                    cur_prop.name.c_str(), locator.device, cur_sm.c_str(),
                    build_sm_info.c_str(), support_gpu_info.c_str(),
                    cur_prop.name.c_str(), locator.device);
        }
    }
#endif
}
}

void CudaCompNodeImpl::fini() {

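For reference, a hedged sketch of the per-device decision the new code makes: build the device's sm_XX string from its compute capability (major, minor) and look it up in the build-time list. Device properties are hard-coded here; the real code reads them via CudaCompNode::get_device_prop() and throws MegBrainError on a mismatch.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Returns true when a device with the given compute capability is covered
// by one of the sm_XX targets the binary was built with.
static bool sm_supported(int major, int minor, const std::vector<std::string>& build_sm) {
    auto cur_sm = std::string("sm_") + std::to_string(major * 10 + minor);
    return std::find(build_sm.begin(), build_sm.end(), cur_sm) != build_sm.end();
}

int main() {
    // As if parsed from the gencode string in the sketch above.
    std::vector<std::string> build_sm{"sm_70", "sm_75", "sm_80"};

    std::cout << sm_supported(8, 0, build_sm) << "\n";  // 1: sm_80 is in the build list
    std::cout << sm_supported(8, 6, build_sm) << "\n";  // 0: sm_86 is not; the real code throws here
    return 0;
}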
