Browse Source

refactor(cuda-stub): opt cuda-stub log

GitOrigin-RevId: 87dda08e1b
release-1.10
Megvii Engine Team 3 years ago
parent
commit
c0b267fff6
4 changed files with 29 additions and 15 deletions
  1. +12
    -4
      dnn/cuda-stub/src/dlopen_helper.h
  2. +2
    -1
      dnn/cuda-stub/src/libcuda.cpp
  3. +3
    -2
      dnn/cuda-stub/src/libnvrtc.cpp
  4. +12
    -8
      src/core/impl/comp_node/cuda/comp_node.cpp

+ 12
- 4
dnn/cuda-stub/src/dlopen_helper.h View File

@@ -85,25 +85,33 @@ static void* get_library_handle() {
sizeof(extra_so_paths) / sizeof(char*)); sizeof(extra_so_paths) / sizeof(char*));
} }
if (!handle) { if (!handle) {
LOGE("Failed to load %s API library", g_default_api_name);
if (std::string(g_default_api_name) == "cuda") {
LOGI("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
LOGI("+ Failed to load CUDA driver library, MegEngine works under CPU mode now. +");
LOGI("+ To use CUDA mode, please make sure NVIDIA GPU driver was installed properly. +");
LOGI("+ Refer to https://discuss.megengine.org.cn/t/topic/1264 for more information. +");
LOGI("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
} else {
LOGI("Failed to load %s API library", g_default_api_name);
}
return nullptr; return nullptr;
} }
return handle; return handle;
} }


static void log_failed_load(int func_idx) { static void log_failed_load(int func_idx) {
LOGE("failed to load %s func: %s", g_default_api_name,
LOGD("failed to load %s func: %s", g_default_api_name,
g_func_name[func_idx]); g_func_name[func_idx]);
} }


static void* resolve_library_func(void* handle, const char* func) { static void* resolve_library_func(void* handle, const char* func) {
if (!handle) { if (!handle) {
LOGE("%s handle should not be nullptr!", g_default_api_name);
LOGD("%s handle should not be nullptr!", g_default_api_name);
return nullptr; return nullptr;
} }
auto ret = dlsym(handle, func); auto ret = dlsym(handle, func);
if (!ret) { if (!ret) {
LOGE("failed to load %s func: %s", g_default_api_name, func);
LOGD("failed to load %s func: %s", g_default_api_name, func);
} }
return ret; return ret;
} }

+ 2
- 1
dnn/cuda-stub/src/libcuda.cpp View File

@@ -3,7 +3,8 @@
#pragma GCC visibility push(default) #pragma GCC visibility push(default)


#include <cstdio> #include <cstdio>
#define LOGE(fmt, v...) fprintf(stderr, "err: " fmt "\n", ##v)
#define LOGI(fmt, v...) fprintf(stdout, "info: " fmt "\n", ##v)
#define LOGD(fmt, v...) fprintf(stdout, "debug: " fmt "\n", ##v)


extern "C" { extern "C" {
#include "cuda.h" #include "cuda.h"


+ 3
- 2
dnn/cuda-stub/src/libnvrtc.cpp View File

@@ -13,7 +13,8 @@
#pragma GCC visibility push(default) #pragma GCC visibility push(default)


#include <cstdio> #include <cstdio>
#define LOGE(fmt, v...) fprintf(stderr, "err: " fmt "\n", ##v)
#define LOGI(fmt, v...) fprintf(stdout, "info: " fmt "\n", ##v)
#define LOGD(fmt, v...) fprintf(stdout, "debug: " fmt "\n", ##v)
#include "./nvrtc_type.h" #include "./nvrtc_type.h"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #pragma GCC diagnostic ignored "-Wdeprecated-declarations"


@@ -72,4 +73,4 @@ static const char* default_so_paths[] = {
static const char* extra_so_paths[] = {}; static const char* extra_so_paths[] = {};


static const char* g_default_api_name = "nvrtc"; static const char* g_default_api_name = "nvrtc";
#include "./dlopen_helper.h"
#include "./dlopen_helper.h"

+ 12
- 8
src/core/impl/comp_node/cuda/comp_node.cpp View File

@@ -822,8 +822,10 @@ CUresult call_cuda_forksafe(Func func, Val* val, Args... args) {
const char* cu_get_error_string(CUresult err) { const char* cu_get_error_string(CUresult err) {
const char* ret = nullptr; const char* ret = nullptr;
cuGetErrorString(err, &ret); cuGetErrorString(err, &ret);
if (!ret)
ret = "unknown cuda error";
if (!ret) {
//! caused by cuda stub do not find driver
ret = "invalid_stub_call";
}
return ret; return ret;
} }


@@ -837,10 +839,12 @@ bool CudaCompNode::available() {
int ndev = -1; int ndev = -1;
auto err = call_cuda_forksafe(cuDeviceGetCount, &ndev); auto err = call_cuda_forksafe(cuDeviceGetCount, &ndev);
result = err == CUDA_SUCCESS && ndev > 0; result = err == CUDA_SUCCESS && ndev > 0;
if (!result) {
auto err_s = cu_get_error_string(err);
//! only show !CUDA_SUCCESS log when with valid stub call
if (!result && (std::string(err_s) != "invalid_stub_call")) {
mgb_log_warn( mgb_log_warn(
"cuda unavailable: %s(%d) ndev=%d", cu_get_error_string(err),
static_cast<int>(err), ndev);
"cuda unavailable: %s(%d) ndev=%d", err_s, static_cast<int>(err),
ndev);
} }
if (err == CUDA_ERROR_NOT_INITIALIZED) { if (err == CUDA_ERROR_NOT_INITIALIZED) {
mgb_throw(std::runtime_error, "cuda initialization error."); mgb_throw(std::runtime_error, "cuda initialization error.");
@@ -984,11 +988,11 @@ size_t CudaCompNode::get_device_count(bool warn) {
MGB_LOCK_GUARD(mtx); MGB_LOCK_GUARD(mtx);
if (cnt == -1) { if (cnt == -1) {
auto err = call_cuda_forksafe(cuDeviceGetCount, &cnt); auto err = call_cuda_forksafe(cuDeviceGetCount, &cnt);
auto err_s = cu_get_error_string(err);
if (err != CUDA_SUCCESS) { if (err != CUDA_SUCCESS) {
if (warn)
if (warn && (std::string(err_s) != "invalid_stub_call"))
mgb_log_error( mgb_log_error(
"cudaGetDeviceCount failed: %s (err %d)",
cu_get_error_string(err), int(err));
"cudaGetDeviceCount failed: %s (err %d)", err_s, int(err));
cnt = 0; cnt = 0;
} }
mgb_assert(cnt >= 0); mgb_assert(cnt >= 0);


Loading…
Cancel
Save