diff --git a/dnn/cuda-stub/src/dlopen_helper.h b/dnn/cuda-stub/src/dlopen_helper.h
index 00a429d5..728c348a 100644
--- a/dnn/cuda-stub/src/dlopen_helper.h
+++ b/dnn/cuda-stub/src/dlopen_helper.h
@@ -85,25 +85,33 @@ static void* get_library_handle() {
                                  sizeof(extra_so_paths) / sizeof(char*));
     }
     if (!handle) {
-        LOGE("Failed to load %s API library", g_default_api_name);
+        if (std::string(g_default_api_name) == "cuda") {
+            LOGI("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
+            LOGI("+ Failed to load CUDA driver library, MegEngine works under CPU mode now.      +");
+            LOGI("+ To use CUDA mode, please make sure NVIDIA GPU driver was installed properly. +");
+            LOGI("+ Refer to https://discuss.megengine.org.cn/t/topic/1264 for more information. +");
+            LOGI("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
+        } else {
+            LOGI("Failed to load %s API library", g_default_api_name);
+        }
         return nullptr;
     }
     return handle;
 }
 
 static void log_failed_load(int func_idx) {
-    LOGE("failed to load %s func: %s", g_default_api_name,
+    LOGD("failed to load %s func: %s", g_default_api_name,  // name comes from g_func_name
          g_func_name[func_idx]);
 }
 
 static void* resolve_library_func(void* handle, const char* func) {
     if (!handle) {
-        LOGE("%s handle should not be nullptr!", g_default_api_name);
+        LOGD("%s handle should not be nullptr!", g_default_api_name);  // no handle: dlsym is skipped
         return nullptr;
     }
     auto ret = dlsym(handle, func);
     if (!ret) {
-        LOGE("failed to load %s func: %s", g_default_api_name, func);
+        LOGD("failed to load %s func: %s", g_default_api_name, func);  // dlsym returned null
     }
     return ret;
 }
diff --git a/dnn/cuda-stub/src/libcuda.cpp b/dnn/cuda-stub/src/libcuda.cpp
index 4ced0476..b2fd129f 100644
--- a/dnn/cuda-stub/src/libcuda.cpp
+++ b/dnn/cuda-stub/src/libcuda.cpp
@@ -3,7 +3,8 @@
 #pragma GCC visibility push(default)
 
 #include <cstdio>
-#define LOGE(fmt, v...) fprintf(stderr, "err: " fmt "\n", ##v)
+#define LOGI(fmt, v...) fprintf(stderr, "info: " fmt "\n", ##v)  // keep stdout clean
+#define LOGD(fmt, v...) fprintf(stderr, "debug: " fmt "\n", ##v)
 
 extern "C" {
 #include "cuda.h"
diff --git a/dnn/cuda-stub/src/libnvrtc.cpp b/dnn/cuda-stub/src/libnvrtc.cpp
index 03522026..6de777eb 100644
--- a/dnn/cuda-stub/src/libnvrtc.cpp
+++ b/dnn/cuda-stub/src/libnvrtc.cpp
@@ -13,7 +13,8 @@
 #pragma GCC visibility push(default)
 
 #include <cstdio>
-#define LOGE(fmt, v...) fprintf(stderr, "err: " fmt "\n", ##v)
+#define LOGI(fmt, v...) fprintf(stderr, "info: " fmt "\n", ##v)  // keep stdout clean
+#define LOGD(fmt, v...) fprintf(stderr, "debug: " fmt "\n", ##v)
 #include "./nvrtc_type.h"
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 
@@ -72,4 +73,4 @@ static const char* default_so_paths[] = {
 static const char* extra_so_paths[] = {};
 
 static const char* g_default_api_name = "nvrtc";
-#include "./dlopen_helper.h"
\ No newline at end of file
+#include "./dlopen_helper.h"
diff --git a/src/core/impl/comp_node/cuda/comp_node.cpp b/src/core/impl/comp_node/cuda/comp_node.cpp
index 5891a7c5..d31c9dc6 100644
--- a/src/core/impl/comp_node/cuda/comp_node.cpp
+++ b/src/core/impl/comp_node/cuda/comp_node.cpp
@@ -822,8 +822,10 @@ CUresult call_cuda_forksafe(Func func, Val* val, Args... args) {
 const char* cu_get_error_string(CUresult err) {
     const char* ret = nullptr;
     cuGetErrorString(err, &ret);
-    if (!ret)
-        ret = "unknown cuda error";
+    if (!ret) {
+        //! stub found no driver; NOTE(review): unknown err codes may also leave ret null
+        ret = "invalid_stub_call";
+    }
     return ret;
 }
 
@@ -837,10 +839,12 @@ bool CudaCompNode::available() {
         int ndev = -1;
         auto err = call_cuda_forksafe(cuDeviceGetCount, &ndev);
         result = err == CUDA_SUCCESS && ndev > 0;
-        if (!result) {
+        auto err_s = cu_get_error_string(err);
+        //! only warn when the error string is not the "invalid_stub_call" sentinel
+        if (!result && (std::string(err_s) != "invalid_stub_call")) {
             mgb_log_warn(
-                    "cuda unavailable: %s(%d) ndev=%d", cu_get_error_string(err),
-                    static_cast<int>(err), ndev);
+                    "cuda unavailable: %s(%d) ndev=%d", err_s, static_cast<int>(err),
+                    ndev);
         }
         if (err == CUDA_ERROR_NOT_INITIALIZED) {
             mgb_throw(std::runtime_error, "cuda initialization error.");
@@ -984,11 +988,11 @@ size_t CudaCompNode::get_device_count(bool warn) {
     MGB_LOCK_GUARD(mtx);
     if (cnt == -1) {
         auto err = call_cuda_forksafe(cuDeviceGetCount, &cnt);
+        auto err_s = cu_get_error_string(err);
         if (err != CUDA_SUCCESS) {
-            if (warn)
+            if (warn && (std::string(err_s) != "invalid_stub_call"))
                 mgb_log_error(
-                        "cudaGetDeviceCount failed: %s (err %d)",
-                        cu_get_error_string(err), int(err));
+                        "cudaGetDeviceCount failed: %s (err %d)", err_s, int(err));
             cnt = 0;
         }
         mgb_assert(cnt >= 0);