|
|
@@ -6,39 +6,39 @@ |
|
|
|
* |
|
|
|
* Unless required by applicable law or agreed to in writing, |
|
|
|
* software distributed under the License is distributed on an |
|
|
|
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
|
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or |
|
|
|
* implied. |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "megbrain/utils/debug.h" |
|
|
|
#include "megdnn/tensor_iter.h" |
|
|
|
#include <cmath> |
|
|
|
#include <cerrno> |
|
|
|
#include <cmath> |
|
|
|
#include "megdnn/tensor_iter.h" |
|
|
|
|
|
|
|
using namespace mgb; |
|
|
|
using namespace debug; |
|
|
|
|
|
|
|
|
|
|
|
#if MGB_ENABLE_DEBUG_UTIL |
|
|
|
|
|
|
|
#include "megbrain/utils/metahelper.h" |
|
|
|
#include "megbrain/common.h" |
|
|
|
#include "megbrain/utils/metahelper.h" |
|
|
|
|
|
|
|
#include "megbrain/utils/thin/function.h" |
|
|
|
#include <regex> |
|
|
|
#include <cstring> |
|
|
|
#include <cctype> |
|
|
|
#include <cstdio> |
|
|
|
#include <cstdlib> |
|
|
|
#include <cstring> |
|
|
|
#include <regex> |
|
|
|
#include "megbrain/utils/thin/function.h" |
|
|
|
|
|
|
|
#if MGB_CUDA |
|
|
|
#include <cuda.h> |
|
|
|
#include <cuda_runtime.h> |
|
|
|
#endif |
|
|
|
|
|
|
|
#include <pthread.h> |
|
|
|
#include <signal.h> |
|
|
|
#include <sys/types.h> |
|
|
|
#include <unistd.h> |
|
|
|
#include <pthread.h> |
|
|
|
|
|
|
|
#ifdef __ANDROID__ |
|
|
|
#include <unwind.h> |
|
|
@@ -54,17 +54,17 @@ struct AndroidBacktraceState { |
|
|
|
void** end; |
|
|
|
}; |
|
|
|
|
|
|
|
static _Unwind_Reason_Code android_unwind_callback(struct _Unwind_Context* context, |
|
|
|
void* arg) { |
|
|
|
static _Unwind_Reason_Code android_unwind_callback( |
|
|
|
struct _Unwind_Context* context, void* arg) { |
|
|
|
AndroidBacktraceState* state = static_cast<AndroidBacktraceState*>(arg); |
|
|
|
uintptr_t current_pc = _Unwind_GetIP(context); |
|
|
|
if(current_pc == nullptr) |
|
|
|
void* current_pc = reinterpret_cast<void*>(_Unwind_GetIP(context)); |
|
|
|
if (current_pc == nullptr) |
|
|
|
return _URC_NO_REASON; |
|
|
|
|
|
|
|
if (state->current == state->end) { |
|
|
|
return _URC_END_OF_STACK; |
|
|
|
return _URC_END_OF_STACK; |
|
|
|
} else { |
|
|
|
*state->current++ = reinterpret_cast<void*>(current_pc); |
|
|
|
*state->current++ = current_pc; |
|
|
|
} |
|
|
|
|
|
|
|
return _URC_NO_REASON; |
|
|
@@ -89,26 +89,27 @@ struct MemmapEntry { |
|
|
|
uintptr_t low, high; |
|
|
|
std::string file; |
|
|
|
|
|
|
|
MemmapEntry(uint64_t low_, uint64_t high_, const char *file_): |
|
|
|
low(low_), high(high_), file(file_) |
|
|
|
{} |
|
|
|
MemmapEntry(uint64_t low_, uint64_t high_, const char* file_) |
|
|
|
: low(low_), high(high_), file(file_) {} |
|
|
|
}; |
|
|
|
|
|
|
|
void get_mem_map(int pid, thin_function<void( |
|
|
|
uintptr_t, uintptr_t, const char*, const char*)> callback) { |
|
|
|
void get_mem_map( |
|
|
|
int pid, |
|
|
|
thin_function<void(uintptr_t, uintptr_t, const char*, const char*)> |
|
|
|
callback) { |
|
|
|
char fpath[64]; |
|
|
|
if (pid) |
|
|
|
sprintf(fpath, "/proc/%d/maps", pid); |
|
|
|
else |
|
|
|
strcpy(fpath, "/proc/self/maps"); |
|
|
|
FILE *fin = fopen(fpath, "r"); |
|
|
|
FILE* fin = fopen(fpath, "r"); |
|
|
|
mgb_assert(fin, "failed to open %s", fpath); |
|
|
|
char linebuf[512]; |
|
|
|
while (fgets(linebuf, sizeof(linebuf), fin)) { |
|
|
|
uintptr_t begin, end; |
|
|
|
char perm[10], offset[20], dev[10], inode[20], path_mem[256], *path; |
|
|
|
int nr = sscanf(linebuf, "%zx-%zx %s %s %s %s %s", |
|
|
|
&begin, &end, perm, offset, dev, inode, path_mem); |
|
|
|
int nr = sscanf(linebuf, "%zx-%zx %s %s %s %s %s", &begin, &end, perm, |
|
|
|
offset, dev, inode, path_mem); |
|
|
|
if (nr == 6) |
|
|
|
path = nullptr; |
|
|
|
else { |
|
|
@@ -122,10 +123,12 @@ void get_mem_map(int pid, thin_function<void( |
|
|
|
|
|
|
|
class SigHandlerInit { |
|
|
|
static void death_handler(int signum) { |
|
|
|
char msg0[] = "megbrain is about to die abruptly; you can set " |
|
|
|
"MGB_WAIT_TERMINATE and rerun to wait for gdb attach"; |
|
|
|
char msg0[] = |
|
|
|
"megbrain is about to die abruptly; you can set " |
|
|
|
"MGB_WAIT_TERMINATE and rerun to wait for gdb attach"; |
|
|
|
if (MGB_GETENV("MGB_WAIT_TERMINATE")) { |
|
|
|
fprintf(stderr, "megbrain is about to die abruptly; you can gdb " |
|
|
|
fprintf(stderr, |
|
|
|
"megbrain is about to die abruptly; you can gdb " |
|
|
|
"me at %d; wait for pressing enter\n", |
|
|
|
static_cast<int>(getpid())); |
|
|
|
getchar(); |
|
|
@@ -133,9 +136,8 @@ class SigHandlerInit { |
|
|
|
if (signum == -1) { |
|
|
|
mgb_log_error("%s: std::terminate() called", msg0); |
|
|
|
} else { |
|
|
|
mgb_log_error("%s: caught deadly signal %d(%s)", |
|
|
|
msg0, |
|
|
|
signum, strsignal(signum)); |
|
|
|
mgb_log_error("%s: caught deadly signal %d(%s)", msg0, signum, |
|
|
|
strsignal(signum)); |
|
|
|
} |
|
|
|
std::string bp; |
|
|
|
debug::backtrace(2).fmt_to_str(bp); |
|
|
@@ -143,14 +145,14 @@ class SigHandlerInit { |
|
|
|
exit(EXIT_FAILURE); |
|
|
|
} |
|
|
|
|
|
|
|
public: |
|
|
|
static void init_for_segv() { |
|
|
|
struct sigaction action; |
|
|
|
memset(&action, 0, sizeof(action)); |
|
|
|
action.sa_handler = &death_handler; |
|
|
|
sigaction(SIGSEGV, &action, nullptr); |
|
|
|
std::set_terminate([](){death_handler(-1);}); |
|
|
|
} |
|
|
|
public: |
|
|
|
static void init_for_segv() { |
|
|
|
struct sigaction action; |
|
|
|
memset(&action, 0, sizeof(action)); |
|
|
|
action.sa_handler = &death_handler; |
|
|
|
sigaction(SIGSEGV, &action, nullptr); |
|
|
|
std::set_terminate([]() { death_handler(-1); }); |
|
|
|
} |
|
|
|
}; |
|
|
|
|
|
|
|
#if MGB_CUDA |
|
|
@@ -164,7 +166,8 @@ class CudaCheckOnFork { |
|
|
|
if (flag() && !ScopedForkWarningSupress::supress()) { |
|
|
|
CUcontext ctx; |
|
|
|
if (cuCtxGetCurrent(&ctx) != CUDA_ERROR_NOT_INITIALIZED) { |
|
|
|
mgb_log_debug("It is dangerous to call fork() after cuda " |
|
|
|
mgb_log_debug( |
|
|
|
"It is dangerous to call fork() after cuda " |
|
|
|
"context has been initialized; please ensure no cuda " |
|
|
|
"methods is invoked in the child process. You can set " |
|
|
|
"MGB_THROW_ON_FORK to find out where the fork() is " |
|
|
@@ -177,20 +180,17 @@ class CudaCheckOnFork { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
public: |
|
|
|
static void set_flag(int f) { |
|
|
|
flag() = f; |
|
|
|
} |
|
|
|
public: |
|
|
|
static void set_flag(int f) { flag() = f; } |
|
|
|
|
|
|
|
static void init() { |
|
|
|
int err = pthread_atfork(&CudaCheckOnFork::atfork_prepare, |
|
|
|
nullptr, nullptr); |
|
|
|
if (err) { |
|
|
|
mgb_throw(SystemError, |
|
|
|
"failed to setup atfork handler: %s", |
|
|
|
strerror(err)); |
|
|
|
} |
|
|
|
static void init() { |
|
|
|
int err = pthread_atfork(&CudaCheckOnFork::atfork_prepare, nullptr, |
|
|
|
nullptr); |
|
|
|
if (err) { |
|
|
|
mgb_throw(SystemError, "failed to setup atfork handler: %s", |
|
|
|
strerror(err)); |
|
|
|
} |
|
|
|
} |
|
|
|
}; |
|
|
|
#endif |
|
|
|
|
|
|
@@ -206,7 +206,7 @@ class InitCaller { |
|
|
|
}; |
|
|
|
InitCaller InitCaller::inst; |
|
|
|
|
|
|
|
} // anonymous namespace |
|
|
|
} // anonymous namespace |
|
|
|
|
|
|
|
void (*ForkAfterCudaError::throw_)() = throw_fork_cuda_exc; |
|
|
|
|
|
|
@@ -221,7 +221,7 @@ BacktraceResult mgb::debug::backtrace(int nr_exclude) { |
|
|
|
recursive_call = true; |
|
|
|
|
|
|
|
constexpr size_t MAX_DEPTH = 6; |
|
|
|
void *stack_mem[MAX_DEPTH]; |
|
|
|
void* stack_mem[MAX_DEPTH]; |
|
|
|
int depth = ::backtrace(stack_mem, MAX_DEPTH); |
|
|
|
auto stack = stack_mem; |
|
|
|
if (depth > nr_exclude) { |
|
|
@@ -234,19 +234,19 @@ BacktraceResult mgb::debug::backtrace(int nr_exclude) { |
|
|
|
static std::mutex mtx; |
|
|
|
MGB_LOCK_GUARD(mtx); |
|
|
|
if (memmap.empty()) { |
|
|
|
get_mem_map(0, [&](uintptr_t lo, uintptr_t hi, |
|
|
|
const char * /*perm*/, const char *fname){ |
|
|
|
if (fname && strlen(fname)) |
|
|
|
get_mem_map(0, [&](uintptr_t lo, uintptr_t hi, const char* /*perm*/, |
|
|
|
const char* fname) { |
|
|
|
if (fname && strlen(fname)) |
|
|
|
memmap.emplace_back(lo, hi, fname); |
|
|
|
}); |
|
|
|
}); |
|
|
|
} |
|
|
|
} |
|
|
|
BacktraceResult result; |
|
|
|
|
|
|
|
for (int i = 0; i < depth; ++ i) { |
|
|
|
for (int i = 0; i < depth; ++i) { |
|
|
|
const char* fname = nullptr; |
|
|
|
auto addr = reinterpret_cast<uintptr_t>(stack[i]); |
|
|
|
for (auto &&j: memmap) |
|
|
|
for (auto&& j : memmap) |
|
|
|
if (j.low <= addr && j.high >= addr) { |
|
|
|
// theoretically we should examine file content to find whether |
|
|
|
// it is a shared library; but who would name an executable with |
|
|
@@ -264,12 +264,12 @@ BacktraceResult mgb::debug::backtrace(int nr_exclude) { |
|
|
|
return result; |
|
|
|
} |
|
|
|
|
|
|
|
void BacktraceResult::fmt_to_str(std::string &dst) { |
|
|
|
void BacktraceResult::fmt_to_str(std::string& dst) { |
|
|
|
char addr[128]; |
|
|
|
bool first = true; |
|
|
|
const char* prev_fname = nullptr; |
|
|
|
dst.append("bt:"); |
|
|
|
for (auto &&i: stack) { |
|
|
|
for (auto&& i : stack) { |
|
|
|
sprintf(addr, "%zx", i.second); |
|
|
|
if (i.first != prev_fname || first) { |
|
|
|
if (!first) |
|
|
@@ -296,85 +296,85 @@ void debug::set_fork_cuda_warning_flag(int flag) { |
|
|
|
#endif |
|
|
|
} |
|
|
|
|
|
|
|
#endif // MGB_ENABLE_DEBUG_UTIL |
|
|
|
#endif // MGB_ENABLE_DEBUG_UTIL |
|
|
|
|
|
|
|
namespace { |
|
|
|
|
|
|
|
bool good_float(float val) { |
|
|
|
return std::isfinite(val); |
|
|
|
} |
|
|
|
bool good_float(float val) { |
|
|
|
return std::isfinite(val); |
|
|
|
} |
|
|
|
|
|
|
|
bool good_float(int) { |
|
|
|
return true; |
|
|
|
} |
|
|
|
bool good_float(int) { |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
#if MGB_ENABLE_LOGGING |
|
|
|
// if not in MGB_ENABLE_LOGGING, num2str would become defined but not used |
|
|
|
template<typename T> |
|
|
|
std::string num2str(T val) { |
|
|
|
return std::to_string(val); |
|
|
|
} |
|
|
|
// if not in MGB_ENABLE_LOGGING, num2str would become defined but not used |
|
|
|
template <typename T> |
|
|
|
std::string num2str(T val) { |
|
|
|
return std::to_string(val); |
|
|
|
} |
|
|
|
|
|
|
|
std::string num2str(float val) { |
|
|
|
union V { |
|
|
|
uint32_t i; |
|
|
|
float f; |
|
|
|
}; |
|
|
|
auto ret = std::to_string(val); |
|
|
|
if (!good_float(val)) { |
|
|
|
V v; |
|
|
|
v.f = val; |
|
|
|
ret.append(" (0x"); |
|
|
|
ret.append(ssprintf("%x", v.i)); |
|
|
|
ret.append(")"); |
|
|
|
} |
|
|
|
return ret; |
|
|
|
std::string num2str(float val) { |
|
|
|
union V { |
|
|
|
uint32_t i; |
|
|
|
float f; |
|
|
|
}; |
|
|
|
auto ret = std::to_string(val); |
|
|
|
if (!good_float(val)) { |
|
|
|
V v; |
|
|
|
v.f = val; |
|
|
|
ret.append(" (0x"); |
|
|
|
ret.append(ssprintf("%x", v.i)); |
|
|
|
ret.append(")"); |
|
|
|
} |
|
|
|
return ret; |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
template<typename ctype> |
|
|
|
Maybe<std::string> do_compare_tensor_value( |
|
|
|
const char *expr0, const char *expr1, |
|
|
|
const HostTensorND &v0, const HostTensorND &v1, float maxerr) { |
|
|
|
|
|
|
|
auto it0 = megdnn::tensor_iter<ctype>(v0.as_megdnn()).begin(), |
|
|
|
it1 = megdnn::tensor_iter<ctype>(v1.as_megdnn()).begin(); |
|
|
|
for (size_t i = 0, it = v0.shape().total_nr_elems(); i < it; ++ i) { |
|
|
|
ctype iv0 = *it0, iv1 = *it1; |
|
|
|
double err = std::abs(iv0 - iv1) / std::max<double>( |
|
|
|
1, std::min(std::abs(static_cast<double>(iv0)), |
|
|
|
std::abs((static_cast<double>(iv1))))); |
|
|
|
if (!good_float(iv0) || !good_float(iv1) || err >= maxerr) { |
|
|
|
TensorShape idx_shp; |
|
|
|
idx_shp.ndim = v0.shape().ndim; |
|
|
|
std::copy(it0.idx(), it0.idx() + idx_shp.ndim, idx_shp.shape); |
|
|
|
return mgb_ssprintf_log( |
|
|
|
"Unequal value\n" |
|
|
|
"Value of: %s\n" |
|
|
|
" Actual: %s\n" |
|
|
|
"Expected: %s\n" |
|
|
|
"Which is: %s\n" |
|
|
|
"At index: %s/%s\n" |
|
|
|
" error: %.6g", |
|
|
|
expr1, num2str(iv1).c_str(), |
|
|
|
expr0, num2str(iv0).c_str(), |
|
|
|
idx_shp.to_string().c_str(), |
|
|
|
v0.shape().to_string().c_str(), |
|
|
|
err); |
|
|
|
} |
|
|
|
|
|
|
|
++ it0; |
|
|
|
++ it1; |
|
|
|
template <typename ctype> |
|
|
|
Maybe<std::string> do_compare_tensor_value(const char* expr0, const char* expr1, |
|
|
|
const HostTensorND& v0, |
|
|
|
const HostTensorND& v1, |
|
|
|
float maxerr) { |
|
|
|
auto it0 = megdnn::tensor_iter<ctype>(v0.as_megdnn()).begin(), |
|
|
|
it1 = megdnn::tensor_iter<ctype>(v1.as_megdnn()).begin(); |
|
|
|
for (size_t i = 0, it = v0.shape().total_nr_elems(); i < it; ++i) { |
|
|
|
ctype iv0 = *it0, iv1 = *it1; |
|
|
|
double err = std::abs(iv0 - iv1) / |
|
|
|
std::max<double>( |
|
|
|
1, std::min(std::abs(static_cast<double>(iv0)), |
|
|
|
std::abs((static_cast<double>(iv1))))); |
|
|
|
if (!good_float(iv0) || !good_float(iv1) || err >= maxerr) { |
|
|
|
TensorShape idx_shp; |
|
|
|
idx_shp.ndim = v0.shape().ndim; |
|
|
|
std::copy(it0.idx(), it0.idx() + idx_shp.ndim, idx_shp.shape); |
|
|
|
return mgb_ssprintf_log( |
|
|
|
"Unequal value\n" |
|
|
|
"Value of: %s\n" |
|
|
|
" Actual: %s\n" |
|
|
|
"Expected: %s\n" |
|
|
|
"Which is: %s\n" |
|
|
|
"At index: %s/%s\n" |
|
|
|
" error: %.6g", |
|
|
|
expr1, num2str(iv1).c_str(), expr0, num2str(iv0).c_str(), |
|
|
|
idx_shp.to_string().c_str(), v0.shape().to_string().c_str(), |
|
|
|
err); |
|
|
|
} |
|
|
|
return None; |
|
|
|
|
|
|
|
++it0; |
|
|
|
++it1; |
|
|
|
} |
|
|
|
return None; |
|
|
|
} |
|
|
|
|
|
|
|
} // anonymous namespace |
|
|
|
} // anonymous namespace |
|
|
|
|
|
|
|
Maybe<std::string> debug::compare_tensor_value( |
|
|
|
const HostTensorND &v0, const char *expr0, |
|
|
|
const HostTensorND &v1, const char *expr1, |
|
|
|
float maxerr) { |
|
|
|
Maybe<std::string> debug::compare_tensor_value(const HostTensorND& v0, |
|
|
|
const char* expr0, |
|
|
|
const HostTensorND& v1, |
|
|
|
const char* expr1, |
|
|
|
float maxerr) { |
|
|
|
if (!v0.shape().eq_shape(v1.shape())) { |
|
|
|
return mgb_ssprintf_log( |
|
|
|
"Shape mismatch\n" |
|
|
@@ -382,8 +382,8 @@ Maybe<std::string> debug::compare_tensor_value( |
|
|
|
" Actual: %s\n" |
|
|
|
"Expected: %s\n" |
|
|
|
"Which is: %s", |
|
|
|
expr1, v1.shape().to_string().c_str(), |
|
|
|
expr0, v0.shape().to_string().c_str()); |
|
|
|
expr1, v1.shape().to_string().c_str(), expr0, |
|
|
|
v0.shape().to_string().c_str()); |
|
|
|
} |
|
|
|
auto dtype = v0.layout().dtype; |
|
|
|
if (dtype != v1.layout().dtype) { |
|
|
@@ -393,12 +393,13 @@ Maybe<std::string> debug::compare_tensor_value( |
|
|
|
" Actual: %s\n" |
|
|
|
"Expected: %s\n" |
|
|
|
"Which is: %s", |
|
|
|
expr1, v1.layout().dtype.name(), |
|
|
|
expr0, v0.layout().dtype.name()); |
|
|
|
expr1, v1.layout().dtype.name(), expr0, |
|
|
|
v0.layout().dtype.name()); |
|
|
|
} |
|
|
|
|
|
|
|
switch(dtype.enumv()) { |
|
|
|
#define cb(_dt) case DTypeTrait<_dt>::enumv: \ |
|
|
|
switch (dtype.enumv()) { |
|
|
|
#define cb(_dt) \ |
|
|
|
case DTypeTrait<_dt>::enumv: \ |
|
|
|
return do_compare_tensor_value<DTypeTrait<_dt>::ctype>( \ |
|
|
|
expr0, expr1, v0, v1, maxerr); |
|
|
|
MEGDNN_FOREACH_COMPUTING_DTYPE(cb) |
|
|
@@ -408,7 +409,8 @@ Maybe<std::string> debug::compare_tensor_value( |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
std::string debug::dump_tensor(const HostTensorND& value, const std::string &name) { |
|
|
|
std::string debug::dump_tensor(const HostTensorND& value, |
|
|
|
const std::string& name) { |
|
|
|
struct Header { |
|
|
|
uint32_t name_len; |
|
|
|
uint32_t dtype; |
|
|
@@ -418,7 +420,7 @@ std::string debug::dump_tensor(const HostTensorND& value, const std::string &nam |
|
|
|
}; |
|
|
|
mgb_assert(value.layout().is_contiguous()); |
|
|
|
auto value_bytes = value.layout().span().dist_byte(); |
|
|
|
std::string ret(name.size() + value_bytes + sizeof(Header), '\0'); |
|
|
|
std::string ret(name.size() + value_bytes + sizeof(Header), '\0'); |
|
|
|
auto header = reinterpret_cast<Header*>(&ret[0]); |
|
|
|
memset(header, 0, sizeof(Header)); |
|
|
|
header->name_len = name.length(); |
|
|
@@ -439,8 +441,8 @@ void debug::write_to_file(const char* filename, const std::string& content, |
|
|
|
strerror(errno)); |
|
|
|
auto nr = fwrite(content.data(), 1, content.size(), fout); |
|
|
|
mgb_throw_if(nr != content.size(), SystemError, |
|
|
|
"failed to write to %s: num=%zu size=%zu %s", filename, |
|
|
|
nr, content.size() ,strerror(errno)); |
|
|
|
"failed to write to %s: num=%zu size=%zu %s", filename, nr, |
|
|
|
content.size(), strerror(errno)); |
|
|
|
auto err = fclose(fout); |
|
|
|
mgb_throw_if(err, SystemError, "failed to close %s: %s", filename, |
|
|
|
strerror(errno)); |
|
|
|