/**
 * \file src/core/impl/utils/debug.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */

#include "megbrain/utils/debug.h"
#include <cerrno>
#include <cmath>
#include "megdnn/tensor_iter.h"

using namespace mgb;
using namespace debug;

#if MGB_ENABLE_DEBUG_UTIL

#include "megbrain/common.h"
#include "megbrain/utils/metahelper.h"

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <regex>
#include "megbrain/utils/thin/function.h"

#if MGB_CUDA
#include <cuda.h>
#include <cuda_runtime.h>
#endif

#include <pthread.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>

#ifdef __ANDROID__
#include <unwind.h>
#else
#include <execinfo.h>
#endif

#ifdef __ANDROID__
namespace {

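// bionic historically lacks <execinfo.h>, so glibc's backtrace(3) is not
// available on Android; emulate it with _Unwind_Backtrace, which walks the
// stack and invokes a callback per frame (_Unwind_GetIP extracts the PC)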
struct AndroidBacktraceState {
    void** current;
    void** end;
};

static _Unwind_Reason_Code android_unwind_callback(
        struct _Unwind_Context* context, void* arg) {
    AndroidBacktraceState* state = static_cast<AndroidBacktraceState*>(arg);
    void* current_pc = reinterpret_cast<void*>(_Unwind_GetIP(context));
    if (current_pc == nullptr)
        return _URC_NO_REASON;

    if (state->current == state->end) {
        return _URC_END_OF_STACK;
    } else {
        *state->current++ = current_pc;
    }

    return _URC_NO_REASON;
}

size_t backtrace(void** buffer, size_t max) {
    AndroidBacktraceState state = {buffer, buffer + max};
    _Unwind_Backtrace(android_unwind_callback, &state);
    return state.current - buffer;
}

}  // anonymous namespace
#endif  // backtrace impl for __ANDROID__

namespace {

void throw_fork_cuda_exc() {
    mgb_throw(ForkAfterCudaError, "fork after cuda has been initialized");
}

struct MemmapEntry {
    uintptr_t low, high;
    std::string file;

    MemmapEntry(uint64_t low_, uint64_t high_, const char* file_)
            : low(low_), high(high_), file(file_) {}
};

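// parse /proc/<pid>/maps (or /proc/self/maps when pid is 0) and invoke
// callback(begin, end, perm, path) for each mapping; a typical line looks
// like
//   7f2b4c000000-7f2b4c021000 r-xp 00000000 08:01 1234567 /usr/lib/libfoo.so
// where the trailing path may be absent for anonymous mappings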
void get_mem_map(
        int pid,
        thin_function<void(uintptr_t, uintptr_t, const char*, const char*)>
                callback) {
    char fpath[64];
    if (pid)
        sprintf(fpath, "/proc/%d/maps", pid);
    else
        strcpy(fpath, "/proc/self/maps");
    FILE* fin = fopen(fpath, "r");
    mgb_assert(fin, "failed to open %s", fpath);
    char linebuf[512];
    while (fgets(linebuf, sizeof(linebuf), fin)) {
        uintptr_t begin, end;
        char perm[10], offset[20], dev[10], inode[20], path_mem[256], *path;
        // %zx scans a size_t, which matches uintptr_t in width on the
        // platforms this file supports
        int nr = sscanf(linebuf, "%zx-%zx %s %s %s %s %s", &begin, &end, perm,
                        offset, dev, inode, path_mem);
        if (nr == 6)
            path = nullptr;
        else {
            mgb_assert(nr == 7, "failed to parse map line: %s", linebuf);
            path = path_mem;
        }
        callback(begin, end, perm, path);
    }
    fclose(fin);
}

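// install handlers that log a human-readable backtrace before the process
// dies: init_for_segv() catches SIGSEGV and also reroutes std::terminate()
// into the same handler (signum == -1 marks the terminate path); setting
// MGB_WAIT_TERMINATE pauses the process so a debugger can be attached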
class SigHandlerInit {
    static void death_handler(int signum) {
        char msg0[] =
                "megbrain is about to die abruptly; you can set "
                "MGB_WAIT_TERMINATE and rerun to wait for gdb attach";
        if (MGB_GETENV("MGB_WAIT_TERMINATE")) {
            fprintf(stderr,
                    "megbrain is about to die abruptly; you can gdb "
                    "me at %d; wait for pressing enter\n",
                    static_cast<int>(getpid()));
            getchar();
        }
        if (signum == -1) {
            mgb_log_error("%s: std::terminate() called", msg0);
        } else {
            mgb_log_error("%s: caught deadly signal %d(%s)", msg0, signum,
                          strsignal(signum));
        }
        // FIXME: implement backtrace for macOS
#ifndef __APPLE__
        std::string bp;
        debug::backtrace(2).fmt_to_str(bp);
        mgb_log_error("%s", bp.c_str());
#endif
        exit(EXIT_FAILURE);
    }

public:
    static void init_for_segv() {
        struct sigaction action;
        memset(&action, 0, sizeof(action));
        action.sa_handler = &death_handler;
        sigaction(SIGSEGV, &action, nullptr);
        std::set_terminate([]() { death_handler(-1); });
    }
};

#if MGB_CUDA
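// warn (and optionally abort) when fork() is called after the CUDA driver
// has been initialized: CUDA contexts do not survive fork(), so any CUDA
// call in the child is likely to fail in confusing ways; the flag selects
// the behavior (0: disabled, 1: warn, >1: throw ForkAfterCudaError)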
class CudaCheckOnFork {
    static int& flag() {
        static int ret = MGB_GETENV("MGB_THROW_ON_FORK") ? 2 : 1;
        return ret;
    }

    static void atfork_prepare() {
        if (flag() && !ScopedForkWarningSupress::supress()) {
            CUcontext ctx;
            if (cuCtxGetCurrent(&ctx) != CUDA_ERROR_NOT_INITIALIZED) {
                mgb_log_debug(
                        "It is dangerous to call fork() after a cuda "
                        "context has been initialized; please ensure that "
                        "no cuda method is invoked in the child process. "
                        "You can set MGB_THROW_ON_FORK to find out where "
                        "the fork() is called.");

                if (flag() > 1) {
                    ForkAfterCudaError::throw_();
                }
            }
        }
    }

public:
    static void set_flag(int f) { flag() = f; }

    static void init() {
        int err = pthread_atfork(&CudaCheckOnFork::atfork_prepare, nullptr,
                                 nullptr);
        if (err) {
            mgb_throw(SystemError, "failed to setup atfork handler: %s",
                      strerror(err));
        }
    }
};
#endif

class InitCaller {
    static InitCaller inst;

    InitCaller() {
        SigHandlerInit::init_for_segv();
#if MGB_CUDA
        CudaCheckOnFork::init();
#endif
    }
};
InitCaller InitCaller::inst;

}  // anonymous namespace

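// bind the default implementation of ForkAfterCudaError::throw_; the header
// declares it as a function pointer, presumably so other code can trigger
// the exception without depending on this translation unit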
void (*ForkAfterCudaError::throw_)() = throw_fork_cuda_exc;

std::atomic_size_t ScopedForkWarningSupress::sm_depth{0};

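// capture up to MAX_DEPTH raw return addresses, drop the innermost
// nr_exclude frames, and rebase addresses that fall inside shared objects
// to file-relative offsets (via /proc/self/maps) so they can later be
// symbolized with tools like addr2line; the memmap is parsed once and cached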
BacktraceResult mgb::debug::backtrace(int nr_exclude) {
    static thread_local bool recursive_call = false;
    if (recursive_call) {
        fprintf(stderr, "recursive call to backtrace()!\n");
        return {};
    }
    recursive_call = true;

    constexpr size_t MAX_DEPTH = 6;
    void* stack_mem[MAX_DEPTH];
    int depth = ::backtrace(stack_mem, MAX_DEPTH);
    auto stack = stack_mem;
    if (depth > nr_exclude) {
        depth -= nr_exclude;
        stack += nr_exclude;
    }

    static std::vector<MemmapEntry> memmap;
    if (memmap.empty()) {
        static std::mutex mtx;
        MGB_LOCK_GUARD(mtx);
        if (memmap.empty()) {
            get_mem_map(0, [&](uintptr_t lo, uintptr_t hi,
                               const char* /*perm*/, const char* fname) {
                if (fname && strlen(fname))
                    memmap.emplace_back(lo, hi, fname);
            });
        }
    }
    BacktraceResult result;

    for (int i = 0; i < depth; ++i) {
        const char* fname = nullptr;
        auto addr = reinterpret_cast<uintptr_t>(stack[i]);
        for (auto&& j : memmap)
            if (j.low <= addr && j.high >= addr) {
                // theoretically we should examine the file content to decide
                // whether it is a shared library; but who would name an
                // executable with .so?
                if (j.file.find(".so") != std::string::npos)
                    addr -= j.low;

                fname = j.file.c_str();
                break;
            }
        result.stack.emplace_back(fname, addr);
    }

    recursive_call = false;
    return result;
}

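// append a compact representation to dst, grouping consecutive frames from
// the same file: "bt:<file>{<addr>,<addr>}<file2>{<addr>}"; frames with no
// known file are reported as "unknown"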
void BacktraceResult::fmt_to_str(std::string& dst) {
    char addr[128];
    bool first = true;
    const char* prev_fname = nullptr;
    dst.append("bt:");
    for (auto&& i : stack) {
        sprintf(addr, "%zx", i.second);
        if (i.first != prev_fname || first) {
            if (!first)
                dst.append("}");
            if (i.first)
                dst.append(i.first);
            else
                dst.append("unknown");
            prev_fname = i.first;
            first = false;
            dst.append("{");
            dst.append(addr);
        } else {
            dst.append(",");
            dst.append(addr);
        }
    }
    // close the last group; skip if the stack was empty
    if (!first)
        dst.append("}");
}

void debug::set_fork_cuda_warning_flag(int flag) {
#if MGB_CUDA
    CudaCheckOnFork::set_flag(flag);
#endif
}

#endif  // MGB_ENABLE_DEBUG_UTIL

namespace {

bool good_float(float val) {
    return std::isfinite(val);
}

bool good_float(int) {
    return true;
}

#if MGB_ENABLE_LOGGING
// without MGB_ENABLE_LOGGING, num2str would be defined but unused and
// trigger compiler warnings, so it is compiled out
template <typename T>
std::string num2str(T val) {
    return std::to_string(val);
}

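// for non-finite floats, also show the raw IEEE-754 bit pattern, e.g. a
// quiet NaN prints as "nan (0x7fc00000)"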
std::string num2str(float val) {
    union V {
        uint32_t i;
        float f;
    };
    auto ret = std::to_string(val);
    if (!good_float(val)) {
        V v;
        v.f = val;
        ret.append(" (0x");
        ret.append(ssprintf("%x", v.i));
        ret.append(")");
    }
    return ret;
}
#endif

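// element-wise comparison using a relative error whose denominator is
// clamped to at least 1, i.e.
//   err = |v0 - v1| / max(1, min(|v0|, |v1|))
// so values near zero are effectively compared by absolute error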
template <typename ctype>
Maybe<std::string> do_compare_tensor_value(const char* expr0, const char* expr1,
                                           const HostTensorND& v0,
                                           const HostTensorND& v1,
                                           float maxerr) {
    auto it0 = megdnn::tensor_iter<ctype>(v0.as_megdnn()).begin(),
         it1 = megdnn::tensor_iter<ctype>(v1.as_megdnn()).begin();
    for (size_t i = 0, nr = v0.shape().total_nr_elems(); i < nr; ++i) {
        ctype iv0 = *it0, iv1 = *it1;
        double err = std::abs(iv0 - iv1) /
                     std::max<double>(
                             1, std::min(std::abs(static_cast<double>(iv0)),
                                         std::abs(static_cast<double>(iv1))));
        if (!good_float(iv0) || !good_float(iv1) || err >= maxerr) {
            TensorShape idx_shp;
            idx_shp.ndim = v0.shape().ndim;
            std::copy(it0.idx(), it0.idx() + idx_shp.ndim, idx_shp.shape);
            return mgb_ssprintf_log(
                    "Unequal value\n"
                    "Value of: %s\n"
                    "  Actual: %s\n"
                    "Expected: %s\n"
                    "Which is: %s\n"
                    "At index: %s/%s\n"
                    "   error: %.6g",
                    expr1, num2str(iv1).c_str(), expr0, num2str(iv0).c_str(),
                    idx_shp.to_string().c_str(), v0.shape().to_string().c_str(),
                    err);
        }

        ++it0;
        ++it1;
    }
    return None;
}

}  // anonymous namespace

Maybe<std::string> debug::compare_tensor_value(const HostTensorND& v0,
                                               const char* expr0,
                                               const HostTensorND& v1,
                                               const char* expr1,
                                               float maxerr) {
    if (!v0.shape().eq_shape(v1.shape())) {
        return mgb_ssprintf_log(
                "Shape mismatch\n"
                "Value of: %s\n"
                "  Actual: %s\n"
                "Expected: %s\n"
                "Which is: %s",
                expr1, v1.shape().to_string().c_str(), expr0,
                v0.shape().to_string().c_str());
    }
    auto dtype = v0.layout().dtype;
    if (dtype != v1.layout().dtype) {
        return mgb_ssprintf_log(
                "Data type mismatch\n"
                "Value of: %s\n"
                "  Actual: %s\n"
                "Expected: %s\n"
                "Which is: %s",
                expr1, v1.layout().dtype.name(), expr0,
                v0.layout().dtype.name());
    }

    switch (dtype.enumv()) {
#define cb(_dt)                                                 \
    case DTypeTrait<_dt>::enumv:                                \
        return do_compare_tensor_value<DTypeTrait<_dt>::ctype>( \
                expr0, expr1, v0, v1, maxerr);
        MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
        default:
            mgb_throw(MegBrainError, "unhandled dtype: %s", dtype.name());
    }
}

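// serialize a tensor into a self-describing byte string:
//   Header{name_len, dtype enum, max_ndim, shape[MAX_NDIM]} | name | raw data
// the tensor must be contiguous so its buffer can be copied verbatim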
std::string debug::dump_tensor(const HostTensorND& value,
                               const std::string& name) {
    struct Header {
        uint32_t name_len;
        uint32_t dtype;
        uint32_t max_ndim;
        uint32_t shape[TensorShape::MAX_NDIM];
        char name[0];  // flexible array member: name bytes follow the header
    };
    mgb_assert(value.layout().is_contiguous());
    auto value_bytes = value.layout().span().dist_byte();
    std::string ret(name.size() + value_bytes + sizeof(Header), '\0');
    auto header = reinterpret_cast<Header*>(&ret[0]);
    memset(header, 0, sizeof(Header));
    header->name_len = name.length();
    header->dtype = static_cast<uint32_t>(value.dtype().enumv());
    header->max_ndim = TensorShape::MAX_NDIM;
    for (size_t i = 0; i < value.layout().ndim; ++i) {
        header->shape[i] = value.layout()[i];
    }
    memcpy(header->name, name.c_str(), header->name_len);
    memcpy(header->name + name.size(), value.raw_ptr(), value_bytes);
    return ret;
}

void debug::write_to_file(const char* filename, const std::string& content,
                          const char* mode) {
    FILE* fout = fopen(filename, mode);
    mgb_throw_if(!fout, SystemError, "failed to open %s: %s", filename,
                 strerror(errno));
    auto nr = fwrite(content.data(), 1, content.size(), fout);
    mgb_throw_if(nr != content.size(), SystemError,
                 "failed to write to %s: num=%zu size=%zu %s", filename, nr,
                 content.size(), strerror(errno));
    auto err = fclose(fout);
    mgb_throw_if(err, SystemError, "failed to close %s: %s", filename,
                 strerror(errno));
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}