
utils.h

/**
 * \file dnn/test/common/utils.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 */
#pragma once

#include "megdnn/basic_types.h"
#include "megdnn/handle.h"
#include "src/common/utils.h"

#include <memory>
#include <cstdlib>
#include <cmath>
#include <iostream>
// used directly in this header; may also be reachable transitively via
// src/common/utils.h, but included here so the header is self-contained
#include <algorithm>  // std::max
#include <string>     // MegDNNError::m_msg
#include <vector>     // TaskExecutorConfig::affinity_core_set

#include <gtest/gtest.h>

#if MEGDNN_ENABLE_MULTI_THREADS
#include <atomic>
#include <thread>  // TaskExecutor::m_workers
#endif

#define megcore_check(x)                                       \
    do {                                                       \
        auto status = (x);                                     \
        if (status != megcoreSuccess) {                        \
            std::cerr << "megcore_check error: "               \
                      << megcoreGetErrorName(status)           \
                      << std::endl;                            \
            megdnn_trap();                                     \
        }                                                      \
    } while (0)
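
// Usage sketch: wrap any megcoreStatus_t-returning call, e.g.
//   megcore_check(some_megcore_call(...));  // hypothetical call
// On failure the status name is printed to stderr and megdnn_trap() aborts.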
namespace megdnn {
namespace test {

struct TaskExecutorConfig {
    //! Number of threads.
    size_t nr_thread;
    //! The core ids to bind; the size of affinity_core_set should equal
    //! nr_thread.
    std::vector<size_t> affinity_core_set;
};

class CpuDispatchChecker final : MegcoreCPUDispatcher {
    class TaskExecutor {
        using Task = megcore::CPUDispatcher::Task;
        using MultiThreadingTask = megcore::CPUDispatcher::MultiThreadingTask;
#if MEGDNN_ENABLE_MULTI_THREADS
#if defined(WIN32)
        using thread_affinity_type = DWORD;
#else  // not WIN32
#if defined(__APPLE__)
        using thread_affinity_type = int;
#else
        using thread_affinity_type = cpu_set_t;
#endif
#endif
#endif

    public:
        TaskExecutor(TaskExecutorConfig* config = nullptr);
        ~TaskExecutor();

        /*!
         * Sync all workers.
         */
        void sync();

        /*!
         * Number of threads in this thread pool, including the main thread.
         */
        size_t nr_threads() const { return m_nr_threads; }

        void add_task(const MultiThreadingTask& task, size_t parallelism);
        void add_task(const Task& task);

    private:
#if MEGDNN_ENABLE_MULTI_THREADS
        size_t m_all_task_iter = 0;
        std::atomic_int m_current_task_iter{0};
        //! Indicates whether a worker should run; used for main-thread sync.
        std::vector<std::atomic_bool*> m_workers_flag;
        //! Whether the main thread affinity has been set.
        bool m_main_thread_affinity = false;
        //! Stop the worker threads.
        bool m_stop{false};
        MultiThreadingTask m_task;
        //! The cpu ids to bind. If m_cpu_ids is empty, no thread is bound
        //! to a cpu; otherwise its size should equal m_nr_threads.
        std::vector<size_t> m_cpu_ids;
        //! The previous affinity mask of the main thread.
        thread_affinity_type m_main_thread_prev_affinity_mask;
        std::vector<std::thread> m_workers;
#endif
        //! Total number of threads, including the main thread.
        size_t m_nr_threads = 0;
    };

    //! track the number of live CpuDispatchChecker instances to catch leaks
    class InstCounter {
        bool m_used = false;
        int m_cnt = 0, m_max_cnt = 0;

    public:
        ~InstCounter() {
            auto check = [this]() {
                ASSERT_NE(0, m_max_cnt) << "no kernel dispatched on CPU";
                ASSERT_EQ(0, m_cnt) << "leaked CpuDispatchChecker object";
            };
            if (m_used) {
                check();
            }
        }
        int& cnt() {
            m_used = true;
            m_max_cnt = std::max(m_cnt, m_max_cnt);
            return m_cnt;
        }
    };
    static InstCounter sm_inst_counter;

    bool m_recursive_dispatch = false;
#if MEGDNN_ENABLE_MULTI_THREADS
    std::atomic_size_t m_nr_call{0};
#else
    size_t m_nr_call = 0;
#endif
    std::unique_ptr<TaskExecutor> m_task_executor;

    CpuDispatchChecker(TaskExecutorConfig* config) {
        ++sm_inst_counter.cnt();
        megdnn_assert(sm_inst_counter.cnt() < 10);
        m_task_executor = std::make_unique<TaskExecutor>(config);
    }

    void dispatch(Task&& task) override {
        megdnn_assert(!m_recursive_dispatch);
        m_recursive_dispatch = true;
        ++m_nr_call;
        m_task_executor->add_task(std::move(task));
        m_recursive_dispatch = false;
    }

    void dispatch(MultiThreadingTask&& task, size_t parallelism) override {
        megdnn_assert(!m_recursive_dispatch);
        m_recursive_dispatch = true;
        ++m_nr_call;
        m_task_executor->add_task(std::move(task), parallelism);
        m_recursive_dispatch = false;
    }

    size_t nr_threads() override { return m_task_executor->nr_threads(); }

    CpuDispatchChecker() {
        ++sm_inst_counter.cnt();
        megdnn_assert(sm_inst_counter.cnt() < 10);
    }

    void sync() override {}

public:
    ~CpuDispatchChecker() {
        if (!std::uncaught_exception()) {
            megdnn_assert(!m_recursive_dispatch);
#if !MEGDNN_NO_THREAD
            megdnn_assert(m_nr_call && "cpu dispatch must be called");
#endif
        } else {
            if (m_recursive_dispatch) {
                fprintf(stderr,
                        "CpuDispatchChecker: detected recursive dispatch\n");
            }
            if (!m_nr_call) {
                fprintf(stderr, "CpuDispatchChecker: dispatch not called\n");
            }
        }
        --sm_inst_counter.cnt();
    }

    static std::unique_ptr<MegcoreCPUDispatcher> make(
            TaskExecutorConfig* config) {
        return std::unique_ptr<MegcoreCPUDispatcher>(
                new CpuDispatchChecker(config));
    }
};
std::unique_ptr<Handle> create_cpu_handle(int debug_level,
                                          bool check_dispatch = true,
                                          TaskExecutorConfig* config = nullptr);

std::unique_ptr<Handle> create_cpu_handle_with_dispatcher(
        int debug_level,
        const std::shared_ptr<MegcoreCPUDispatcher>& dispatcher);

//! relative difference of two floats, normalized by max(|x|, |y|, 1)
static inline dt_float32 diff(dt_float32 x, dt_float32 y) {
    auto numerator = x - y;
    auto denominator = std::max(std::max(std::abs(x), std::abs(y)), 1.f);
    return numerator / denominator;
}

//! integer and quantized overloads return the plain difference
static inline int diff(int x, int y) {
    return x - y;
}
static inline int diff(dt_quint8 x, dt_quint8 y) {
    return x.as_uint8() - y.as_uint8();
}
static inline int diff(dt_qint32 x, dt_qint32 y) {
    return x.as_int32() - y.as_int32();
}
static inline int diff(dt_qint16 x, dt_qint16 y) {
    return x.as_int16() - y.as_int16();
}
static inline int diff(dt_qint8 x, dt_qint8 y) {
    return x.as_int8() - y.as_int8();
}
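
// Worked example (illustrative): diff(1000.f, 1001.f) = -1.f / 1001.f,
// roughly -0.001, while diff(0.001f, 0.002f) = -0.001f exactly, because the
// denominator is clamped to at least 1 so tiny absolute errors near zero are
// not inflated into huge relative errors.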
inline TensorShape cvt_src_or_dst_nchw2nhwc(const TensorShape& shape) {
    megdnn_assert(shape.ndim == 4);
    auto N = shape[0], C = shape[1], H = shape[2], W = shape[3];
    return TensorShape{N, H, W, C};
}

inline TensorShape cvt_src_or_dst_ncdhw2ndhwc(const TensorShape& shape) {
    megdnn_assert(shape.ndim == 5);
    auto N = shape[0], C = shape[1], D = shape[2], H = shape[3], W = shape[4];
    return TensorShape{N, D, H, W, C};
}

inline TensorShape cvt_filter_nchw2nhwc(const TensorShape& shape) {
    if (shape.ndim == 4) {
        auto OC = shape[0], IC = shape[1], FH = shape[2], FW = shape[3];
        return TensorShape{OC, FH, FW, IC};
    } else {
        megdnn_assert(shape.ndim == 5);
        auto G = shape[0], OC = shape[1], IC = shape[2], FH = shape[3],
             FW = shape[4];
        return TensorShape{G, OC, FH, FW, IC};
    }
}

inline TensorShape cvt_filter_ncdhw2ndhwc(const TensorShape& shape) {
    if (shape.ndim == 5) {
        auto OC = shape[0], IC = shape[1], FD = shape[2], FH = shape[3],
             FW = shape[4];
        return TensorShape{OC, FD, FH, FW, IC};
    } else {
        megdnn_assert(shape.ndim == 6);
        auto G = shape[0], OC = shape[1], IC = shape[2], FD = shape[3],
             FH = shape[4], FW = shape[5];
        return TensorShape{G, OC, FD, FH, FW, IC};
    }
}
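
// Illustrative example: a dense NCHW filter {OC=16, IC=3, FH=5, FW=5}
// becomes the NHWC-ordered {16, 5, 5, 3} under cvt_filter_nchw2nhwc, while a
// grouped 5-dim filter keeps its leading group dimension G in place.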
void megdnn_sync(Handle* handle);
void* megdnn_malloc(Handle* handle, size_t size_in_bytes);
void megdnn_free(Handle* handle, void* ptr);
void megdnn_memcpy_D2H(Handle* handle, void* dst, const void* src,
                       size_t size_in_bytes);
void megdnn_memcpy_H2D(Handle* handle, void* dst, const void* src,
                       size_t size_in_bytes);
void megdnn_memcpy_D2D(Handle* handle, void* dst, const void* src,
                       size_t size_in_bytes);

//! default implementation for DynOutMallocPolicy
class DynOutMallocPolicyImpl final : public DynOutMallocPolicy {
    Handle* m_handle;

public:
    DynOutMallocPolicyImpl(Handle* handle) : m_handle{handle} {}

    TensorND alloc_output(size_t id, DType dtype, const TensorShape& shape,
                          void* user_data) override;
    void* alloc_workspace(size_t sz, void* user_data) override;
    void free_workspace(void* ptr, void* user_data) override;

    /*!
     * \brief make a shared_ptr which would release output memory when
     *      deleted
     * \param out output tensor allocated by alloc_output()
     */
    std::shared_ptr<void> make_output_refholder(const TensorND& out);
};

//! exception class used to replace ErrorHandler::on_megdnn_error
class MegDNNError : public std::exception {
    std::string m_msg;

public:
    MegDNNError(const std::string& msg) : m_msg{msg} {}
    const char* what() const noexcept override { return m_msg.c_str(); }
};

class TensorReshapeError : public MegDNNError {
public:
    using MegDNNError::MegDNNError;
};

size_t get_cpu_count();

//! whether a value is valid for comparison: floats must be finite,
//! integer and quantized values always are
static inline bool good_float(float val) {
    return std::isfinite(val);
}
static inline bool good_float(int) {
    return true;
}
static inline bool good_float(dt_qint8) {
    return true;
}
static inline bool good_float(dt_qint16) {
    return true;
}
static inline bool good_float(dt_quint8) {
    return true;
}
static inline bool good_float(dt_qint32) {
    return true;
}

// A hack so that the (x + 0) idiom promotes quantized values to int,
// e.g. so they print and compare as their underlying integer values.
static inline int operator+(dt_quint8 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_uint8();
}
static inline int operator+(dt_qint32 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int32();
}
static inline int operator+(dt_qint8 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int8();
}
static inline int operator+(dt_qint16 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int16();
}

}  // namespace test

static inline bool operator==(const TensorLayout& a, const TensorLayout& b) {
    return a.eq_layout(b);
}

static inline std::ostream& operator<<(std::ostream& ostr,
                                       const TensorLayout& layout) {
    return ostr << layout.to_string();
}

//! change the image2d_pitch_alignment of naive handle in this scope
class NaivePitchAlignmentScope {
    size_t m_orig_val, m_new_val;

public:
    NaivePitchAlignmentScope(size_t alignment);
    ~NaivePitchAlignmentScope();
};

}  // namespace megdnn

// vim: syntax=cpp.doxygen
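
For orientation, here is a minimal sketch of how the pieces above are typically combined in a test. Only TaskExecutorConfig, create_cpu_handle, and the megdnn_* memory helpers come from this header; the test name, buffer size, and core ids are illustrative assumptions, and check_dispatch is disabled so the example does not trip the CpuDispatchChecker assertions when no operator kernel is dispatched. A gtest main (e.g. linking gtest_main) is assumed.

#include "test/common/utils.h"

#include <cstdint>
#include <vector>

using namespace megdnn;
using namespace megdnn::test;

TEST(UTILS_EXAMPLE, CpuHandleRoundTrip) {
    // Two worker threads pinned to cores 0 and 1; affinity_core_set must
    // have exactly nr_thread entries (see TaskExecutorConfig above).
    TaskExecutorConfig config{2, {0, 1}};

    // debug_level 0; check_dispatch disabled because nothing is dispatched
    // here, so the "cpu dispatch must be called" assertion would otherwise
    // fire when the handle is destroyed.
    auto handle = create_cpu_handle(0, false, &config);

    // Round-trip 256 bytes through the handle's allocator.
    constexpr size_t SIZE = 256;
    std::vector<uint8_t> host(SIZE, 0x5a), back(SIZE, 0);
    void* dev = megdnn_malloc(handle.get(), SIZE);
    megdnn_memcpy_H2D(handle.get(), dev, host.data(), SIZE);
    megdnn_memcpy_D2H(handle.get(), back.data(), dev, SIZE);
    megdnn_sync(handle.get());
    megdnn_free(handle.get(), dev);
    ASSERT_EQ(host, back);
}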

The MegEngine installation package ships with the CUDA environment needed to run code on a GPU, so there is no separate CPU vs. GPU build. To run GPU programs, make sure the machine has GPU hardware and the driver is installed. If you would like to try deep-learning development on a cloud GPU compute platform, you are welcome to visit the MegStudio platform.