You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

utils.h 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. /**
  2. * \file dnn/test/common/utils.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
#pragma once
#include "megdnn/basic_types.h"
#include "megdnn/handle.h"
#include "src/common/utils.h"

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#if MEGDNN_ENABLE_MULTI_THREADS
#include <atomic>
#endif
//! evaluate a megcore API call once; if it does not return megcoreSuccess,
//! print the error name to stderr and trap (abort) the process
#define megcore_check(x) \
    do { \
        auto status = (x); \
        if (status != megcoreSuccess) { \
            std::cerr << "megcore_check error: " \
                      << megcoreGetErrorName(status) << std::endl; \
            megdnn_trap(); \
        } \
    } while (0)
  32. namespace megdnn {
  33. namespace test {
//! configuration of the worker-thread pool used by the CPU dispatch checker
struct TaskExecutorConfig {
    //! Number of threads.
    size_t nr_thread;
    //! The core id to bind. The size of affinity_core_set should be equal to
    //! nr_thread.
    std::vector<size_t> affinity_core_set;
};
//! A MegcoreCPUDispatcher used by tests to verify that CPU kernels actually
//! go through the dispatcher (m_nr_call counts dispatches), that dispatch is
//! never re-entered recursively, and that no checker instance is leaked.
class CpuDispatchChecker final : MegcoreCPUDispatcher {
    //! runs dispatched tasks, optionally on a pool of worker threads with
    //! CPU affinity (ctor/dtor/sync/add_task implemented out of line)
    class TaskExecutor {
        using Task = megcore::CPUDispatcher::Task;
        using MultiThreadingTask = megcore::CPUDispatcher::MultiThreadingTask;

        // platform-specific type used to save/restore a thread affinity mask
#if MEGDNN_ENABLE_MULTI_THREADS
#if defined(WIN32)
        using thread_affinity_type = DWORD;
#else  // not WIN32
#if defined(__APPLE__)
        using thread_affinity_type = int;
#else
        using thread_affinity_type = cpu_set_t;
#endif
#endif
#endif

    public:
        TaskExecutor(TaskExecutorConfig* config = nullptr);
        ~TaskExecutor();

        /*!
         * Sync all workers.
         */
        void sync();

        /*!
         * Number of threads in this thread pool, including the main thread.
         */
        size_t nr_threads() const { return m_nr_threads; }

        //! run \p task as \p parallelism independent invocations
        void add_task(const MultiThreadingTask& task, size_t parallelism);
        //! run a single one-shot task
        void add_task(const Task& task);

    private:
#if MEGDNN_ENABLE_MULTI_THREADS
        size_t m_all_task_iter = 0;
        std::atomic_int m_current_task_iter{0};

        //! Indicate whether the thread should work, used for main thread sync
        std::vector<std::atomic_bool*> m_workers_flag;

        //! Whether the main thread affinity has been set.
        bool m_main_thread_affinity = false;

        //! Stop the worker threads.
        bool m_stop{false};

        MultiThreadingTask m_task;

        //! The cpuids to be bound.
        //! If the m_cpu_ids is empty, then none of the threads will be bound to
        //! cpus, else the size of m_cpu_ids should equal to m_nr_threads.
        std::vector<size_t> m_cpu_ids;

        //! The previous affinity mask of the main thread.
        thread_affinity_type m_main_thread_prev_affinity_mask;

        std::vector<std::thread> m_workers;
#endif
        //! Total number of threads, including main thread.
        size_t m_nr_threads = 0;
    };

    //! track number of CpuDispatchChecker instances to avoid leaking
    class InstCounter {
        bool m_used = false;  // set on first cnt() call; gates dtor checks
        int m_cnt = 0, m_max_cnt = 0;

    public:
        ~InstCounter() {
            // only verify when the counter was actually used, so the static
            // instance does not fail in runs where no CPU checker was created
            auto check = [this]() {
                ASSERT_NE(0, m_max_cnt) << "no kernel dispatched on CPU";
                ASSERT_EQ(0, m_cnt) << "leaked CpuDispatchChecker object";
            };
            if (m_used) {
                check();
            }
        }
        //! access the live-instance count; also records the high-water mark
        int& cnt() {
            m_used = true;
            m_max_cnt = std::max(m_cnt, m_max_cnt);
            return m_cnt;
        }
    };

    // NOTE(review): non-const static data member declared in a header class;
    // the single out-of-class definition must live in a .cpp — confirm
    static InstCounter sm_inst_counter;

    //! set while a dispatch is in flight, to detect recursive dispatch
    bool m_recursive_dispatch = false;

#if MEGDNN_ENABLE_MULTI_THREADS
    std::atomic_size_t m_nr_call{0};
#else
    size_t m_nr_call = 0;
#endif

    std::unique_ptr<TaskExecutor> m_task_executor;

    CpuDispatchChecker(TaskExecutorConfig* config) {
        ++sm_inst_counter.cnt();
        // sanity bound on the number of simultaneously live checkers
        megdnn_assert(sm_inst_counter.cnt() < 10);
        m_task_executor = std::make_unique<TaskExecutor>(config);
    }

    void dispatch(Task&& task) override {
        megdnn_assert(!m_recursive_dispatch);
        m_recursive_dispatch = true;
        ++m_nr_call;
        m_task_executor->add_task(std::move(task));
        m_recursive_dispatch = false;
    }

    void dispatch(MultiThreadingTask&& task, size_t parallelism) override {
        megdnn_assert(!m_recursive_dispatch);
        m_recursive_dispatch = true;
        ++m_nr_call;
        m_task_executor->add_task(std::move(task), parallelism);
        m_recursive_dispatch = false;
    }

    size_t nr_threads() override { return m_task_executor->nr_threads(); }

    //! default ctor: counts the instance but leaves m_task_executor null
    CpuDispatchChecker() {
        ++sm_inst_counter.cnt();
        megdnn_assert(sm_inst_counter.cnt() < 10);
    }

    void sync() override {}

public:
    ~CpuDispatchChecker() {
        // NOTE(review): std::uncaught_exception() is deprecated in C++17 and
        // removed in C++20; std::uncaught_exceptions() is the replacement —
        // confirm against the project's language standard before changing
        if (!std::uncaught_exception()) {
            megdnn_assert(!m_recursive_dispatch);
#if !MEGDNN_NO_THREAD
            megdnn_assert(m_nr_call && "cpu dispatch must be called");
#endif
        } else {
            // during stack unwinding, report problems to stderr instead of
            // asserting, which could terminate the process
            if (m_recursive_dispatch) {
                fprintf(stderr,
                        "CpuDispatchChecker: "
                        "detected recursive dispatch\n");
            }
            if (!m_nr_call) {
                fprintf(stderr, "CpuDispatchChecker: dispatch not called\n");
            }
        }
        --sm_inst_counter.cnt();
    }

    //! create a checker wrapped as a generic MegcoreCPUDispatcher
    static std::unique_ptr<MegcoreCPUDispatcher> make(
            TaskExecutorConfig* config) {
        return std::unique_ptr<MegcoreCPUDispatcher>(
                new CpuDispatchChecker(config));
    }
};
//! create a CPU handle for tests; when \p check_dispatch is true the handle
//! is expected to route kernels through a dispatch checker (implementation
//! is out of line, in the corresponding .cpp)
std::unique_ptr<Handle> create_cpu_handle(int debug_level,
                                          bool check_dispatch = true,
                                          TaskExecutorConfig* config = nullptr);

//! create a CPU handle driven by a caller-supplied dispatcher
std::unique_ptr<Handle> create_cpu_handle_with_dispatcher(
        int debug_level,
        const std::shared_ptr<MegcoreCPUDispatcher>& dispatcher);
  175. static inline dt_float32 diff(dt_float32 x, dt_float32 y) {
  176. auto numerator = x - y;
  177. auto denominator = std::max(std::max(std::abs(x), std::abs(y)), 1.f);
  178. return numerator / denominator;
  179. }
  180. static inline int diff(int x, int y) {
  181. return x - y;
  182. }
//! diff overloads for quantized types: compare the underlying integer
//! representations directly (quantization params are ignored here)
static inline int diff(dt_quint8 x, dt_quint8 y) {
    return x.as_uint8() - y.as_uint8();
}
static inline int diff(dt_qint32 x, dt_qint32 y) {
    return x.as_int32() - y.as_int32();
}
static inline int diff(dt_qint16 x, dt_qint16 y) {
    return x.as_int16() - y.as_int16();
}
static inline int diff(dt_qint8 x, dt_qint8 y) {
    return x.as_int8() - y.as_int8();
}
static inline int diff(dt_qint4 x, dt_qint4 y) {
    return x.as_int8() - y.as_int8();
}
static inline int diff(dt_quint4 x, dt_quint4 y) {
    return x.as_uint8() - y.as_uint8();
}
  201. inline TensorShape cvt_src_or_dst_nchw2nhwc(const TensorShape& shape) {
  202. megdnn_assert(shape.ndim == 4);
  203. auto N = shape[0], C = shape[1], H = shape[2], W = shape[3];
  204. return TensorShape{N, H, W, C};
  205. }
  206. inline TensorShape cvt_src_or_dst_ncdhw2ndhwc(const TensorShape& shape) {
  207. megdnn_assert(shape.ndim == 5);
  208. auto N = shape[0], C = shape[1], D = shape[2], H = shape[3], W = shape[4];
  209. return TensorShape{N, D, H, W, C};
  210. }
  211. inline TensorShape cvt_filter_nchw2nhwc(const TensorShape& shape) {
  212. if (shape.ndim == 4) {
  213. auto OC = shape[0], IC = shape[1], FH = shape[2], FW = shape[3];
  214. return TensorShape{OC, FH, FW, IC};
  215. } else {
  216. megdnn_assert(shape.ndim == 5);
  217. auto G = shape[0], OC = shape[1], IC = shape[2], FH = shape[3],
  218. FW = shape[4];
  219. return TensorShape{G, OC, FH, FW, IC};
  220. }
  221. }
  222. inline TensorShape cvt_filter_ncdhw2ndhwc(const TensorShape& shape) {
  223. if (shape.ndim == 5) {
  224. auto OC = shape[0], IC = shape[1], FD = shape[2], FH = shape[3],
  225. FW = shape[4];
  226. return TensorShape{OC, FD, FH, FW, IC};
  227. } else {
  228. megdnn_assert(shape.ndim == 6);
  229. auto G = shape[0], OC = shape[1], IC = shape[2], FD = shape[3],
  230. FH = shape[4], FW = shape[5];
  231. return TensorShape{G, OC, FD, FH, FW, IC};
  232. }
  233. }
//! block until all work issued on \p handle has completed
void megdnn_sync(Handle* handle);
//! allocate \p size_in_bytes bytes on the device owned by \p handle
void* megdnn_malloc(Handle* handle, size_t size_in_bytes);
//! release memory previously obtained from megdnn_malloc on the same handle
void megdnn_free(Handle* handle, void* ptr);
//! memcpy from device (src) to host (dst), per the D2H naming
void megdnn_memcpy_D2H(Handle* handle, void* dst, const void* src,
                       size_t size_in_bytes);
//! memcpy from host (src) to device (dst)
void megdnn_memcpy_H2D(Handle* handle, void* dst, const void* src,
                       size_t size_in_bytes);
//! memcpy between two device buffers
void megdnn_memcpy_D2D(Handle* handle, void* dst, const void* src,
                       size_t size_in_bytes);
//! default implementation for DynOutMallocPolicy
//! holds the Handle used by the out-of-line alloc/free implementations
class DynOutMallocPolicyImpl final : public DynOutMallocPolicy {
    Handle* m_handle;  // raw non-owning pointer; must outlive this object

public:
    DynOutMallocPolicyImpl(Handle* handle) : m_handle{handle} {}

    TensorND alloc_output(size_t id, DType dtype, const TensorShape& shape,
                          void* user_data) override;
    void* alloc_workspace(size_t sz, void* user_data) override;
    void free_workspace(void* ptr, void* user_data) override;

    /*!
     * \brief make a shared_ptr which would release output memory when
     *      deleted
     * \param out output tensor allocated by alloc_output()
     */
    std::shared_ptr<void> make_output_refholder(const TensorND& out);
};
  259. //! replace ErrorHandler::on_megdnn_error
  260. class MegDNNError : public std::exception {
  261. std::string m_msg;
  262. public:
  263. MegDNNError(const std::string& msg) : m_msg{msg} {}
  264. const char* what() const noexcept { return m_msg.c_str(); }
  265. };
//! MegDNNError subclass used to signal tensor reshape failures; inherits
//! the message-carrying constructor unchanged
class TensorReshapeError : public MegDNNError {
public:
    using MegDNNError::MegDNNError;
};

//! number of CPU cores available (implemented out of line)
size_t get_cpu_count();
  271. static inline bool good_float(float val) {
  272. return std::isfinite(val);
  273. }
  274. static inline bool good_float(int) {
  275. return true;
  276. }
//! good_float overloads for quantized types: the stored values are
//! integers, so no NaN/Inf check is needed and they are always valid
static inline bool good_float(dt_qint8) {
    return true;
}
static inline bool good_float(dt_qint16) {
    return true;
}
static inline bool good_float(dt_quint8) {
    return true;
}
static inline bool good_float(dt_qint32) {
    return true;
}
static inline bool good_float(dt_qint4) {
    return true;
}
static inline bool good_float(dt_quint4) {
    return true;
}
// A hack for the (x+0) promote to int trick on dt_quint8.
// Test code writes `val + 0` to force promotion to int (e.g. so streams
// print a number instead of a character); these overloads make that trick
// work for quantized types. The rhs must be the literal 0.
static inline int operator+(dt_quint8 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_uint8();
}
static inline int operator+(dt_qint32 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int32();
}
  304. static inline int operator+(dt_qint8 lhs, int rhs) {
  305. megdnn_assert(rhs == 0, "unexpected rhs");
  306. return int8_t(lhs);
  307. }
//! same "(x + 0)" int-promotion hack for the remaining quantized types;
//! in each case the rhs must be the literal 0
static inline int operator+(dt_qint16 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int16();
}
static inline int operator+(dt_quint4 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_uint8();
}
static inline int operator+(dt_qint4 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int8();
}
  320. } // namespace test
//! layout equality, delegating to TensorLayout::eq_layout
static inline bool operator==(const TensorLayout& a, const TensorLayout& b) {
    return a.eq_layout(b);
}

//! stream a human-readable description via TensorLayout::to_string
static inline std::ostream& operator<<(std::ostream& ostr,
                                       const TensorLayout& layout) {
    return ostr << layout.to_string();
}
//! change the image2d_pitch_alignment of naive handle in this scope
//! RAII guard: judging by the members, m_orig_val presumably records the
//! previous alignment so the out-of-line dtor can restore it — see the .cpp
class NaivePitchAlignmentScope {
    size_t m_orig_val, m_new_val;

public:
    NaivePitchAlignmentScope(size_t alignment);
    ~NaivePitchAlignmentScope();
};
  335. } // namespace megdnn
  336. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台