
comp_node.cpp

/**
 * \file src/core/test/comp_node.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "./comp_node_helper.h"

#include "megbrain/comp_node_env.h"
#include "megbrain/utils/comp_node_sync_manager.h"
#include "megbrain/utils/timer.h"
#include "megbrain/system.h"
#include "megbrain/test/helper.h"
#include "megbrain/opr/utility.h"

#include <chrono>
#if MGB_HAVE_THREAD
#include <thread>
#endif

using namespace mgb;
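
// Locator parsing: valid comp node strings map to (device type, device index,
// stream); malformed ones must throw.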
TEST(TestCompNode, Parse) {
    using L = CompNode::Locator;
    using D = CompNode::DeviceType;
    auto make_lc = [](D t, int dev, int s) -> L { return {t, dev, {s}}; };

    ASSERT_EQ(L::parse("xpux"), make_lc(D::UNSPEC, -1, 0));
    ASSERT_EQ(L::parse("xpux:23"), make_lc(D::UNSPEC, -1, 23));
    ASSERT_EQ(L::parse("xpu2:23"), make_lc(D::UNSPEC, 2, 23));
    ASSERT_EQ(L::parse("xpu21:23"), make_lc(D::UNSPEC, 21, 23));

    ASSERT_EQ(L::parse("cpux"), make_lc(D::CPU, -1, 0));
    ASSERT_EQ(L::parse("cpux:23"), make_lc(D::CPU, -1, 23));
    ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23));
    ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23));

    ASSERT_EQ(L::parse("xpu"), make_lc(D::UNSPEC, -1, 0));
    ASSERT_EQ(L::parse("xpux"), make_lc(D::UNSPEC, -1, 0));
    ASSERT_EQ(L::parse("xpu23"), make_lc(D::UNSPEC, 23, 0));
    ASSERT_EQ(L::parse("xpu23:1"), make_lc(D::UNSPEC, 23, 1));

    ASSERT_EQ(L::parse("cpu:default"), make_lc(D::CPU, L::DEVICE_CPU_DEFAULT, 0));
    ASSERT_EQ(L::parse("multithread2:0"), make_lc(D::MULTITHREAD, 0, 2));
    ASSERT_EQ(L::parse("multithread1:3"), make_lc(D::MULTITHREAD, 3, 1));
    ASSERT_EQ(L::parse("multithread:default:2"),
              make_lc(D::MULTITHREAD, L::DEVICE_MULTITHREAD_DEFAULT, 2));

    ASSERT_THROW(L::parse("apu"), MegBrainError);
    ASSERT_THROW(L::parse("fpgbx"), MegBrainError);
    ASSERT_THROW(L::parse("cab0"), MegBrainError);
    ASSERT_THROW(L::parse("cpu"), MegBrainError);
    ASSERT_THROW(L::parse("cpu-1"), MegBrainError);
    ASSERT_THROW(L::parse("cpu0:"), MegBrainError);
    ASSERT_THROW(L::parse("cpu0:x"), MegBrainError);
    ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError);
    ASSERT_THROW(L::parse("multithread"), MegBrainError);
    ASSERT_THROW(L::parse("multithread1:"), MegBrainError);
    ASSERT_THROW(L::parse("multithread1:default"), MegBrainError);
    ASSERT_THROW(L::parse("multithread1:default:0"), MegBrainError);
}
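
// Changing the "xpu" default: unspecified comp nodes should resolve to the
// remapped CUDA device.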
TEST(TestCompNode, SetDefaultDev) {
    REQUIRE_GPU(3);
    CompNode::finalize();

    using L = CompNode::Locator;
    auto orig_dt = L::parse("xpu").to_physical(),
         orig_gpu = L::parse("gpux").to_physical();
    constexpr auto CUDA = CompNode::DeviceType::CUDA;
    L::set_unspec_device_type(CUDA);
    L::set_device_map(CUDA, -1, 2);
    auto run = []() {
        ASSERT_EQ(CompNode::load("xpu").locator(), L::parse("gpu2"));
    };
    MGB_TRY {
        run();
    } MGB_FINALLY({
        L::set_unspec_device_type(orig_dt.type);
        L::set_device_map(CUDA, -1, orig_gpu.device);
    });

    CompNode::finalize();
}
TEST(TestCompNode, Load) {
    auto cn0 = CompNode::load("xpux"),
         cn1 = CompNode::load("cpux");
    ASSERT_EQ(CompNode::DeviceType::UNSPEC, cn0.locator_logical().type);
    ASSERT_EQ(CompNode::DeviceType::CPU, cn1.locator_logical().type);
    ASSERT_EQ(CompNode::load("cpux"), cn1);
    ASSERT_EQ(CompNode::load("xpux"), cn0);
    auto cnp = CompNode::load("cpu1"), cnq = CompNode::load("cpu2");
    ASSERT_EQ(CompNode::load("cpu1"), cnp);
    ASSERT_EQ(CompNode::load("cpu2"), cnq);
#if MGB_HAVE_THREAD
    ASSERT_NE(cnp, cnq);
#else
    ASSERT_EQ(cnp, cnq);
#endif

#if MGB_HAVE_THREAD
    auto cn_multi_thread0 = CompNode::load("multithread2:0");
    auto cn_multi_thread1 = CompNode::load("multithread2:1");
    ASSERT_EQ(CompNode::load("multithread2:0"), cn_multi_thread0);
    ASSERT_EQ(CompNode::load("multithread2:1"), cn_multi_thread1);
    ASSERT_NE(CompNode::load("multithread4:0"), cn_multi_thread0);
    ASSERT_NE(CompNode::load("multithread4:1"), cn_multi_thread1);

    auto cn_multi_default0 = CompNode::load("multithread:default:2");
    auto cn_multi_default1 = CompNode::load("multithread:default:4");
    ASSERT_EQ(CompNode::load("multithread:default:2"), cn_multi_default0);
    ASSERT_EQ(CompNode::load("multithread:default:4"), cn_multi_default1);
    ASSERT_NE(cn_multi_thread0, cn_multi_default1);
#endif

    ASSERT_EQ(CompNode::load("cpu1"), cnp);
    ASSERT_EQ(CompNode::load("cpu2"), cnq);

    if (check_gpu_available(2)) {
        auto cn2 = CompNode::load("gpux"),
             cn3 = CompNode::load("gpu1");
        ASSERT_EQ(CompNode::DeviceType::CUDA, cn2.locator_logical().type);
        ASSERT_NE(cn2, cn3);
        ASSERT_EQ(CompNode::load("gpux"), cn2);
        ASSERT_EQ(CompNode::load("gpu1"), cn3);
    }
}
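
// Device memory freed after finalize() must not crash, for every available
// device type.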
TEST(TestCompNode, FreeAfterFinalize) {
    CompNode::finalize();
    for (size_t i = 0; i < CompNode::NR_DEVICE_TYPE; ++i) {
        auto type = static_cast<CompNode::DeviceType>(i);
        if (!CompNode::get_device_count(type))
            continue;
        auto cn = CompNode::load(CompNode::Locator{type, -1, {0}});
        auto ptr = cn.alloc_device(123);
        CompNode::finalize();
        cn.free_device(ptr);
    }
}
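
// Dispatching from many threads concurrently: after sync(), every queued task
// must have run.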
TEST(TestCompNode, CPUDispatchSync) {
    REQUIRE_THREAD();
    constexpr int LOOP = 160, tot_threads = 8;
    std::atomic_int started_threads{0};
    auto worker = [&](int *shared_cnt, CompNode dest) {
        int nr_call = 0;
        RNGxorshf rng{next_rand_seed()};
        auto func = [&rng, &nr_call, shared_cnt]() {
            ++ nr_call;
            ++ *shared_cnt;
            int volatile cnt = 0;
            while (rng() % 20)
                ++ cnt;
        };
        auto &&env = CompNodeEnv::from_comp_node(dest).cpu_env();
        ++ started_threads;
        while (started_threads.load() != tot_threads);
        for (int i = 0; i < LOOP; ++ i) {
            env.dispatch(func);
            dest.sync();
            ASSERT_EQ(i + 1, nr_call);
        }
    };

    auto cn0 = CompNode::load("cpu0"), cn1 = CompNode::load("cpu1");
    int cnt0 = 0, cnt1 = 0;
    std::vector<std::thread> wk_threads;
    for (int i = 0; i < tot_threads / 2; ++ i) {
        wk_threads.emplace_back(worker, &cnt0, cn0);
        wk_threads.emplace_back(worker, &cnt1, cn1);
    }
    for (auto &&i: wk_threads)
        i.join();

    ASSERT_EQ(LOOP * tot_threads / 2, cnt0);
    ASSERT_EQ(LOOP * tot_threads / 2, cnt1);
}
TEST(TestCompNodeCPU, CoreAffinity) {
    REQUIRE_THREAD();
    std::vector<size_t> data_v(2, 0);
    size_t data0 = 0, data1 = 0;
    auto empty_task = []() {};
    auto cn0 = CompNode::load("cpu:default"), cn1 = CompNode::load("cpu0"),
         cn2 = CompNode::load("multithread2:0");
    auto binding0 = [&](size_t) { data0 = 10; };
    CompNodeEnv::from_comp_node(cn0).cpu_env().set_affinity(binding0);
    CompNodeEnv::from_comp_node(cn0).cpu_env().dispatch(empty_task);
    cn0.sync();

    auto binding1 = [&](size_t) { data1 = 20; };
    CompNodeEnv::from_comp_node(cn1).cpu_env().set_affinity(binding1);
    CompNodeEnv::from_comp_node(cn1).cpu_env().dispatch(empty_task);
    cn1.sync();

    auto binding2 = [&](size_t thread_id) { data_v[thread_id] = 30; };
    auto temp_task = [](size_t, size_t) {};
    CompNodeEnv::from_comp_node(cn2).cpu_env().set_affinity(binding2);
    CompNodeEnv::from_comp_node(cn2).cpu_env().dispatch(temp_task, 40u);
    cn2.sync();

    ASSERT_EQ(data0, static_cast<size_t>(10));
    ASSERT_EQ(data1, static_cast<size_t>(20));
    ASSERT_EQ(data_v[0], static_cast<size_t>(30));
    ASSERT_EQ(data_v[1], static_cast<size_t>(30));
}
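
// A parallel task on multithread comp nodes must produce the same result as
// on plain cpu0.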
TEST(TestCompNode, CPU_MULTI_THREAD) {
    REQUIRE_THREAD();
    std::vector<int> source(100), dst0(100), dst1(100);
    for (int i = 0; i < 100; i++) {
        source[i] = i;
        dst0[i] = 0;
        dst1[i] = 0;
    }

    size_t total_task = 20;
    auto worker = [&](std::vector<int>& dst, CompNode dest) {
        auto func = [&](size_t index, size_t) {
            size_t sub_task = 100 / total_task;
            for (size_t i = index * sub_task; i < (index + 1) * sub_task; i++) {
                int sum = 0;
                for (size_t j = 0; j < i; j++) {
                    sum += source[j];
                }
                dst[i] = sum;
            }
        };
        auto&& env = CompNodeEnv::from_comp_node(dest).cpu_env();
        env.dispatch(std::move(func), total_task);
        dest.sync();
    };

    for (auto&& str : std::vector<std::string>{
                 "multithread2:0", "multithread4:0", "multithread:default:4"}) {
        auto cn0 = CompNode::load("cpu0"), cn1 = CompNode::load(str);
        std::thread wk_thread0{std::ref(worker), std::ref(dst0), std::ref(cn0)};
        std::thread wk_thread1{std::ref(worker), std::ref(dst1), std::ref(cn1)};
        wk_thread0.join();
        wk_thread1.join();
        for (int i = 0; i < 100; i++) {
            ASSERT_EQ(dst0[i], dst1[i]);
        }
    }
}
TEST(TestCompNodeCuda, MemNode) {
    REQUIRE_GPU(2);
    auto cn00 = CompNode::load("gpu0"),
         cn1 = CompNode::load("gpu1"),
         cn01 = CompNode::load("gpu0:1");
    ASSERT_EQ(cn00, CompNode::load("gpu0"));
    ASSERT_EQ(cn00.mem_node(), cn01.mem_node());
    ASSERT_NE(cn00.mem_node(), cn1.mem_node());
}
TEST(TestCompNodeCuda, Uid) {
    REQUIRE_GPU(2);
    auto cn00 = CompNode::load("gpu0"),
         cn1 = CompNode::load("gpu1"),
         cn01 = CompNode::load("gpu0:0"),
         cn02 = CompNode::load("gpu0:2");
    ASSERT_EQ(cn00, CompNode::load("gpu0"));
    ASSERT_EQ(cn00.get_uid(), cn01.get_uid());
    ASSERT_NE(cn00.get_uid(), cn02.get_uid());
    ASSERT_NE(cn00.get_uid(), cn1.get_uid());
}
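
// Logical CPU devices mapped to the same physical device must share a
// dispatch thread; distinct physical devices must not.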
TEST(TestCompNodeCPU, PhysicalDispatch) {
    constexpr int ID = 0x2a6453e0;
    using L = CompNode::Locator;
    constexpr auto DT = CompNode::DeviceType::CPU;
    L::set_device_map(DT, ID, 0);
    L::set_device_map(DT, ID + 1, 0);
    L::set_device_map(DT, ID + 2, 1);
    auto cn0 = CompNode::load({DT, ID, {0}}),
         cn1 = CompNode::load({DT, ID + 1, {0}}),
         cn2 = CompNode::load({DT, ID + 2, {0}});
#if MGB_HAVE_THREAD
    ASSERT_NE(cn0, cn1);
#else
    ASSERT_EQ(cn0, cn1);
#endif

    std::vector<std::thread::id> tids;
    std::mutex tids_mtx;
    auto get_tid = [&]() {
        MGB_LOCK_GUARD(tids_mtx);
        tids.push_back(std::this_thread::get_id());
    };
    CompNodeEnv::from_comp_node(cn0).cpu_env().dispatch(get_tid);
    CompNodeEnv::from_comp_node(cn1).cpu_env().dispatch(get_tid);
    CompNodeEnv::from_comp_node(cn2).cpu_env().dispatch(get_tid);
    CompNode::sync_all();

    std::unordered_set<std::thread::id> uniq_tids(tids.begin(), tids.end());
    ASSERT_EQ(3u, tids.size());
#if MGB_HAVE_THREAD
    ASSERT_EQ(2u, uniq_tids.size());
#else
    ASSERT_EQ(1u, uniq_tids.size());
#endif
}
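
// device_wait_event() across CPU comp nodes: cn1's queue must block until the
// event recorded on cn0 is done.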
TEST(TestCompNodeCPU, EventWait) {
    REQUIRE_THREAD();
    std::atomic_bool start = ATOMIC_VAR_INIT(false);
    auto cn0 = CompNode::load("cpu0"),
         cn1 = CompNode::load("cpu1");
    auto task0 = [&]() {
        while (!start)
            std::this_thread::yield();
    };
    auto event = cn0.create_event();
    CompNodeEnv::from_comp_node(cn0).cpu_env().dispatch(task0);
    event->record();
    cn1.device_wait_event(*event);

    bool succ = false;
    auto task1 = [&]() { succ = start; };
    CompNodeEnv::from_comp_node(cn1).cpu_env().dispatch(task1);

    using namespace std::literals;
    std::this_thread::sleep_for(50ms);
    ASSERT_FALSE(succ);

    start = true;
    CompNode::sync_all();
    ASSERT_TRUE(succ);
}
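
// record() may be called again on the same event; finished() must reflect the
// most recent record.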
TEST(TestCompNodeCPU, EventRecOverwrite) {
    REQUIRE_THREAD();
    auto cn = CompNode::load("cpu0");
    auto dispatcher = CompNodeEnv::from_comp_node(cn).
        cpu_env().dispatcher.get();
    auto dispatch = [&](MegcoreCPUDispatcher::Task &&t) {
        dispatcher->dispatch(std::move(t));
    };
    auto ev = cn.create_event();
    auto wait_atomic = [](std::atomic_bool *var) {
        while (!var->load())
            std::this_thread::yield();
    };
    auto set_atomic = [](std::atomic_bool *var) {
        var->store(true);
    };

    std::atomic_bool
        s0 = ATOMIC_VAR_INIT(false),
        s1 = ATOMIC_VAR_INIT(false),
        t0 = ATOMIC_VAR_INIT(false),
        t1 = ATOMIC_VAR_INIT(false),
        t2 = ATOMIC_VAR_INIT(false);

    dispatch(std::bind(set_atomic, &t0));
    dispatch(std::bind(wait_atomic, &s0));
    ev->record();
    dispatch(std::bind(set_atomic, &t1));
    dispatch(std::bind(wait_atomic, &s1));
    ev->record();
    dispatch(std::bind(set_atomic, &t2));

    wait_atomic(&t0);
    ASSERT_FALSE(ev->finished());
    set_atomic(&s0);
    wait_atomic(&t1);
    ASSERT_FALSE(ev->finished());
    set_atomic(&s1);
    wait_atomic(&t2);
    ASSERT_TRUE(ev->finished());
}
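
// GPU-to-CPU peer copy ordered by an event: the CPU-side copy must observe
// the completed device-side copy.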
namespace {
void test_peer_copy_from_device(const char* comp_node) {
    REQUIRE_THREAD();
    auto cn_gpu = CompNode::load(comp_node);
    auto cn_cpu = CompNode::load("cpux");

    HostTensorGenerator<> gen;
    auto a = gen({20, 3, 112, 112});
    auto b = gen({20, 3, 112, 112});
    auto c = gen({20, 3, 112, 112});
    DeviceTensorND dev_a{cn_gpu}, dev_b{cn_cpu}, dev_c{cn_gpu};
    dev_a.copy_from(*a).sync();
    dev_b.copy_from(*b).sync();
    dev_c.copy_from(*c).sync();

    auto wait_event = cn_gpu.create_event();
    opr::Sleep::sleep(cn_gpu, 0.1);
    dev_a.copy_from(dev_c);
    wait_event->record();

    cn_cpu.device_wait_event(*wait_event);
    dev_b.copy_from(dev_a);
    dev_b.sync();

    HostTensorND result;
    result.copy_from(dev_b);
    CompNode::sync_all();
    MGB_ASSERT_TENSOR_EQ(result, *c);
}
} // anonymous namespace

TEST(TestCompNodeCPU, PeerCopyFromCUDA) {
    REQUIRE_GPU(1);
    test_peer_copy_from_device("gpux");
}
TEST(TestCompNodeSyncManager, HostWait) {
    REQUIRE_THREAD();
    CompNodeSyncManager mgr(CompNode::load("xpu0"));
    auto run_set = [&]() {
        using namespace std::literals;
        std::this_thread::sleep_for(200ms);
        mgr.set_ready();
        mgb_log_debug("set_ready() called");
    };

    for (int run = 0; run < 2; ++ run) {
        std::thread th_run_set(run_set);
        RealTimer timer;
        mgr.clear_waiter_record();
        ASSERT_THROW(mgr.busy_wait_set_ready(), MegBrainError);
        mgr.add_waiter_record(false);
        mgr.add_waiter_record(false);
        mgr.busy_wait_set_ready();
        EXPECT_GE(timer.get_secs(), 0.1);
        timer.reset();
        mgr.busy_wait_set_ready();
        EXPECT_LE(timer.get_secs(), 0.001);
        th_run_set.join();
    }
}
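
// Device-side waits should return to the host quickly while delaying the
// waiting comp nodes until the event is actually done.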
TEST(TestCompNodeSyncManager, DeviceWait) {
    REQUIRE_THREAD();
    auto cns = load_multiple_xpus(3);
    auto cn0 = cns[0], cn1 = cns[1], cn2 = cns[2];
    CompNodeSyncManager mgr(cn0);
    using Event = CompNode::Event;
    auto ev_cn1 = cn1.create_event(),
         ev_cn2_begin = cn2.create_event(Event::NEED_TIMER),
         ev_cn2_end = cn2.create_event(Event::NEED_TIMER);

    for (int run = 0; run < 2; ++ run) {
        RealTimer timer;
        mgr.clear_waiter_record();
        ASSERT_THROW(mgr.busy_wait_set_ready_and_get_event(), MegBrainError);
        mgr.add_waiter_record(true);
        mgr.add_waiter_record(true);
        opr::Sleep::sleep(cn0, 0.13);
        mgr.set_ready();
        ev_cn2_begin->record();
        cn1.device_wait_event(mgr.busy_wait_set_ready_and_get_event());
        cn2.device_wait_event(mgr.busy_wait_set_ready_and_get_event());
        ev_cn1->record();
        ev_cn2_end->record();
        EXPECT_LE(timer.get_secs(), 0.05);

        ev_cn1->host_wait();
        EXPECT_GE(timer.get_secs(), 0.1);
        ev_cn2_end->host_wait();
        auto ev2_t = ev_cn2_begin->elapsed_time_until(*ev_cn2_end);
        EXPECT_GE(ev2_t, 0.1);
    }
}
TEST(TestCompNodeSyncManager, DeviceWaitCross) {
    REQUIRE_THREAD();
    auto cn0 = CompNode::load("xpu0:0"), cn1 = CompNode::load("xpu0:1");
    auto ev_cn0 = cn0.create_event(),
         ev_cn1 = cn1.create_event();
    RealTimer timer;
    // The cross waits look like a deadlock, but are guaranteed to resolve
    // because each wait is issued only after the event it waits on has been
    // recorded.
    ev_cn0->record();
    cn1.device_wait_event(*ev_cn0);
    ev_cn1->record();
    opr::Sleep::sleep(cn0, 0.1);
    cn0.device_wait_event(*ev_cn1);

    ev_cn0->record();
    cn1.device_wait_event(*ev_cn0);

    cn0.sync();
    cn1.sync();

    // The CUDA sleep kernel is easily affected by GPU frequency changes, so
    // we only print a warning instead of asserting; see XPU-226 for details.
    auto used = timer.get_secs();
    if (used <= 0.1 || used >= 0.2) {
        mgb_log_warn("expect time between [%f, %f], got %f", 0.1, 0.2, used);
    }
}
#if !MGB_HAVE_THREAD
TEST(TestCompNodeSyncManager, DeviceWaitWithoutThread) {
    auto cn = CompNode::load("cpu:default");
    CompNodeSyncManager mgr(cn);
    mgr.add_waiter_record(true);
    ASSERT_ANY_THROW(mgr.busy_wait_set_ready());
    mgr.set_ready();
    EXPECT_TRUE(mgr.busy_wait_set_ready_and_get_event().finished());
}
#endif
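
// Loading, using and finalizing comp nodes must be repeatable for every
// available device type.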
TEST(TestCompNode, MultipleLoad) {
    auto run = [](CompNode cn) {
        HostTensorND a(cn, {23}, dtype::Int32{}), b;
        auto pa = a.ptr<int>();
        for (int i = 0; i < 23; ++i) {
            pa[i] = i;
        }
        DeviceTensorND tmp;
        tmp.copy_from(a);
        b.copy_from(tmp).sync();
        auto pb = b.ptr<int>();
        for (int i = 0; i < 23; ++i) {
            ASSERT_EQ(i, pb[i]);
        }
        CompNode::finalize();
    };
    for (size_t i = 1; i < CompNode::NR_DEVICE_TYPE; ++i) {
        auto dt = static_cast<CompNode::DeviceType>(i);
        if (CompNode::get_device_count(dt)) {
            auto cn = CompNode::load({dt, 0, {0}});
            mgb_log("comp node %s is available", cn.to_string().c_str());
            run(cn);
            cn = CompNode::load({dt, 0, {0}});
            run(cn);
        }
    }
}
namespace {
class CompNodeDepedentObjectInst final : public CompNodeDepedentObject {
    int *m_dst, *m_timer;

    std::shared_ptr<void> on_comp_node_finalize() override {
        EXPECT_EQ(0, *m_dst);
        *m_dst = ++*m_timer;
        return {};
    }

public:
    CompNodeDepedentObjectInst(int* dst, int* timer)
            : m_dst{dst}, m_timer{timer} {}

    void chk() { check_not_finalized(); }
};
} // anonymous namespace
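
// Finalize callbacks run in reverse registration order and skip objects that
// were already destroyed.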
TEST(TestCompNode, DepedentObjectList) {
    CompNode::finalize();
    for (int i = 0; i < 5; ++i) {
        // loop multiple times so memory problems can be exposed more easily
        int ts[4] = {0}, timer = 0;
        auto make = [&](int i) {
            return std::make_unique<CompNodeDepedentObjectInst>(ts + i, &timer);
        };
        auto i0 = make(0), i1 = make(1), i2 = make(2), i3 = make(3);
        ASSERT_NO_THROW(i0->chk());
        ASSERT_NO_THROW(i1->chk());
        i1.reset();
        comp_node_detail::DepedentObjList::invoke_callback_and_clean();
        ASSERT_EQ(1, ts[3]);
        ASSERT_EQ(2, ts[2]);
        ASSERT_EQ(0, ts[1]);
        ASSERT_EQ(3, ts[0]);
        ASSERT_THROW(i0->chk(), InternalError);
    }
}
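
// Computing sequence recorder tests, instantiated over seq_rec::test_types on
// several CPU comp node flavors.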
namespace {
template <typename tag>
class TestCPUCompSeqRec : public ::testing::Test {};
TYPED_TEST_CASE(TestCPUCompSeqRec, comp_node_test::seq_rec::test_types);

TYPED_TEST(TestCPUCompSeqRec, run) {
    comp_node_test::seq_rec::run<TypeParam>(CompNode::load("cpux"));
}

TYPED_TEST(TestCPUCompSeqRec, run_default_cpu) {
    comp_node_test::seq_rec::run<TypeParam>(CompNode::load("cpu:default"));
}

TYPED_TEST(TestCPUCompSeqRec, run_multi_thread) {
    auto cn = CompNode::load("multithread4:0");
    comp_node_test::seq_rec::run<TypeParam>(cn);
}

TYPED_TEST(TestCPUCompSeqRec, run_multi_thread_default) {
    auto cn = CompNode::load("multithread:default:4");
    comp_node_test::seq_rec::run<TypeParam>(cn);
}
} // anonymous namespace

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine package ships with the CUDA environment needed to run code on a GPU, so there is no separate CPU and GPU build to choose between. To run GPU programs, make sure the machine has GPU hardware and that the driver is installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
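
Since the same binary runs with or without a GPU, code can probe for CUDA devices at runtime and fall back to the CPU. The sketch below uses only the MegBrain C++ API already exercised by the tests above (CompNode::get_device_count, CompNode::load, CompNode::finalize); the include path and the main() scaffolding are illustrative assumptions, not part of the test suite.

#include <cstdio>
#include "megbrain/comp_node.h"  // assumed include path

int main() {
    using namespace mgb;
    // get_device_count() reports how many devices of a type are visible; it
    // is 0 when no CUDA device (or driver) is present, so the same binary
    // can fall back to the CPU transparently.
    const bool has_gpu =
            CompNode::get_device_count(CompNode::DeviceType::CUDA) > 0;
    auto cn = CompNode::load(has_gpu ? "gpu0" : "cpu0");
    std::printf("running on %s\n", cn.to_string().c_str());
    CompNode::finalize();
    return 0;
}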