You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

device_options.cpp 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. #include <iostream>
  2. #include <sstream>
  3. #include "lite/global.h"
  4. #include "megbrain/comp_node_env.h"
  5. #include "misc.h"
  6. #include "device_options.h"
  7. #include "models/model_lite.h"
  8. #include "models/model_mdl.h"
  9. DECLARE_bool(weight_preprocess);
  10. using namespace lar;
  11. /////////////////// XPUDeviceOption //////////////////////
  12. namespace lar {
  13. template <>
  14. void XPUDeviceOption::config_model_internel<ModelLite>(
  15. RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
  16. if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
  17. if ((enable_cpu) || (enable_cpu_default) || (enable_multithread) ||
  18. (enable_multithread_default)) {
  19. LITE_LOG("using cpu device\n");
  20. model->get_config().device_type = LiteDeviceType::LITE_CPU;
  21. }
  22. #if LITE_WITH_CUDA
  23. if (enable_cuda) {
  24. LITE_LOG("using cuda device\n");
  25. model->get_config().device_type = LiteDeviceType::LITE_CUDA;
  26. }
  27. #endif
  28. } else if (runtime_param.stage == RunStage::AFTER_NETWORK_CREATED) {
  29. auto&& network = model->get_lite_network();
  30. if (enable_cpu_default) {
  31. LITE_LOG("using cpu default device\n");
  32. lite::Runtime::set_cpu_inplace_mode(network);
  33. }
  34. if (enable_multithread) {
  35. LITE_LOG("using multithread device\n");
  36. lite::Runtime::set_cpu_threads_number(network, thread_num);
  37. }
  38. if (enable_multithread_default) {
  39. LITE_LOG("using multithread default device\n");
  40. lite::Runtime::set_cpu_inplace_mode(network);
  41. lite::Runtime::set_cpu_threads_number(network, thread_num);
  42. }
  43. if (enable_set_core_ids) {
  44. std::string core_str;
  45. for (auto id : core_ids) {
  46. core_str += std::to_string(id) + ",";
  47. }
  48. LITE_LOG("multi thread core ids: %s\n", core_str.c_str());
  49. lite::ThreadAffinityCallback affinity_callback = [&](size_t thread_id) {
  50. mgb::sys::set_cpu_affinity({core_ids[thread_id]});
  51. };
  52. lite::Runtime::set_runtime_thread_affinity(network, affinity_callback);
  53. }
  54. }
  55. }
  56. template <>
  57. void XPUDeviceOption::config_model_internel<ModelMdl>(
  58. RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
  59. if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
  60. if (enable_cpu) {
  61. mgb_log("using cpu device\n");
  62. model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) {
  63. loc.type = mgb::CompNode::DeviceType::CPU;
  64. };
  65. }
  66. #if LITE_WITH_CUDA
  67. if (enable_cuda) {
  68. mgb_log("using cuda device\n");
  69. model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) {
  70. if (loc.type == mgb::CompNode::DeviceType::UNSPEC) {
  71. loc.type = mgb::CompNode::DeviceType::CUDA;
  72. }
  73. loc.device = 0;
  74. };
  75. }
  76. #endif
  77. if (enable_cpu_default) {
  78. mgb_log("using cpu default device\n");
  79. model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) {
  80. loc.type = mgb::CompNode::DeviceType::CPU;
  81. loc.device = mgb::CompNode::Locator::DEVICE_CPU_DEFAULT;
  82. };
  83. }
  84. if (enable_multithread) {
  85. mgb_log("using multithread(threads number:%ld) device\n", thread_num);
  86. model->get_mdl_config().comp_node_mapper =
  87. [&](mgb::CompNode::Locator& loc) {
  88. loc.type = mgb::CompNode::DeviceType::MULTITHREAD;
  89. loc.device = 0;
  90. loc.stream = thread_num;
  91. };
  92. }
  93. if (enable_multithread_default) {
  94. mgb_log("using multithread default device\n");
  95. model->get_mdl_config().comp_node_mapper =
  96. [&](mgb::CompNode::Locator& loc) {
  97. loc.type = mgb::CompNode::DeviceType::MULTITHREAD;
  98. loc.device = mgb::CompNode::Locator::DEVICE_MULTITHREAD_DEFAULT;
  99. loc.stream = thread_num;
  100. };
  101. }
  102. if (enable_set_core_ids) {
  103. std::string core_str;
  104. for (auto id : core_ids) {
  105. core_str += std::to_string(id) + ",";
  106. }
  107. mgb_log("set multi thread core ids:%s\n", core_str.c_str());
  108. auto affinity_callback = [&](size_t thread_id) {
  109. mgb::sys::set_cpu_affinity({core_ids[thread_id]});
  110. };
  111. mgb::CompNode::Locator loc;
  112. model->get_mdl_config().comp_node_mapper(loc);
  113. auto comp_node = mgb::CompNode::load(loc);
  114. mgb::CompNodeEnv::from_comp_node(comp_node).cpu_env().set_affinity(
  115. affinity_callback);
  116. }
  117. }
  118. }
  119. } // namespace lar
  120. void XPUDeviceOption::update() {
  121. m_option_name = "xpu_device";
  122. enable_cpu = FLAGS_cpu;
  123. #if LITE_WITH_CUDA
  124. enable_cuda = FLAGS_cuda;
  125. #endif
  126. enable_cpu_default = FLAGS_cpu_default;
  127. if (FLAGS_multithread >= 0) {
  128. thread_num = FLAGS_multithread;
  129. enable_multithread = true;
  130. }
  131. if (FLAGS_multithread_default >= 0) {
  132. thread_num = FLAGS_multithread_default;
  133. enable_multithread_default = true;
  134. }
  135. if (!FLAGS_multi_thread_core_ids.empty()) {
  136. mgb_assert(
  137. enable_multithread || enable_multithread_default,
  138. "core ids should be set after --multithread or --multithread-default");
  139. std::stringstream id_stream(FLAGS_multi_thread_core_ids);
  140. std::string id;
  141. size_t thread_cnt = 0;
  142. while (getline(id_stream, id, ',')) {
  143. thread_cnt++;
  144. core_ids.push_back(atoi(id.c_str()));
  145. }
  146. mgb_assert(
  147. thread_cnt == thread_num,
  148. "core ids number should be same with thread number set before");
  149. enable_set_core_ids = true;
  150. } else {
  151. enable_set_core_ids = false;
  152. }
  153. m_option = {
  154. {"cpu", lar::Bool::make(false)},
  155. #if LITE_WITH_CUDA
  156. {"cuda", lar::Bool::make(false)},
  157. #endif
  158. {"cpu_default", lar::Bool::make(false)},
  159. {"multithread", lar::NumberInt32::make(-1)},
  160. {"multithread_default", lar::NumberInt32::make(-1)},
  161. {"multi_thread_core_ids", lar::String::make("")},
  162. };
  163. std::static_pointer_cast<lar::Bool>(m_option["cpu"])->set_value(FLAGS_cpu);
  164. #if LITE_WITH_CUDA
  165. std::static_pointer_cast<lar::Bool>(m_option["cuda"])->set_value(FLAGS_cuda);
  166. #endif
  167. std::static_pointer_cast<lar::Bool>(m_option["cpu_default"])
  168. ->set_value(FLAGS_cpu_default);
  169. std::static_pointer_cast<lar::NumberInt32>(m_option["multithread"])
  170. ->set_value(FLAGS_multithread);
  171. std::static_pointer_cast<lar::NumberInt32>(m_option["multithread_default"])
  172. ->set_value(FLAGS_multithread_default);
  173. std::static_pointer_cast<lar::String>(m_option["multi_thread_core_ids"])
  174. ->set_value(FLAGS_multi_thread_core_ids);
  175. }
  176. bool XPUDeviceOption::m_valid;
  177. bool XPUDeviceOption::is_valid() {
  178. bool ret = FLAGS_cpu || FLAGS_cpu_default;
  179. #if LITE_WITH_CUDA
  180. ret = ret || FLAGS_cuda;
  181. #endif
  182. ret = ret || FLAGS_multithread >= 0;
  183. ret = ret || FLAGS_multithread_default >= 0;
  184. ret = ret || !FLAGS_multi_thread_core_ids.empty();
  185. return ret || m_valid;
  186. }
  187. std::shared_ptr<OptionBase> XPUDeviceOption::create_option() {
  188. static std::shared_ptr<lar::XPUDeviceOption> option(new XPUDeviceOption);
  189. if (XPUDeviceOption::is_valid()) {
  190. option->update();
  191. return std::static_pointer_cast<lar::OptionBase>(option);
  192. } else {
  193. return nullptr;
  194. }
  195. }
  196. void XPUDeviceOption::config_model(
  197. RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
  198. enable_cpu = std::static_pointer_cast<lar::Bool>(m_option["cpu"])->get_value();
  199. #if LITE_WITH_CUDA
  200. enable_cuda = std::static_pointer_cast<lar::Bool>(m_option["cuda"])->get_value();
  201. #endif
  202. enable_cpu_default =
  203. std::static_pointer_cast<lar::Bool>(m_option["cpu_default"])->get_value();
  204. int32_t num_of_thread =
  205. std::static_pointer_cast<lar::NumberInt32>(m_option["multithread"])
  206. ->get_value();
  207. enable_multithread = num_of_thread >= 0;
  208. int32_t num_of_thread_dft =
  209. std::static_pointer_cast<lar::NumberInt32>(m_option["multithread_default"])
  210. ->get_value();
  211. enable_multithread_default = num_of_thread_dft >= 0;
  212. mgb_assert(
  213. num_of_thread < 0 || num_of_thread_dft < 0,
  214. "multithread and multithread_default should not bet set at the same time");
  215. thread_num = num_of_thread >= 0 ? num_of_thread
  216. : (num_of_thread_dft >= 0 ? num_of_thread_dft : -1);
  217. std::string core_id_str =
  218. std::static_pointer_cast<lar::String>(m_option["multi_thread_core_ids"])
  219. ->get_value();
  220. if (!core_id_str.empty()) {
  221. mgb_assert(
  222. enable_multithread || enable_multithread_default,
  223. "core ids should be set after --multithread or --multithread-default");
  224. std::stringstream id_stream(core_id_str);
  225. std::string id;
  226. size_t thread_cnt = 0;
  227. while (getline(id_stream, id, ',')) {
  228. thread_cnt++;
  229. core_ids.push_back(atoi(id.c_str()));
  230. }
  231. mgb_assert(
  232. thread_cnt == thread_num,
  233. "core ids number should be same with thread number set before");
  234. enable_set_core_ids = true;
  235. } else {
  236. enable_set_core_ids = false;
  237. }
  238. CONFIG_MODEL_FUN;
  239. }
  240. ///////////////////////// xpu gflags ////////////////////////////
  241. DEFINE_bool(cpu, false, "set CPU device as running device");
  242. #if LITE_WITH_CUDA
  243. DEFINE_bool(cuda, false, "set CUDA device as running device ");
  244. #endif
  245. DEFINE_bool(cpu_default, false, "set running device as CPU device with inplace mode");
  246. DEFINE_int32(multithread, -1, "set multithread device as running device");
  247. DEFINE_int32(
  248. multithread_default, -1,
  249. "set multithread device as running device with inplace mode");
  250. DEFINE_string(multi_thread_core_ids, "", "set multithread core id");
  251. REGIST_OPTION_CREATOR(xpu_device, lar::XPUDeviceOption::create_option);
  252. REGIST_OPTION_VALIDATER(xpu_device, lar::XPUDeviceOption::set_valid);