#include #include #include "lite/global.h" #include "megbrain/comp_node_env.h" #include "misc.h" #include "device_options.h" #include "models/model_lite.h" #include "models/model_mdl.h" DECLARE_bool(weight_preprocess); using namespace lar; /////////////////// XPUDeviceOption ////////////////////// namespace lar { template <> void XPUDeviceOption::config_model_internel( RuntimeParam& runtime_param, std::shared_ptr model) { if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) { if ((enable_cpu) || (enable_cpu_default) || (enable_multithread) || (enable_multithread_default)) { LITE_LOG("using cpu device\n"); model->get_config().device_type = LiteDeviceType::LITE_CPU; } #if LITE_WITH_CUDA if (enable_cuda) { LITE_LOG("using cuda device\n"); model->get_config().device_type = LiteDeviceType::LITE_CUDA; } #endif } else if (runtime_param.stage == RunStage::AFTER_NETWORK_CREATED) { auto&& network = model->get_lite_network(); if (enable_cpu_default) { LITE_LOG("using cpu default device\n"); lite::Runtime::set_cpu_inplace_mode(network); } if (enable_multithread) { LITE_LOG("using multithread device\n"); lite::Runtime::set_cpu_threads_number(network, thread_num); } if (enable_multithread_default) { LITE_LOG("using multithread default device\n"); lite::Runtime::set_cpu_inplace_mode(network); lite::Runtime::set_cpu_threads_number(network, thread_num); } if (enable_set_core_ids) { std::string core_str; for (auto id : core_ids) { core_str += std::to_string(id) + ","; } LITE_LOG("multi thread core ids: %s\n", core_str.c_str()); lite::ThreadAffinityCallback affinity_callback = [&](size_t thread_id) { mgb::sys::set_cpu_affinity({core_ids[thread_id]}); }; lite::Runtime::set_runtime_thread_affinity(network, affinity_callback); } } } template <> void XPUDeviceOption::config_model_internel( RuntimeParam& runtime_param, std::shared_ptr model) { if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) { if (enable_cpu) { mgb_log("using cpu device\n"); model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) { loc.type = mgb::CompNode::DeviceType::CPU; }; } #if LITE_WITH_CUDA if (enable_cuda) { mgb_log("using cuda device\n"); model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) { if (loc.type == mgb::CompNode::DeviceType::UNSPEC) { loc.type = mgb::CompNode::DeviceType::CUDA; } loc.device = 0; }; } #endif if (enable_cpu_default) { mgb_log("using cpu default device\n"); model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) { loc.type = mgb::CompNode::DeviceType::CPU; loc.device = mgb::CompNode::Locator::DEVICE_CPU_DEFAULT; }; } if (enable_multithread) { mgb_log("using multithread(threads number:%ld) device\n", thread_num); model->get_mdl_config().comp_node_mapper = [&](mgb::CompNode::Locator& loc) { loc.type = mgb::CompNode::DeviceType::MULTITHREAD; loc.device = 0; loc.stream = thread_num; }; } if (enable_multithread_default) { mgb_log("using multithread default device\n"); model->get_mdl_config().comp_node_mapper = [&](mgb::CompNode::Locator& loc) { loc.type = mgb::CompNode::DeviceType::MULTITHREAD; loc.device = mgb::CompNode::Locator::DEVICE_MULTITHREAD_DEFAULT; loc.stream = thread_num; }; } if (enable_set_core_ids) { std::string core_str; for (auto id : core_ids) { core_str += std::to_string(id) + ","; } mgb_log("set multi thread core ids:%s\n", core_str.c_str()); auto affinity_callback = [&](size_t thread_id) { mgb::sys::set_cpu_affinity({core_ids[thread_id]}); }; mgb::CompNode::Locator loc; model->get_mdl_config().comp_node_mapper(loc); auto comp_node = mgb::CompNode::load(loc); mgb::CompNodeEnv::from_comp_node(comp_node).cpu_env().set_affinity( affinity_callback); } } } } // namespace lar void XPUDeviceOption::update() { m_option_name = "xpu_device"; enable_cpu = FLAGS_cpu; #if LITE_WITH_CUDA enable_cuda = FLAGS_cuda; #endif enable_cpu_default = FLAGS_cpu_default; if (FLAGS_multithread >= 0) { thread_num = FLAGS_multithread; enable_multithread = true; } if (FLAGS_multithread_default >= 0) { thread_num = FLAGS_multithread_default; enable_multithread_default = true; } if (!FLAGS_multi_thread_core_ids.empty()) { mgb_assert( enable_multithread || enable_multithread_default, "core ids should be set after --multithread or --multithread-default"); std::stringstream id_stream(FLAGS_multi_thread_core_ids); std::string id; size_t thread_cnt = 0; while (getline(id_stream, id, ',')) { thread_cnt++; core_ids.push_back(atoi(id.c_str())); } mgb_assert( thread_cnt == thread_num, "core ids number should be same with thread number set before"); enable_set_core_ids = true; } else { enable_set_core_ids = false; } m_option = { {"cpu", lar::Bool::make(false)}, #if LITE_WITH_CUDA {"cuda", lar::Bool::make(false)}, #endif {"cpu_default", lar::Bool::make(false)}, {"multithread", lar::NumberInt32::make(-1)}, {"multithread_default", lar::NumberInt32::make(-1)}, {"multi_thread_core_ids", lar::String::make("")}, }; std::static_pointer_cast(m_option["cpu"])->set_value(FLAGS_cpu); #if LITE_WITH_CUDA std::static_pointer_cast(m_option["cuda"])->set_value(FLAGS_cuda); #endif std::static_pointer_cast(m_option["cpu_default"]) ->set_value(FLAGS_cpu_default); std::static_pointer_cast(m_option["multithread"]) ->set_value(FLAGS_multithread); std::static_pointer_cast(m_option["multithread_default"]) ->set_value(FLAGS_multithread_default); std::static_pointer_cast(m_option["multi_thread_core_ids"]) ->set_value(FLAGS_multi_thread_core_ids); } bool XPUDeviceOption::m_valid; bool XPUDeviceOption::is_valid() { bool ret = FLAGS_cpu || FLAGS_cpu_default; #if LITE_WITH_CUDA ret = ret || FLAGS_cuda; #endif ret = ret || FLAGS_multithread >= 0; ret = ret || FLAGS_multithread_default >= 0; ret = ret || !FLAGS_multi_thread_core_ids.empty(); return ret || m_valid; } std::shared_ptr XPUDeviceOption::create_option() { static std::shared_ptr option(new XPUDeviceOption); if (XPUDeviceOption::is_valid()) { option->update(); return std::static_pointer_cast(option); } else { return nullptr; } } void XPUDeviceOption::config_model( RuntimeParam& runtime_param, std::shared_ptr model) { enable_cpu = std::static_pointer_cast(m_option["cpu"])->get_value(); #if LITE_WITH_CUDA enable_cuda = std::static_pointer_cast(m_option["cuda"])->get_value(); #endif enable_cpu_default = std::static_pointer_cast(m_option["cpu_default"])->get_value(); int32_t num_of_thread = std::static_pointer_cast(m_option["multithread"]) ->get_value(); enable_multithread = num_of_thread >= 0; int32_t num_of_thread_dft = std::static_pointer_cast(m_option["multithread_default"]) ->get_value(); enable_multithread_default = num_of_thread_dft >= 0; mgb_assert( num_of_thread < 0 || num_of_thread_dft < 0, "multithread and multithread_default should not bet set at the same time"); thread_num = num_of_thread >= 0 ? num_of_thread : (num_of_thread_dft >= 0 ? num_of_thread_dft : -1); std::string core_id_str = std::static_pointer_cast(m_option["multi_thread_core_ids"]) ->get_value(); if (!core_id_str.empty()) { mgb_assert( enable_multithread || enable_multithread_default, "core ids should be set after --multithread or --multithread-default"); std::stringstream id_stream(core_id_str); std::string id; size_t thread_cnt = 0; while (getline(id_stream, id, ',')) { thread_cnt++; core_ids.push_back(atoi(id.c_str())); } mgb_assert( thread_cnt == thread_num, "core ids number should be same with thread number set before"); enable_set_core_ids = true; } else { enable_set_core_ids = false; } CONFIG_MODEL_FUN; } ///////////////////////// xpu gflags //////////////////////////// DEFINE_bool(cpu, false, "set CPU device as running device"); #if LITE_WITH_CUDA DEFINE_bool(cuda, false, "set CUDA device as running device "); #endif DEFINE_bool(cpu_default, false, "set running device as CPU device with inplace mode"); DEFINE_int32(multithread, -1, "set multithread device as running device"); DEFINE_int32( multithread_default, -1, "set multithread device as running device with inplace mode"); DEFINE_string(multi_thread_core_ids, "", "set multithread core id"); REGIST_OPTION_CREATOR(xpu_device, lar::XPUDeviceOption::create_option); REGIST_OPTION_VALIDATER(xpu_device, lar::XPUDeviceOption::set_valid);