GitOrigin-RevId: 144ff547d1
tags/v1.4.0-rc1
@@ -136,16 +136,16 @@ public: | |||
uint32_t type = INVALID_ALGO_TYPE; | |||
//! serialized param of the algo type | |||
std::string param; | |||
//! algorithm name | |||
std::string name; | |||
bool valid() const { return type != INVALID_ALGO_TYPE; } | |||
void reset() { type = INVALID_ALGO_TYPE; } | |||
bool operator==(const Desc& rhs) const { | |||
return handle_type == rhs.handle_type && type == rhs.type && | |||
param == rhs.param; | |||
param == rhs.param && name == rhs.name; | |||
} | |||
} desc; | |||
//! algorithm name | |||
std::string name; | |||
Attribute attribute; | |||
bool valid() const { return desc.valid(); } | |||
void reset() { desc.reset(); } | |||
@@ -178,12 +178,12 @@ public: | |||
static std::string attribute_str(const Attribute& attr); | |||
Handle::HandleType handle_type() const { return m_handle_type; } | |||
Info::Desc desc() const { return {handle_type(), type(), param(), name()}; } | |||
Info info() const { | |||
return {{handle_type(), type(), param()}, name(), attribute()}; | |||
return {desc(), attribute()}; | |||
} | |||
Info::Desc desc() const { return {handle_type(), type(), param()}; } | |||
template <typename T> | |||
static void serialize_write_pod(const T& val, std::string& result) { | |||
static_assert(std::is_trivially_copyable<T>::value, | |||
@@ -116,8 +116,10 @@ struct hash<megdnn::detail::Algorithm::Info::Desc> { | |||
const megdnn::detail::Algorithm::Info::Desc& desc) const { | |||
return megdnn::hash_combine<size_t>( | |||
megdnn::hash_combine<size_t>( | |||
std::hash<std::string>()(desc.param), | |||
std::hash<uint32_t>()(desc.type)), | |||
std::hash<std::string>()(desc.name), | |||
megdnn::hash_combine<size_t>( | |||
std::hash<std::string>()(desc.param), | |||
std::hash<uint32_t>()(desc.type))), | |||
std::hash<uint32_t>()(static_cast<uint32_t>(desc.handle_type))); | |||
} | |||
}; | |||
@@ -439,12 +439,6 @@ public: | |||
TensorLayout& dst_pg, TensorLayout& bias_pg); | |||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_impl->name(), ret); | |||
return ret; | |||
} | |||
private: | |||
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||
AlgoBase* m_impl; | |||
@@ -237,12 +237,6 @@ public: | |||
} | |||
return ret; | |||
} | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_impl->name(), ret); | |||
return ret; | |||
} | |||
}; | |||
class ConvolutionBackwardDataImpl::AlgoInt8NCHW4DotProdImplicitGemm final | |||
@@ -222,12 +222,6 @@ public: | |||
} | |||
return ret; | |||
} | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_impl->name(), ret); | |||
return ret; | |||
} | |||
}; | |||
class ConvolutionBackwardFilterImpl::AlgoPack : NonCopyableObj { | |||
@@ -174,14 +174,8 @@ public: | |||
} | |||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_impl->name(), ret); | |||
return ret; | |||
} | |||
}; | |||
class Convolution3DBackwardDataImpl::AlgoPack : NonCopyableObj { | |||
// defined in cudnn.cpp | |||
void fill_cudnn_algos(); | |||
@@ -183,11 +183,6 @@ public: | |||
TensorLayout& diff_pg); | |||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_impl->name(), ret); | |||
return ret; | |||
} | |||
}; | |||
class Convolution3DBackwardFilterImpl::AlgoPack : NonCopyableObj { | |||
@@ -135,11 +135,6 @@ public: | |||
static void modify_size_args(SizeArgs& args, TensorLayout& src_pg, | |||
TensorLayout& dst_pg); | |||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_impl->name(), ret); | |||
return ret; | |||
} | |||
}; | |||
class Convolution3DForwardImpl::AlgoCUDNN final : public AlgoBase { | |||
@@ -65,11 +65,6 @@ public: | |||
return {AlgoDataType::FLOAT32, AlgoCategory::WINOGRAD}; | |||
} | |||
MEGDNN_DECL_ALGO_TYPE(FB_WINOGRAD_F32) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_matmul_algo->name(), ret); | |||
return ret; | |||
} | |||
private: | |||
MatrixMulImpl::AlgoBase* m_matmul_algo; | |||
@@ -101,11 +96,6 @@ public: | |||
return {AlgoDataType::FLOAT32, AlgoCategory::WINOGRAD}; | |||
} | |||
MEGDNN_DECL_ALGO_TYPE(FB_WINOGRAD_4X4_F32) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_matmul_algo->name(), ret); | |||
return ret; | |||
} | |||
private: | |||
MatrixMulImpl::AlgoBase* m_matmul_algo; | |||
@@ -137,11 +127,6 @@ public: | |||
return {AlgoDataType::QINT8X8X32, AlgoCategory::WINOGRAD}; | |||
} | |||
MEGDNN_DECL_ALGO_TYPE(FB_WINOGRAD_QS8) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_matmul_algo->name(), ret); | |||
return ret; | |||
} | |||
private: | |||
MatrixMulImpl::AlgoBase* m_matmul_algo; | |||
@@ -173,11 +158,6 @@ public: | |||
return {AlgoDataType::QINT8X8X32, AlgoCategory::WINOGRAD}; | |||
} | |||
MEGDNN_DECL_ALGO_TYPE(FB_WINOGRAD_8X8_QS8) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_matmul_algo->name(), ret); | |||
return ret; | |||
} | |||
private: | |||
MatrixMulImpl::AlgoBase* m_matmul_algo; | |||
@@ -157,7 +157,6 @@ using BiasMode = ConvBiasForward::BiasMode; | |||
} \ | |||
std::string param() const override { \ | |||
std::string ret; \ | |||
serialize_write_pod(m_matmul_algo->name(), ret); \ | |||
serialize_write_pod(m_tile_size, ret); \ | |||
return ret; \ | |||
} \ | |||
@@ -62,10 +62,9 @@ public: | |||
return {m_matmul_algo->matmul_description().algo_type.data_type, | |||
AlgoCategory::IM2COL}; | |||
} | |||
MEGDNN_DECL_ALGO_TYPE(FB_WINOGRAD_8X8_QS8) | |||
MEGDNN_DECL_ALGO_TYPE(FB_CONV1x1) | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_matmul_algo->name(), ret); | |||
serialize_write_pod(m_oc_block_size, ret); | |||
return ret; | |||
} | |||
@@ -74,7 +74,6 @@ public: | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_matmul_algo->name(), ret); | |||
serialize_write_pod(m_ohw_tile_size, ret); | |||
return ret; | |||
} | |||
@@ -155,12 +155,6 @@ public: | |||
//! select matmul to the highest preference | |||
bool is_preferred(const NCBKernSizeParam& param) const override; | |||
std::string param() const override { | |||
std::string ret; | |||
serialize_write_pod(m_algorithm->name(), ret); | |||
return ret; | |||
} | |||
static ConvBiasImpl::NCBKernSizeParam init_conv_bias_param( | |||
const NCBKernSizeParam& param); | |||
@@ -380,13 +380,13 @@ float algo_benchmark(Benchmarker<Opr, T>& benchmark, TensorLayoutArray layouts, | |||
float min_used = std::numeric_limits<float>::max(); | |||
bool execed = false; | |||
for (auto i : algos) { | |||
if (std::regex_match(i.name, | |||
if (std::regex_match(i.desc.name, | |||
std::regex("(" + algo_base + ")(.*)"))) { | |||
opr->execution_policy().algo = i.desc; | |||
auto used = benchmark.exec(layouts); | |||
min_used = std::min(min_used, used); | |||
printf("run algo: %s used: %f ms min_used: %f ms\n", i.name.c_str(), | |||
used, min_used); | |||
printf("run algo: %s used: %f ms min_used: %f ms\n", | |||
i.desc.name.c_str(), used, min_used); | |||
execed = true; | |||
} | |||
} | |||
@@ -482,7 +482,7 @@ public: | |||
AlgoProxy<Opr, OprTrait<Opr>::arity>::get_all_algorithms_info( | |||
opr.get(), layouts)) { | |||
if (std::regex_match( | |||
algo_info.name, | |||
algo_info.desc.name, | |||
std::regex("(" + policy_name.name + ")(.*)"))) { | |||
ret.algo = algo_info.desc; | |||
} else { | |||
@@ -495,7 +495,7 @@ public: | |||
if (sub_items.size() != policy_name.sub_policy_names.size()) { | |||
printf("Invalid sub_policy_names in %s, expected %zu but got " | |||
"%zu\n", | |||
algo_info.name.c_str(), sub_items.size(), | |||
algo_info.desc.name.c_str(), sub_items.size(), | |||
policy_name.sub_policy_names.size()); | |||
return {}; | |||
} | |||
@@ -528,7 +528,7 @@ public: | |||
auto algo = | |||
OprAlgoProxy::get_algorithm_info_heuristic(opr, layouts); | |||
ASSERT_STREQ(opr->get_algorithm_from_desc(m_policy.algo)->name(), | |||
algo.name.c_str()); | |||
algo.desc.name.c_str()); | |||
} else { | |||
opr->execution_policy() = m_policy; | |||
} | |||
@@ -629,11 +629,10 @@ Checker<Convolution> checker(handle); | |||
out_type = inp_type; | |||
} | |||
checker | |||
.set_dtype(0, inp_type) | |||
.set_dtype(1, inp_type) | |||
.set_dtype(2, out_type) | |||
.set_param(param); | |||
checker.set_dtype(0, inp_type) | |||
.set_dtype(1, inp_type) | |||
.set_dtype(2, out_type) | |||
.set_param(param); | |||
auto opr = checker.opr(); | |||
opr->param() = param; | |||
std::string param_str; | |||
@@ -642,7 +641,8 @@ Checker<Convolution> checker(handle); | |||
oly.dtype = out_type; | |||
opr->deduce_layout(ily, fly, oly); | |||
int channel_start = 1; | |||
if (format) channel_start = 3; | |||
if (format) | |||
channel_start = 3; | |||
float scale = 1.0f / sqrt(fshp[channel_start] * FH * FW); | |||
UniformFloatRNG rng(scale, 2 * scale); | |||
checker.set_rng(0, &rng).set_rng(1, &rng); | |||
@@ -653,11 +653,11 @@ Checker<Convolution> checker(handle); | |||
construct_sub_execution_policy_heuristic<ConvolutionForward>( | |||
opr->execution_policy(), {ily, fly, oly}, param_str, | |||
opr->handle()); | |||
checker | |||
.set_epsilon(eps_getter(dtype == 1, 0, algo.name.c_str())) | |||
.execs({ishp, fshp, {}}); | |||
checker.set_epsilon( | |||
eps_getter(dtype == 1, 0, algo.desc.name.c_str())) | |||
.execs({ishp, fshp, {}}); | |||
opr->execution_policy() = {}; | |||
ASSERT_TRUE(checker.prev_succ()) << errmsg(algo.name.c_str()); | |||
ASSERT_TRUE(checker.prev_succ()) << errmsg(algo.desc.name.c_str()); | |||
} | |||
if (test_backward) { | |||
@@ -671,7 +671,7 @@ Checker<Convolution> checker(handle); | |||
opr->param() = param; | |||
std::string param_str; | |||
Algorithm::serialize_write_pod(opr->param(), param_str); | |||
for (auto algo: opr->get_all_algorithms_info(fly, oly, ily)) { | |||
for (auto algo : opr->get_all_algorithms_info(fly, oly, ily)) { | |||
used_algos_bwd_data.insert(algo.desc); | |||
opr->execution_policy().algo = algo.desc; | |||
construct_sub_execution_policy_heuristic< | |||
@@ -679,26 +679,26 @@ Checker<Convolution> checker(handle); | |||
{fly, oly, ily}, param_str, | |||
opr->handle()); | |||
checker_bwd_data | |||
.set_epsilon(eps_getter(dtype == 1, 1, algo.name.c_str())) | |||
.execl({fly, oly, ily}); | |||
.set_epsilon(eps_getter(dtype == 1, 1, | |||
algo.desc.name.c_str())) | |||
.execl({fly, oly, ily}); | |||
opr->execution_policy() = {}; | |||
ASSERT_TRUE(checker_bwd_data.prev_succ()) << | |||
errmsg(algo.name.c_str()); | |||
ASSERT_TRUE(checker_bwd_data.prev_succ()) | |||
<< errmsg(algo.desc.name.c_str()); | |||
} | |||
} | |||
if (test_backward) { | |||
// backward filter | |||
checker_bwd_filter | |||
.set_dtype(0, inp_type) | |||
.set_dtype(1, out_type) | |||
.set_dtype(2, inp_type) | |||
.set_param(param); | |||
checker_bwd_filter.set_dtype(0, inp_type) | |||
.set_dtype(1, out_type) | |||
.set_dtype(2, inp_type) | |||
.set_param(param); | |||
auto opr = checker_bwd_filter.opr(); | |||
opr->param() = param; | |||
std::string param_str; | |||
Algorithm::serialize_write_pod(opr->param(), param_str); | |||
for (auto algo: opr->get_all_algorithms_info(ily, oly, fly)) { | |||
for (auto algo : opr->get_all_algorithms_info(ily, oly, fly)) { | |||
used_algos_bwd_flt.insert(algo.desc); | |||
opr->execution_policy().algo = algo.desc; | |||
construct_sub_execution_policy_heuristic< | |||
@@ -706,11 +706,12 @@ Checker<Convolution> checker(handle); | |||
{ily, oly, fly}, param_str, | |||
opr->handle()); | |||
checker_bwd_filter | |||
.set_epsilon(eps_getter(dtype == 1, 2, algo.name.c_str())) | |||
.execl({ily, oly, fly}); | |||
.set_epsilon(eps_getter(dtype == 1, 2, | |||
algo.desc.name.c_str())) | |||
.execl({ily, oly, fly}); | |||
opr->execution_policy() = {}; | |||
ASSERT_TRUE(checker_bwd_filter.prev_succ()) << | |||
errmsg(algo.name.c_str()); | |||
ASSERT_TRUE(checker_bwd_filter.prev_succ()) | |||
<< errmsg(algo.desc.name.c_str()); | |||
} | |||
} | |||
} | |||
@@ -400,7 +400,7 @@ struct OprProxyProfilingBase | |||
megcoreSynchronize(opr->handle()->megcore_computing_handle()); | |||
timer.stop(); | |||
megdnn_log("%.3fms %s", timer.get_time_in_us() / 1e3, | |||
algo.name.c_str()); | |||
algo.desc.name.c_str()); | |||
if (min_time > timer.get_time_in_us()) { | |||
min_time = timer.get_time_in_us(); | |||
best_algo = algo.desc; | |||
@@ -522,7 +522,7 @@ struct OprWeightPreprocessProxyImpl : public OprProxyProfilingBase<Opr> { | |||
megcoreSynchronize(opr->handle()->megcore_computing_handle()); | |||
timer.stop(); | |||
printf("%.3fms %s\n", timer.get_time_in_us() / 1e3, | |||
algo.name.c_str()); | |||
algo.desc.name.c_str()); | |||
if (min_time > timer.get_time_in_us()) { | |||
min_time = timer.get_time_in_us(); | |||
Base::target_execution_policy.algo = algo.desc; | |||
@@ -88,7 +88,7 @@ void test_multibatchsize( | |||
A_tensor.layout(), B_tensor.layout(), | |||
C_tensor.layout())) { | |||
if (std::regex_match( | |||
i.name.c_str(), | |||
i.desc.name.c_str(), | |||
std::regex("(" + std::string(algo) + ")(.*)"))) { | |||
opr_reference->execution_policy().algo = i.desc; | |||
break; | |||
@@ -117,7 +117,7 @@ void test_multibatchsize( | |||
A_tensor_prime.layout(), B_tensor.layout(), | |||
C_tensor_batch.layout())) { | |||
if (std::regex_match( | |||
i.name.c_str(), | |||
i.desc.name.c_str(), | |||
std::regex("(" + std::string(algo) + ")(.*)"))) { | |||
opr_reference->execution_policy().algo = i.desc; | |||
break; | |||
@@ -318,7 +318,7 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, | |||
Maybe<AlgoChooserProfileCache::ResultEntry> cur_rst; | |||
std::string msg = ssprintf("profiling %s algorithm %s %s", | |||
ctx.mgb_opr()->dyn_typeinfo()->name, | |||
algo.name.c_str(), layouts_str.c_str()); | |||
algo.desc.name.c_str(), layouts_str.c_str()); | |||
ImplExecutionPolicy policy; | |||
policy.algo = algo.desc; | |||
ctx.construct_execution_policy(selected_strategy, policy); | |||
@@ -327,12 +327,12 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, | |||
} | |||
auto palgo = ctx.megdnn_opr()->get_algorithm_from_desc(policy.algo); | |||
if (!(palgo->contain_attribute_all(target_attr.first) && | |||
!palgo->contain_attribute_any(target_attr.second))) { | |||
!palgo->contain_attribute_any(target_attr.second))) { | |||
mgb_log_debug( | |||
"skip algo %s with attribute(%s), which is not match the " | |||
"profile strategy required contain attribute(%s) and not " | |||
"contain attribute(%s).", | |||
algo.name.c_str(), | |||
algo.desc.name.c_str(), | |||
Algorithm::attribute_str(palgo->attribute()).c_str(), | |||
Algorithm::attribute_str(target_attr.first).c_str(), | |||
Algorithm::attribute_str(target_attr.second).c_str()); | |||
@@ -552,8 +552,8 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache( | |||
auto&& prof = rst.val(); | |||
std::unordered_map<std::string, ImplAlgo> algo_map; | |||
for (auto i : get_all_candidates()) { | |||
auto ins = algo_map.emplace(i.name.c_str(), i); | |||
mgb_assert(ins.second, "duplicated algo name: %s", i.name.c_str()); | |||
auto ins = algo_map.emplace(i.desc.name.c_str(), i); | |||
mgb_assert(ins.second, "duplicated algo name: %s", i.desc.name.c_str()); | |||
} | |||
if (prof.empty()) | |||
@@ -41,8 +41,11 @@ std::string serialize_policy(const megdnn::ExecutionPolicy& policy) { | |||
megdnn::Algorithm::serialize_write_pod(policy.algo.handle_type, ret); | |||
megdnn::Algorithm::serialize_write_pod(policy.algo.type, ret); | |||
uint32_t param_size = policy.algo.param.size(); | |||
uint32_t name_size = policy.algo.name.size(); | |||
megdnn::Algorithm::serialize_write_pod<uint32_t>(param_size, ret); | |||
megdnn::Algorithm::serialize_write_pod<uint32_t>(name_size, ret); | |||
ret += policy.algo.param; | |||
ret += policy.algo.name; | |||
//! serialize sub_policy | |||
uint32_t size = policy.sub_policy.size(); | |||
@@ -64,11 +67,17 @@ megdnn::ExecutionPolicy deserialize_policy(const char* buf, uint32_t size, | |||
cb(ret.algo.type, uint32_t); | |||
uint32_t param_size = 0; | |||
uint32_t name_size = 0; | |||
cb(param_size, uint32_t); | |||
cb(name_size, uint32_t); | |||
if (param_size > 0) { | |||
ret.algo.param = std::string(buf + offset, param_size); | |||
offset += param_size; | |||
} | |||
if (name_size > 0) { | |||
ret.algo.name = std::string(buf + offset, name_size); | |||
offset += name_size; | |||
} | |||
uint32_t nr_policy = 0; | |||
cb(nr_policy, uint32_t); | |||