GitOrigin-RevId: a12a7d399a
tags/v1.3.0
@@ -0,0 +1,172 @@ | |||
/** | |||
* \file dnn/src/cuda/convolution/forward/algos.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. | |||
*/ | |||
#include "src/cuda/convolution/forward/algos.h" | |||
#include "src/cuda/conv_bias/opr_impl.h" | |||
#include "src/cuda/conv_bias/algo.h" | |||
#include "src/common/algo_base.h" | |||
#include "src/common/algo_chooser.h" | |||
using namespace megdnn; | |||
using namespace cuda; | |||
namespace { | |||
std::pair<TensorLayoutArray, ConvBiasForward::Param> sub_opr_config(
        const TensorLayout& src, const TensorLayout& filter,
        const TensorLayout& dst, const ConvolutionForwardImpl* opr) {
    //! Derive the bias dtype the ConvBias sub-opr expects for the given
    //! src/filter dtypes; quantized inputs get a QuantizedS32 bias whose
    //! scale is the product of the src and filter scales.
    DType bias_type;
    switch (src.dtype.enumv()) {
        case DTypeEnum::QuantizedS8:
            bias_type = dtype::QuantizedS32(
                    src.dtype.param<dtype::QuantizedS8>().scale *
                    filter.dtype.param<dtype::QuantizedS8>().scale);
            break;
        case DTypeEnum::Quantized8Asymm:
            bias_type = dtype::QuantizedS32(
                    src.dtype.param<dtype::Quantized8Asymm>().scale *
                    filter.dtype.param<dtype::Quantized8Asymm>().scale);
            break;
        case DTypeEnum::Uint8:
        case DTypeEnum::Int8:
            bias_type = dtype::Int32{};
            break;
        case DTypeEnum::Quantized4Asymm:
            bias_type = dtype::QuantizedS32(
                    src.dtype.param<dtype::Quantized4Asymm>().scale *
                    filter.dtype.param<dtype::Quantized4Asymm>().scale);
            break;
        default:
            //! any remaining dtype must be floating point
            megdnn_assert(src.dtype.category() == DTypeCategory::FLOAT);
            bias_type = src.dtype;
            break;
    }
    //! ConvBias param mirrors the convolution param, with IDENTITY
    //! nonlinearity since a plain convolution applies no activation.
    auto conv_param = opr->param();
    ConvBiasForward::Param convbias_param = {
            param::ConvBias::NonlineMode::IDENTITY,
            conv_param.mode,
            conv_param.sparse,
            conv_param.format,
            conv_param.pad_h,
            conv_param.pad_w,
            conv_param.stride_h,
            conv_param.stride_w,
            conv_param.dilate_h,
            conv_param.dilate_w,
            conv_param.compute_mode};
    //! layouts: [0] = empty bias layout, [1] = empty z layout
    TensorLayoutArray sub_layouts;
    sub_layouts.push_back(TensorLayout({}, bias_type));
    sub_layouts.push_back(TensorLayout({}, dst.dtype));
    return {sub_layouts, convbias_param};
}
} // namespace | |||
//! Register every algorithm and index each one by its descriptor so it can
//! later be found via all_algos_map().
ConvolutionForwardImpl::AlgoPack::AlgoPack() {
    all_algos.push_back(&algo_default);
    for (size_t i = 0; i < all_algos.size(); ++i) {
        AlgoBase* cur = all_algos[i];
        m_all_algos_map.emplace(cur->info().desc, cur);
    }
}
//! Singleton algo pack shared by all ConvolutionForwardImpl instances.
ConvolutionForwardImpl::AlgoPack ConvolutionForwardImpl::sm_algo_pack;
//! Expands to the get_algorithm_from_desc implementation (helper macro,
//! presumably from src/common/algo_base.h which is included above).
MEGDNN_DEF_GET_ALGO_FROM_DESC(ConvolutionForwardImpl)
//! SizeArgs stores non-owning pointers to the caller's layouts: \p src,
//! \p filter and \p dst must outlive this object.
ConvolutionForwardImpl::AlgoBase::SizeArgs::SizeArgs(ConvolutionForwardImpl* o,
                                                     const TensorLayout& src,
                                                     const TensorLayout& filter,
                                                     const TensorLayout& dst)
        : opr{o}, layout_src{&src}, layout_filter{&filter}, layout_dst{&dst} {}
//! ExecArgs extends SizeArgs with the concrete tensors and workspace; the
//! tensor handles are copied by value, so they stay valid for the exec call.
ConvolutionForwardImpl::AlgoBase::ExecArgs::ExecArgs(
        ConvolutionForwardImpl* opr, _megdnn_tensor_in src,
        _megdnn_tensor_in filter, _megdnn_tensor_out dst,
        _megdnn_workspace workspace)
        : SizeArgs(opr, src.layout, filter.layout, dst.layout),
          tensor_src{src},
          tensor_filter{filter},
          tensor_dst{dst},
          workspace{workspace} {}
//! Render the src/filter/dst layouts as a single string, intended for
//! diagnostics (e.g. error messages).
std::string ConvolutionForwardImpl::AlgoBase::SizeArgs::to_string() const {
    return megdnn_mangle(ssprintf("src=%s, filter=%s, dst=%s",
                                  layout_src->to_string().c_str(),
                                  layout_filter->to_string().c_str(),
                                  layout_dst->to_string().c_str()));
}
/* ===================== default algo ===================== */
//! Report the single sub-operator this algo dispatches to: a
//! CONVBIAS_FORWARD problem equivalent to this convolution.
std::vector<Algorithm::SearchItem>
ConvolutionForwardImpl::AlgoDefault::get_subopr_list(
        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
    // derive the ConvBias param and the (empty) bias/z layouts
    auto&& config =
            sub_opr_config(layouts[0], layouts[1], layouts[2],
                           static_cast<const ConvolutionForwardImpl*>(opr));
    // ConvBias layout order: {src, filter, bias, z, dst}; bias and z are
    // empty layouts since a plain convolution has neither input
    TensorLayoutArray conv_bias_layouts = {layouts[0], layouts[1],
                                           config.first[0], config.first[1],
                                           layouts[2]};
    std::string param_str;
    Algorithm::serialize_write_pod(config.second, param_str);
    return {{Algorithm::OprType::CONVBIAS_FORWARD, param_str,
             conv_bias_layouts}};
}
//! Available iff the underlying ConvBiasForward can supply an algorithm for
//! the equivalent conv-bias problem (with empty bias/z layouts).
bool ConvolutionForwardImpl::AlgoDefault::is_available(
        const SizeArgs& args) const {
    auto conv_bias_opr =
            args.opr->handle()->create_operator<ConvBiasForward>();
    auto&& config = sub_opr_config(
            *args.layout_src, *args.layout_filter, *args.layout_dst,
            args.opr);
    conv_bias_opr->param() = config.second;
    // get_algorithm's result converts to bool: truthy means an algo exists
    return get_algorithm(static_cast<ConvBiasForwardImpl*>(conv_bias_opr.get()),
                         *args.layout_src, *args.layout_filter, config.first[0],
                         config.first[1], *args.layout_dst);
}
//! Workspace is whatever the underlying ConvBiasForward needs for the
//! equivalent conv-bias problem.
size_t ConvolutionForwardImpl::AlgoDefault::get_workspace_in_bytes(
        const SizeArgs& args) const {
    auto conv_bias_opr = args.opr->handle()->create_operator<ConvBiasForward>();
    // If the caller pinned an algorithm and recorded a sub-policy, forward
    // the (single) sub-policy so the workspace query matches the algorithm
    // that will actually run in exec().
    if (args.opr->execution_policy().algo.valid() &&
        !args.opr->execution_policy().sub_policy.empty()) {
        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
        conv_bias_opr->execution_policy() =
                args.opr->execution_policy().sub_policy[0];
    }
    auto&& config = sub_opr_config(
            *args.layout_src, *args.layout_filter, *args.layout_dst,
            args.opr);
    conv_bias_opr->param() = config.second;
    // bias/z are empty layouts; no preprocessed filter (nullptr)
    return conv_bias_opr->get_workspace_in_bytes(
            *args.layout_src, *args.layout_filter, config.first[0],
            config.first[1], *args.layout_dst, nullptr);
}
void ConvolutionForwardImpl::AlgoDefault::exec(const ExecArgs& args) const { | |||
auto conv_bias_opr = args.opr->handle()->create_operator<ConvBiasForward>(); | |||
if (args.opr->execution_policy().algo.valid()) { | |||
megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1); | |||
conv_bias_opr->execution_policy() = | |||
args.opr->execution_policy().sub_policy[0]; | |||
} | |||
auto&& config = sub_opr_config( | |||
*args.layout_src, *args.layout_filter, *args.layout_dst, | |||
args.opr); | |||
conv_bias_opr->param() = config.second; | |||
conv_bias_opr->exec(args.tensor_src, args.tensor_filter, | |||
{nullptr, config.first[0]}, {nullptr, config.first[1]}, | |||
args.tensor_dst, nullptr, args.workspace); | |||
} | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,111 @@ | |||
/** | |||
* \file dnn/src/cuda/convolution/forward/algos.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. | |||
*/ | |||
#pragma once | |||
#include "megdnn/oprs.h" | |||
#include "src/common/algo_base.h" | |||
#include "src/common/metahelper.h" | |||
#include "src/common/utils.h" | |||
#include "src/cuda/convolution/opr_impl.h" | |||
#include <unordered_map> | |||
namespace megdnn { | |||
namespace cuda { | |||
/*!
 * \brief base class for ConvolutionForward algos on CUDA
 *
 * Concrete algos implement availability / workspace query / execution in
 * terms of SizeArgs (layouts only) and ExecArgs (layouts plus tensors).
 */
class ConvolutionForwardImpl::AlgoBase : public Algorithm {
protected:
    //! protected non-virtual dtor: instances are not deleted via AlgoBase*
    ~AlgoBase() = default;

public:
    //! per-algo tag used with MEGDNN_DECL_ALGO_TYPE in subclasses
    enum class AlgoType : uint32_t {
        CUDA_DEFAULT,
    };
    using Mapper = std::unordered_map<AlgorithmDesc, AlgoBase*>;
    AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; }
    //! size-only arguments; stores non-owning pointers to caller layouts
    struct SizeArgs {
        ConvolutionForwardImpl* opr;
        const TensorLayout *layout_src, *layout_filter, *layout_dst;
        std::string to_string() const;
        SizeArgs(ConvolutionForwardImpl* opr, const TensorLayout& src,
                 const TensorLayout& filter, const TensorLayout& dst);
    };
    //! exec arguments: SizeArgs plus the actual tensors and workspace
    struct ExecArgs : public SizeArgs {
        TensorND tensor_src, tensor_filter, tensor_dst;
        Workspace workspace;
        ExecArgs(ConvolutionForwardImpl* opr, _megdnn_tensor_in src,
                 _megdnn_tensor_in filter, _megdnn_tensor_out dst,
                 _megdnn_workspace workspace);
    };
    virtual bool is_available(const SizeArgs& args) const = 0;
    virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
    virtual void exec(const ExecArgs&) const = 0;
    //! available and within the given workspace limit (in bytes)
    bool is_available_wk(const SizeArgs& args, size_t limit) const {
        return is_available(args) && get_workspace_in_bytes(args) <= limit;
    }
    //! like is_available_wk, additionally requiring is_reproducible()
    //! when \p reproducible is true
    bool is_available_reproducible(
            const SizeArgs& args, bool reproducible = true,
            size_t limit = std::numeric_limits<size_t>::max()) const {
        return (!reproducible || is_reproducible()) &&
               is_available_wk(args, limit);
    }
    //! assert the provided workspace is large enough; returns *this so the
    //! call can be chained (e.g. check_workspace(...).exec(...))
    AlgoBase& check_workspace(const SizeArgs& args,
                              const Workspace& workspace) {
        auto req = get_workspace_in_bytes(args);
        megdnn_assert(req <= workspace.size,
                      "convolution fwd algo %s: required workspace %zu bytes, "
                      "got %zu",
                      name(), req, workspace.size);
        return *this;
    }
};
//! Default forward algo: delegates the convolution to a ConvBiasForward
//! sub-operator (IDENTITY nonlinearity, empty bias/z).
class ConvolutionForwardImpl::AlgoDefault final : public AlgoBase {
public:
    AlgoDefault() = default;
    bool is_available(const SizeArgs&) const override;
    size_t get_workspace_in_bytes(const SizeArgs& /* args */) const override;
    const char* name() const override { return "DEFAULT"; }
    void exec(const ExecArgs&) const override;
    bool is_reproducible() const override { return true; }
    //! reports the single CONVBIAS_FORWARD sub-opr this algo dispatches to
    std::vector<SearchItem> get_subopr_list(
            const TensorLayoutArray& layouts,
            const OperatorBase* opr) const override;
    MEGDNN_DECL_ALGO_TYPE(CUDA_DEFAULT)
};
//! Owns all forward algo instances; populated once in the constructor.
class ConvolutionForwardImpl::AlgoPack : NonCopyableObj {
private:
    //! desc -> algo, for lookup by AlgorithmDesc
    AlgoBase::Mapper m_all_algos_map;

public:
    AlgoPack();
    AlgoDefault algo_default;
    //! flat list of every registered algo (currently only algo_default)
    std::vector<AlgoBase*> all_algos;
    const AlgoBase::Mapper& all_algos_map() const { return m_all_algos_map; }
};
} // namespace cuda | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen |
@@ -12,6 +12,7 @@ | |||
#include "src/cuda/convolution/opr_impl.h" | |||
#include "megdnn/dtype.h" | |||
#include "src/cuda/convolution/helper.h" | |||
#include "src/cuda/convolution/forward/algos.h" | |||
#include "src/cuda/convolution/backward_data/algo.h" | |||
#include "src/cuda/convolution/backward_filter/algo.h" | |||
#include "src/cuda/conv_bias/opr_impl.h" | |||
@@ -28,108 +29,34 @@ using namespace convolution; | |||
TO_STRING(CUDNN_MINOR) "." TO_STRING(CUDNN_PATCHLEVEL) | |||
/* ============== ConvolutionForwardImpl ============== */ | |||
ConvolutionForwardImpl::ConvBiasExtraData | |||
ConvolutionForwardImpl::conv_bias_extra_data(const TensorLayout& src, | |||
const TensorLayout& filter, | |||
const TensorLayout& dst) { | |||
auto conv_param = param(); | |||
DType bias_type; | |||
if (src.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||
bias_type = dtype::QuantizedS32( | |||
src.dtype.param<dtype::QuantizedS8>().scale * | |||
filter.dtype.param<dtype::QuantizedS8>().scale); | |||
} else if (src.dtype.enumv() == DTypeEnum::Quantized8Asymm) { | |||
bias_type = dtype::QuantizedS32( | |||
src.dtype.param<dtype::Quantized8Asymm>().scale * | |||
filter.dtype.param<dtype::Quantized8Asymm>().scale); | |||
} else if (src.dtype.enumv() == DTypeEnum::Uint8 || | |||
src.dtype.enumv() == DTypeEnum::Int8) { | |||
bias_type = dtype::Int32{}; | |||
} else if (src.dtype.enumv() == DTypeEnum::Quantized4Asymm) { | |||
bias_type = dtype::QuantizedS32( | |||
src.dtype.param<dtype::Quantized4Asymm>().scale * | |||
filter.dtype.param<dtype::Quantized4Asymm>().scale); | |||
} else { | |||
megdnn_assert(src.dtype.category() == DTypeCategory::FLOAT); | |||
bias_type = src.dtype; | |||
} | |||
ConvBiasExtraData ret = {this->handle()->create_operator<ConvBiasForward>(), | |||
TensorLayout(bias_type), TensorLayout(dst.dtype)}; | |||
ret.convbias_opr->param() = {param::ConvBias::NonlineMode::IDENTITY, | |||
conv_param.mode, | |||
conv_param.sparse, | |||
conv_param.format, | |||
conv_param.pad_h, | |||
conv_param.pad_w, | |||
conv_param.stride_h, | |||
conv_param.stride_w, | |||
conv_param.dilate_h, | |||
conv_param.dilate_w, | |||
conv_param.compute_mode}; | |||
ret.convbias_opr->execution_policy() = {this->execution_policy().algo, {}}; | |||
return ret; | |||
} | |||
ConvolutionForwardImpl::Algorithm* | |||
ConvolutionForwardImpl::get_algorithm_heuristic(const TensorLayout& src, | |||
const TensorLayout& filter, | |||
const TensorLayout& dst, | |||
size_t workspace_limit_in_bytes, | |||
bool reproducible) { | |||
auto extra_data = conv_bias_extra_data(src, filter, dst); | |||
return static_cast<ConvBiasForwardImpl*>(extra_data.convbias_opr.get()) | |||
->get_algorithm_heuristic(src, filter, extra_data.bias_layout, | |||
extra_data.z_layout, dst, | |||
workspace_limit_in_bytes, reproducible); | |||
} | |||
ConvolutionForwardImpl::Algorithm* | |||
ConvolutionForwardImpl::get_algorithm_from_desc( | |||
const ConvolutionForward::AlgorithmDesc& desc) { | |||
auto conv_param = param(); | |||
auto convbias_opr = this->handle()->create_operator<ConvBiasForward>(); | |||
convbias_opr->param() = {param::ConvBias::NonlineMode::IDENTITY, | |||
conv_param.mode, | |||
conv_param.sparse, | |||
conv_param.format, | |||
conv_param.pad_h, | |||
conv_param.pad_w, | |||
conv_param.stride_h, | |||
conv_param.stride_w, | |||
conv_param.dilate_h, | |||
conv_param.dilate_w, | |||
conv_param.compute_mode}; | |||
convbias_opr->execution_policy() = {this->execution_policy().algo, {}}; | |||
return static_cast<ConvBiasForwardImpl*>(convbias_opr.get()) | |||
->get_algorithm_from_desc(desc); | |||
AlgoBase::SizeArgs args{this, src, filter, dst}; | |||
MEGDNN_MARK_USED_VAR(workspace_limit_in_bytes); | |||
MEGDNN_MARK_USED_VAR(reproducible); | |||
return &sm_algo_pack.algo_default; | |||
} | |||
std::vector<ConvolutionForwardImpl::Algorithm*> | |||
ConvolutionForwardImpl::get_all_algorithms(const TensorLayout& src, | |||
const TensorLayout& filter, | |||
const TensorLayout& dst) { | |||
auto extra_data = conv_bias_extra_data(src, filter, dst); | |||
return static_cast<ConvBiasForwardImpl*>(extra_data.convbias_opr.get()) | |||
->get_all_algorithms(src, filter, extra_data.bias_layout, | |||
extra_data.z_layout, dst); | |||
AlgoBase::SizeArgs args{this, src, filter, dst}; | |||
return megdnn::get_all_algorithms<ConvolutionForwardImpl>(args); | |||
} | |||
size_t ConvolutionForwardImpl::get_workspace_in_bytes( | |||
const TensorLayout& src, const TensorLayout& filter, | |||
const TensorLayout& dst, | |||
const PreprocessedFilter* preprocessed_filter) { | |||
auto extra_data = conv_bias_extra_data(src, filter, dst); | |||
return static_cast<ConvBiasForwardImpl*>(extra_data.convbias_opr.get()) | |||
->get_workspace_in_bytes( | |||
src, filter, extra_data.bias_layout, extra_data.z_layout, | |||
dst, | |||
reinterpret_cast<const ConvolutionBase< | |||
param::ConvBias>::PreprocessedFilter*>( | |||
preprocessed_filter)); | |||
MEGDNN_MARK_USED_VAR(preprocessed_filter); | |||
AlgoBase::SizeArgs args{this, src, filter, dst}; | |||
return megdnn::get_algorithm(this, src, filter, dst) | |||
->get_workspace_in_bytes(args); | |||
} | |||
void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, | |||
@@ -137,20 +64,15 @@ void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, | |||
_megdnn_tensor_out dst, | |||
const PreprocessedFilter* preprocessed_filter, | |||
_megdnn_workspace workspace) { | |||
auto extra_data = | |||
conv_bias_extra_data(src.layout, filter.layout, dst.layout); | |||
TensorND bias(nullptr, extra_data.bias_layout); | |||
TensorND z(nullptr, extra_data.z_layout); | |||
return static_cast<ConvBiasForwardImpl*>(extra_data.convbias_opr.get()) | |||
->exec(src, filter, bias, z, dst, | |||
reinterpret_cast<const ConvolutionBase< | |||
param::ConvBias>::PreprocessedFilter*>( | |||
preprocessed_filter), | |||
workspace); | |||
check_exec(src.layout, filter.layout, dst.layout, workspace.size, | |||
preprocessed_filter); | |||
AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | |||
auto&& algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | |||
algo->check_workspace(args, workspace).exec(args); | |||
} | |||
const char* ConvolutionForwardImpl::get_algorithm_set_name() const { | |||
return "CUDACONV0+CUDNN" CUDNN_VERSION_STR; | |||
return "CUDA CONVOLUTION_FORWARD" ; | |||
} | |||
/* ============== ConvolutionBackwardDataImpl ============== */ | |||
@@ -6,7 +6,8 @@ | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
*/ | |||
#pragma once | |||
@@ -16,58 +17,56 @@ | |||
namespace megdnn { | |||
namespace cuda { | |||
class ConvolutionForwardImpl: public ConvolutionForward { | |||
public: | |||
using ConvolutionForward::ConvolutionForward; | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | |||
_megdnn_tensor_out dst, | |||
const PreprocessedFilter* preprocessed_filter, | |||
_megdnn_workspace workspace) override; | |||
size_t get_workspace_in_bytes( | |||
const TensorLayout& src, const TensorLayout& filter, | |||
const TensorLayout& dst, | |||
const PreprocessedFilter* preprocessed_filter) override; | |||
const char* get_algorithm_set_name() const override; | |||
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | |||
const TensorLayout&, const TensorLayout&, | |||
const TensorLayout&) override { | |||
return {}; | |||
} | |||
size_t get_preprocess_workspace_in_bytes( | |||
const TensorLayout& , const TensorLayout& , | |||
const TensorLayout& ) override{ | |||
return 0; | |||
} | |||
void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, | |||
const TensorLayout&, PreprocessedFilter*, | |||
_megdnn_workspace) override { | |||
megdnn_throw("cuda exec_preprocess has not implemeted yet"); | |||
} | |||
Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override; | |||
protected: | |||
struct ConvBiasExtraData{ | |||
std::unique_ptr<ConvBiasForward> convbias_opr; | |||
TensorLayout bias_layout; | |||
TensorLayout z_layout; | |||
}; | |||
std::vector<Algorithm*> get_all_algorithms( | |||
const TensorLayout& src, const TensorLayout& filter, | |||
const TensorLayout& dst) override; | |||
Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||
const TensorLayout& filter, | |||
const TensorLayout& dst, | |||
size_t workspace_limit_in_bytes, | |||
bool reproducible) override; | |||
private: | |||
ConvBiasExtraData conv_bias_extra_data(const TensorLayout&, | |||
const TensorLayout&, | |||
const TensorLayout&); | |||
class ConvolutionForwardImpl : public ConvolutionForward { | |||
public: | |||
using ConvolutionForward::ConvolutionForward; | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | |||
_megdnn_tensor_out dst, | |||
const PreprocessedFilter* preprocessed_filter, | |||
_megdnn_workspace workspace) override; | |||
size_t get_workspace_in_bytes( | |||
const TensorLayout& src, const TensorLayout& filter, | |||
const TensorLayout& dst, | |||
const PreprocessedFilter* preprocessed_filter) override; | |||
const char* get_algorithm_set_name() const override; | |||
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | |||
const TensorLayout&, const TensorLayout&, | |||
const TensorLayout&) override { | |||
return {}; | |||
} | |||
size_t get_preprocess_workspace_in_bytes(const TensorLayout&, | |||
const TensorLayout&, | |||
const TensorLayout&) override { | |||
return 0; | |||
} | |||
void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, | |||
const TensorLayout&, PreprocessedFilter*, | |||
_megdnn_workspace) override { | |||
megdnn_throw("cuda exec_preprocess has not implemeted yet"); | |||
} | |||
Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override; | |||
class AlgoBase; | |||
class AlgoDefault; | |||
class AlgoPack; | |||
static const AlgoPack& algo_pack() { return sm_algo_pack; } | |||
protected: | |||
std::vector<Algorithm*> get_all_algorithms( | |||
const TensorLayout& src, const TensorLayout& filter, | |||
const TensorLayout& dst) override; | |||
Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||
const TensorLayout& filter, | |||
const TensorLayout& dst, | |||
size_t workspace_limit_in_bytes, | |||
bool reproducible) override; | |||
private: | |||
static AlgoPack sm_algo_pack; | |||
}; | |||
class ConvolutionBackwardDataImpl : public ConvolutionBackwardData { | |||
@@ -122,6 +121,7 @@ protected: | |||
const TensorLayout& grad, | |||
size_t workspace_limit_in_bytes, | |||
bool reproducible) override; | |||
private: | |||
Algorithm* get_algorithm_heuristic(const TensorLayout& filter, | |||
const CanonizedFilterMeta& filter_meta, | |||
@@ -141,12 +141,10 @@ public: | |||
size_t get_workspace_in_bytes(const TensorLayout& src, | |||
const TensorLayout& diff, | |||
const TensorLayout& grad) override; | |||
AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& src, | |||
const TensorLayout& diff, | |||
const TensorLayout& grad, | |||
const CanonizedFilterMeta& grad_meta, | |||
size_t workspace_limit_in_bytes, | |||
bool reproducible) { | |||
AlgorithmInfo get_algorithm_info_heuristic( | |||
const TensorLayout& src, const TensorLayout& diff, | |||
const TensorLayout& grad, const CanonizedFilterMeta& grad_meta, | |||
size_t workspace_limit_in_bytes, bool reproducible) { | |||
return get_algorithm_heuristic(src, diff, grad, grad_meta, | |||
workspace_limit_in_bytes, reproducible) | |||
->info(); | |||
@@ -162,7 +160,6 @@ public: | |||
->info(); | |||
} | |||
const char* get_algorithm_set_name() const override; | |||
class AlgoBase; | |||
@@ -187,6 +184,7 @@ protected: | |||
const TensorLayout& grad, | |||
size_t workspace_limit_in_bytes, | |||
bool reproducible) override; | |||
private: | |||
Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||
const TensorLayout& diff, | |||
@@ -532,6 +532,30 @@ private: | |||
bool* m_require_algo; | |||
}; | |||
template <typename Opr> | |||
void construct_sub_execution_policy_heuristic(ExecutionPolicy& policy, | |||
const TensorLayoutArray& layouts, | |||
const std::string& param, | |||
Handle* handle) { | |||
megdnn_assert(layouts.size() == OprTrait<Opr>::arity); | |||
auto opr = handle->create_operator<Opr>(); | |||
opr->param() = Algorithm::deserialize_read_pod<typename Opr::Param>(param); | |||
if (!policy.algo.valid()) { | |||
policy.algo = AlgoProxy<Opr, OprTrait<Opr>::arity>:: | |||
get_algorithm_info_heuristic(opr.get(), layouts).desc; | |||
} | |||
Algorithm* algo = opr->get_algorithm_from_desc(policy.algo); | |||
std::vector<Algorithm::SearchItem>&& sub_items = | |||
algo->get_subopr_list(layouts, opr.get()); | |||
FOREACH_OPR_TYPE_DISPATCH(sub_items, { | |||
policy.sub_policy.push_back(ExecutionPolicy{}); | |||
construct_sub_execution_policy_heuristic<_Opr>( | |||
policy.sub_policy.back(), _item.layouts, _item.param, | |||
handle); | |||
}); | |||
} | |||
} // namespace test | |||
} // namespace megdnn | |||
@@ -570,6 +570,8 @@ void convolution::test_conv_config_combinations(int k_size, | |||
.set_param(param); | |||
auto opr = checker.opr(); | |||
opr->param() = param; | |||
std::string param_str; | |||
Algorithm::serialize_write_pod(opr->param(), param_str); | |||
TensorLayout ily{ishp, inp_type}, fly{fshp, inp_type}, oly; | |||
oly.dtype = out_type; | |||
opr->deduce_layout(ily, fly, oly); | |||
@@ -581,10 +583,14 @@ void convolution::test_conv_config_combinations(int k_size, | |||
for (auto algo : opr->get_all_algorithms_info(ily, fly, oly)) { | |||
used_algos.insert(algo.desc); | |||
opr->execution_policy().algo = algo.desc; | |||
construct_sub_execution_policy_heuristic<ConvolutionForward>( | |||
opr->execution_policy(), {ily, fly, oly}, param_str, | |||
opr->handle()); | |||
checker | |||
.set_epsilon(eps_getter(dtype == 1, 0, algo.name.c_str())) | |||
.execs({ishp, fshp, {}}); | |||
opr->execution_policy().algo.reset(); | |||
opr->execution_policy() = {}; | |||
ASSERT_TRUE(checker.prev_succ()) << errmsg(algo.name.c_str()); | |||
} | |||
@@ -597,13 +603,19 @@ void convolution::test_conv_config_combinations(int k_size, | |||
auto opr = checker_bwd_data.opr(); | |||
opr->param() = param; | |||
std::string param_str; | |||
Algorithm::serialize_write_pod(opr->param(), param_str); | |||
for (auto algo: opr->get_all_algorithms_info(fly, oly, ily)) { | |||
used_algos_bwd_data.insert(algo.desc); | |||
opr->execution_policy().algo = algo.desc; | |||
construct_sub_execution_policy_heuristic< | |||
ConvolutionBackwardData>(opr->execution_policy(), | |||
{fly, oly, ily}, param_str, | |||
opr->handle()); | |||
checker_bwd_data | |||
.set_epsilon(eps_getter(dtype == 1, 1, algo.name.c_str())) | |||
.execl({fly, oly, ily}); | |||
opr->execution_policy().algo.reset(); | |||
opr->execution_policy() = {}; | |||
ASSERT_TRUE(checker_bwd_data.prev_succ()) << | |||
errmsg(algo.name.c_str()); | |||
} | |||
@@ -618,13 +630,19 @@ void convolution::test_conv_config_combinations(int k_size, | |||
auto opr = checker_bwd_filter.opr(); | |||
opr->param() = param; | |||
std::string param_str; | |||
Algorithm::serialize_write_pod(opr->param(), param_str); | |||
for (auto algo: opr->get_all_algorithms_info(ily, oly, fly)) { | |||
used_algos_bwd_flt.insert(algo.desc); | |||
opr->execution_policy().algo = algo.desc; | |||
construct_sub_execution_policy_heuristic< | |||
ConvolutionBackwardFilter>(opr->execution_policy(), | |||
{ily, oly, fly}, param_str, | |||
opr->handle()); | |||
checker_bwd_filter | |||
.set_epsilon(eps_getter(dtype == 1, 2, algo.name.c_str())) | |||
.execl({ily, oly, fly}); | |||
opr->execution_policy().algo.reset(); | |||
opr->execution_policy() = {}; | |||
ASSERT_TRUE(checker_bwd_filter.prev_succ()) << | |||
errmsg(algo.name.c_str()); | |||
} | |||
@@ -338,6 +338,7 @@ struct OprProxyProfilingBase | |||
FastRunCache& cache) { | |||
megdnn_assert(layouts.size() == arity); | |||
auto opr = handle->create_operator<Opr>(); | |||
opr->param() = | |||
Algorithm::deserialize_read_pod<typename Opr::Param>(param); | |||
SmallVector<size_t> sizes_in_bytes; | |||
@@ -427,9 +428,9 @@ struct OprProxyProfilingBase | |||
auto&& search_items = | |||
flatten_search_space(layouts, param_str, opr->handle()); | |||
FOREACH_OPR_TYPE_DISPATCH(search_items, { | |||
OprProxyProfilingBase<_Opr>::search(_item.layouts, param_str, W, | |||
opr->handle(), warmup_times, | |||
exec_times, cache); | |||
OprProxyProfilingBase<_Opr>::search( | |||
_item.layouts, _item.param, W, opr->handle(), | |||
warmup_times, exec_times, cache); | |||
}); | |||
construct_execution_policy(layouts, param_str, opr->handle(), cache, | |||
@@ -273,10 +273,14 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_FORWARD) { | |||
Checker<Convolution> checker(handle_cuda()); | |||
bool require_algo = false; | |||
checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>( | |||
ConvBiasForward::algo_name<ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE", {}) | |||
.c_str(), | |||
ExecutionPolicyAlgoName{ | |||
"DEFAULT", | |||
{{ConvBiasForward::algo_name<ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE", {}) | |||
.c_str(), | |||
{}}}}, | |||
&require_algo)); | |||
for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()}) { | |||
checker.set_dtype(0, dtype).set_dtype(1, dtype).set_dtype(2, dtype); | |||
if (dtype.enumv() == DTypeEnum::Float16) | |||
@@ -306,8 +310,12 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_FORWARD_SMALL) { | |||
Checker<Convolution> checker(handle_cuda()); | |||
bool require_algo = false; | |||
checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>( | |||
ConvBiasForward::algo_name<ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE_SMALL", {}).c_str(), | |||
ExecutionPolicyAlgoName{ | |||
"DEFAULT", | |||
{{ConvBiasForward::algo_name<ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE_SMALL", {}) | |||
.c_str(), | |||
{}}}}, | |||
&require_algo)); | |||
for (auto dtype : std::vector<DType> { | |||
dtype::Float32(), | |||
@@ -338,6 +346,7 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_BACKWARD_DATA) { | |||
bool require_algo = false; | |||
checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>( | |||
"CHANNEL_WISE", &require_algo)); | |||
for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()}) { | |||
checker.set_dtype(0, dtype).set_dtype(1, dtype).set_dtype(2, dtype); | |||
if (dtype.enumv() == DTypeEnum::Float16) | |||
@@ -368,9 +377,8 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_BACKWARD_DATA) { | |||
TEST_F(CUDA, CHANWISE_CONVOLUTION_BACKWARD_DATA_SMALL) { | |||
Checker<ConvolutionBackwardData> checker(handle_cuda()); | |||
bool require_algo = false; | |||
checker.set_before_exec_callback( | |||
AlgoChecker<ConvolutionBackwardData>( | |||
"CHANNEL_WISE_SMALL", &require_algo)); | |||
checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>( | |||
"CHANNEL_WISE_SMALL", &require_algo)); | |||
for (auto dtype : std::vector<DType> { | |||
dtype::Float32(), | |||
#if CUDA_VERSION >= 9000 | |||
@@ -396,10 +404,14 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_BACKWARD_FILTER) { | |||
Checker<ConvolutionBackwardFilter> checker(handle_cuda()); | |||
bool require_algo = false; | |||
checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>( | |||
"CHANNEL_WISE", &require_algo)); | |||
"CHANNEL_WISE", &require_algo)); | |||
UniformFloatRNG rng(-0.1, 0.1); | |||
for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()}) { | |||
checker.set_dtype(0, dtype).set_dtype(1, dtype).set_dtype(2, dtype).set_rng(0, &rng).set_rng(1, &rng); | |||
checker.set_dtype(0, dtype) | |||
.set_dtype(1, dtype) | |||
.set_dtype(2, dtype) | |||
.set_rng(0, &rng) | |||
.set_rng(1, &rng); | |||
if (dtype.enumv() == DTypeEnum::Float16) | |||
checker.set_epsilon(2e-1); | |||
// simple case | |||
@@ -514,7 +526,7 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_BENCH_ALL_ALGO_FWD) { | |||
auto run = [&](size_t N, size_t C, size_t IH, size_t IW, size_t FH, | |||
size_t FW) { | |||
checker.proxy()->target_execution_policy.algo.reset(); | |||
checker.proxy()->target_execution_policy = {}; | |||
checker.execs({{N, C, IH, IW}, {C, 1, 1, FH, FW}, {}}); | |||
}; | |||
@@ -614,7 +626,7 @@ TEST_F(CUDA, BENCHMARK_CHANWISE_CONV_ALL_ALGO_FORWARD) { | |||
.set_dtype(2, dtype::Float32()) | |||
.set_rng(0, &rng) | |||
.set_rng(1, &rng); | |||
bencher.proxy()->target_execution_policy.algo.reset(); | |||
bencher.proxy()->target_execution_policy = {}; | |||
auto time_in_ms_fp32 = bencher.execs({src, filter, {}}) / RUNS; | |||
bencher.set_param(param) | |||
@@ -623,7 +635,7 @@ TEST_F(CUDA, BENCHMARK_CHANWISE_CONV_ALL_ALGO_FORWARD) { | |||
.set_dtype(2, dtype::Float16()) | |||
.set_rng(0, &rng) | |||
.set_rng(1, &rng); | |||
bencher.proxy()->target_execution_policy.algo.reset(); | |||
bencher.proxy()->target_execution_policy = {}; | |||
auto time_in_ms_fp16 = bencher.execs({src, filter, {}}) / RUNS; | |||
bencher.proxy()->target_execution_policy.algo.reset(); | |||
@@ -677,10 +689,13 @@ TEST_F(CUDA, BENCHMARK_CHANWISE_CONV_FORWARD_FLOAT) { | |||
CUBenchmarker<ConvolutionForward> bencher(handle_cuda()); | |||
size_t RUNS = 1; | |||
bencher.set_display(false).set_times(RUNS); | |||
bencher.set_before_exec_callback(AlgoChecker<ConvolutionForward>( | |||
ConvBiasForward::algo_name<ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE", {}) | |||
.c_str())); | |||
bencher.set_before_exec_callback( | |||
AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{ | |||
"DEFAULT", | |||
{{ConvBiasForward::algo_name<ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE", {}) | |||
.c_str(), | |||
{}}}})); | |||
Convolution::Param param; | |||
param.format = ConvBias::Param::Format::NCHW; | |||
@@ -783,17 +798,24 @@ TEST_F(CUDA, BENCHMARK_CHANWISE_CONV_FORWARD_FLOAT_SMALL) { | |||
.set_dtype(2, dtype::Float32()) | |||
.set_rng(0, &rng) | |||
.set_rng(1, &rng) | |||
.set_before_exec_callback(AlgoChecker<ConvolutionForward>( | |||
ConvBiasForward::algo_name< | |||
ConvBiasForward::DirectParam>("CHANNEL_WISE", | |||
{}) | |||
.c_str())); | |||
.set_before_exec_callback( | |||
AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{ | |||
"DEFAULT", | |||
{{ConvBiasForward::algo_name< | |||
ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE", {}) | |||
.c_str(), | |||
{}}}})); | |||
auto time_in_ms_fp32_normal = bencher.execs({src, filter, {}}) / RUNS; | |||
bencher.set_before_exec_callback(AlgoChecker<ConvolutionForward>( | |||
ConvBiasForward::algo_name<ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE", {}) | |||
.c_str())); | |||
ExecutionPolicyAlgoName{"DEFAULT", | |||
{{ConvBiasForward::algo_name< | |||
ConvBiasForward::DirectParam>( | |||
"CHANNEL_WISE", {}) | |||
.c_str(), | |||
{}}}})); | |||
auto time_in_ms_fp32_small = bencher.execs({src, filter, {}}) / RUNS; | |||
bencher.set_param(param) | |||
@@ -135,10 +135,13 @@ TEST_F(CUDA, CONV_FORWARD_MATMUL_NCHW4) { | |||
.set_rng(1, &int_rng) | |||
.set_param(param); | |||
checker.set_before_exec_callback(AlgoChecker<Convolution>( | |||
ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>( | |||
"MATMUL8X8X32", {}) | |||
.c_str())); | |||
checker.set_before_exec_callback( | |||
AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{ | |||
"DEFAULT", | |||
{{ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>( | |||
"MATMUL8X8X32", {}) | |||
.c_str(), | |||
{}}}})); | |||
param.sparse = Convolution::Param::Sparse::DENSE; | |||
param.pad_h = param.pad_w = 1; | |||
@@ -30,19 +30,26 @@ TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD) | |||
auto args = get_dilated_args(); | |||
Checker<ConvolutionForward> checker(handle_cuda()); | |||
#if CUDNN_VERSION >= 7500 | |||
checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>( | |||
ConvBiasForward::algo_name<ConvBiasForward::DefaultParam>( | |||
"CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_" | |||
"PRECOMP_" | |||
"GEMM" CUDNN_VERSION_STRING, | |||
{}) | |||
.c_str())); | |||
checker.set_before_exec_callback( | |||
AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{ | |||
"DEFAULT", | |||
{{ConvBiasForward::algo_name<ConvBiasForward::DefaultParam>( | |||
"CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_" | |||
"IMPLICIT_" | |||
"PRECOMP_" | |||
"GEMM" CUDNN_VERSION_STRING, | |||
{}) | |||
.c_str(), | |||
{}}}})); | |||
printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n"); | |||
#else | |||
checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>( | |||
ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>("MATMUL", | |||
{}) | |||
.c_str())); | |||
checker.set_before_exec_callback( | |||
AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{ | |||
"DEFAULT", | |||
{{ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>( | |||
"MATMUL", {}) | |||
.c_str(), | |||
{}}}})); | |||
#endif | |||
NormalRNG default_rng; | |||
for (auto &&arg: args) { | |||
@@ -116,12 +116,17 @@ TEST_F(CUDA, GROUP_CONV_FORWARD_1x1) { | |||
std::string conv1x1_name = | |||
ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>( | |||
"MATMUL1X1", {}); | |||
checker.set_before_exec_callback(AlgoChecker<Convolution>( | |||
ConvBiasForward::algo_name<ConvBiasForward::DirectParam>( | |||
ssprintf("%s:%s", "CUDA:GROUP_CONV", | |||
conv1x1_name.c_str()), | |||
{}) | |||
.c_str())); | |||
checker.set_before_exec_callback( | |||
AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{ | |||
"DEFAULT", | |||
{{ConvBiasForward::algo_name< | |||
ConvBiasForward::DirectParam>( | |||
ssprintf("%s:%s", "CUDA:GROUP_CONV", | |||
conv1x1_name.c_str()) | |||
.c_str(), | |||
{}) | |||
.c_str(), | |||
{}}}})); | |||
#endif | |||
Convolution::Param param; | |||
param.sparse = Convolution::Param::Sparse::GROUP; | |||
@@ -231,7 +231,7 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) { | |||
algo.name.c_str(), str_on_inp_shape.c_str()); | |||
ImplExecutionPolicy policy; | |||
policy.algo = algo.desc; | |||
ctx.construct_execution_policy_from_cache(require_reproducible, policy); | |||
ctx.construct_execution_policy(require_reproducible, policy); | |||
if (ctx.get_workspace_size_bytes(policy) >= workspace_limit) | |||
continue; | |||
@@ -302,7 +302,7 @@ AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible, | |||
}); | |||
} | |||
typename AlgoChooser<Opr>::ImplExecutionPolicy policy; | |||
ctx.construct_execution_policy_from_cache(require_reproducible, policy); | |||
ctx.construct_execution_policy(require_reproducible, policy); | |||
return policy; | |||
MIDOUT_E | |||
} | |||
@@ -324,6 +324,11 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts, | |||
ImplExecutionPolicy policy; | |||
if (auto algo_choose_hook = mgb_opr->algo_chooser()) { | |||
policy = algo_choose_hook(mgb_opr); | |||
ctx.construct_execution_policy( | |||
mgb_opr->execution_policy().strategy == | |||
mixin::AlgoChooserHelper::ExecutionPolicy::Strategy:: | |||
HEURISTIC_REPRODUCIBLE, | |||
policy, false); | |||
} | |||
if (!policy.algo.valid()) { | |||
policy = get_policy(ctx); | |||
@@ -520,13 +525,26 @@ AlgoChooser<Opr>::ExeContext::get_all_candidates() const { | |||
} | |||
template <typename Opr> | |||
void AlgoChooser<Opr>::ExeContext::construct_execution_policy_from_cache( | |||
void AlgoChooser<Opr>::ExeContext::construct_execution_policy( | |||
bool require_reproducible, | |||
typename AlgoChooser<Opr>::ImplExecutionPolicy& policy) const { | |||
typename AlgoChooser<Opr>::ImplExecutionPolicy& policy, | |||
bool retrive_from_cache) const { | |||
if (!policy.algo.valid()) { | |||
policy.algo = get_profile_result_from_cache(require_reproducible).desc; | |||
if (retrive_from_cache) { | |||
policy.algo = | |||
get_profile_result_from_cache(require_reproducible).desc; | |||
} else { | |||
auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit( | |||
owner_graph(), m_cn, m_execution_policy.workspace_limit); | |||
policy.algo = APPLY(m_megdnn_opr->get_algorithm_info_heuristic( | |||
args..., workspace_limit, | |||
require_reproducible), | |||
m_layouts) | |||
.desc; | |||
} | |||
mgb_assert(policy.algo.valid(), | |||
"No cache found, maybe some error occured"); | |||
"No algo found from cache or heuristic, maybe some error " | |||
"occured"); | |||
} | |||
Algorithm* algo = m_megdnn_opr->get_algorithm_from_desc(policy.algo); | |||
@@ -544,8 +562,9 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy_from_cache( | |||
_item.param, m_base_mgb_opr, m_cn, m_execution_policy, | |||
m_allow_weight_preprocess); | |||
policy.sub_policy.push_back({}); | |||
sub_ctx.construct_execution_policy_from_cache(require_reproducible, | |||
policy.sub_policy.back()); | |||
sub_ctx.construct_execution_policy(require_reproducible, | |||
policy.sub_policy.back(), | |||
retrive_from_cache); | |||
}); | |||
return; | |||
@@ -672,11 +691,11 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const { | |||
AlgoChooser<megdnn::Opr>::ExeContext::get_workspace_size_bytes( \ | |||
const typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& \ | |||
policy) const; \ | |||
template void AlgoChooser<megdnn::Opr>::ExeContext:: \ | |||
construct_execution_policy_from_cache( \ | |||
bool require_reproducible, \ | |||
typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& \ | |||
policy) const; \ | |||
template void \ | |||
AlgoChooser<megdnn::Opr>::ExeContext::construct_execution_policy( \ | |||
bool require_reproducible, \ | |||
typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& policy, \ | |||
bool retrive_from_cache) const; \ | |||
template Maybe<AlgoChooserProfileCache::ResultEntry> \ | |||
AlgoChooser<megdnn::Opr>::ExeContext::profile_single_algo( \ | |||
const typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& \ | |||
@@ -129,13 +129,16 @@ public: | |||
ImplAlgo get_profile_result_from_cache(bool require_reproducible) const; | |||
/** | |||
* \brief construct execution policy from cache. | |||
* \brief construct execution policy from cache or heuristic. | |||
* | |||
* \param require_reproducible select algo which is reproducible | |||
* \param policy execution policy | |||
* \param retrive_from_cache retrive algo from cache if set True, get | |||
* from heuristic otherwise. | |||
*/ | |||
void construct_execution_policy_from_cache( | |||
bool require_reproducible, ImplExecutionPolicy& policy) const; | |||
void construct_execution_policy( | |||
bool require_reproducible, ImplExecutionPolicy& policy, | |||
bool retrive_from_cache = true) const; | |||
private: | |||
Maybe<PreprocessFilter<Opr>> construct_fake_preprocess_filter() const; | |||