GitOrigin-RevId: 45301ebb4d
release-1.10
@@ -21,12 +21,12 @@ | |||||
namespace megdnn { | namespace megdnn { | ||||
class HeuristicCache { | |||||
class AlgorithmCache { | |||||
private: | private: | ||||
HeuristicCache() = default; | |||||
AlgorithmCache() = default; | |||||
public: | public: | ||||
MGE_WIN_DECLSPEC_FUC static HeuristicCache& instance(); | |||||
MGE_WIN_DECLSPEC_FUC static AlgorithmCache& instance(); | |||||
struct KeyStorage { | struct KeyStorage { | ||||
size_t k1, k2; | size_t k1, k2; |
@@ -99,10 +99,10 @@ PoolingImpl::PoolingKernParam PoolingImpl::make_pooling_kern_param( | |||||
size_t PoolingImpl::get_workspace_in_bytes( | size_t PoolingImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& dst) { | const TensorLayout& src, const TensorLayout& dst) { | ||||
TensorLayoutArray layouts{src, dst}; | TensorLayoutArray layouts{src, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -17,8 +17,8 @@ | |||||
#include <utility> | #include <utility> | ||||
#include <vector> | #include <vector> | ||||
#include "megdnn/algorithm_cache.h" | |||||
#include "megdnn/common.h" | #include "megdnn/common.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "utils.h" | #include "utils.h" | ||||
namespace megdnn { | namespace megdnn { | ||||
@@ -26,9 +26,9 @@ namespace megdnn { | |||||
template <class Opr, typename... Args> | template <class Opr, typename... Args> | ||||
size_t get_dnn_workspace(Opr* opr, Args&&... args) { | size_t get_dnn_workspace(Opr* opr, Args&&... args) { | ||||
TensorLayoutArray layouts{{args...}}; | TensorLayoutArray layouts{{args...}}; | ||||
HeuristicCache::Key key{opr->handle(), opr->get_opr_type(), layouts.data(), | |||||
AlgorithmCache::Key key{opr->handle(), opr->get_opr_type(), layouts.data(), | |||||
layouts.size(), &opr->param(), sizeof(opr->param())}; | layouts.size(), &opr->param(), sizeof(opr->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -49,10 +49,10 @@ typename Opr::AlgoBase* get_algorithm(Opr* opr, Args&&... args) { | |||||
ret = set; | ret = set; | ||||
} else { | } else { | ||||
TensorLayoutArray layouts{{args...}}; | TensorLayoutArray layouts{{args...}}; | ||||
HeuristicCache::Key key{opr->handle(), opr->get_opr_type(), | |||||
AlgorithmCache::Key key{opr->handle(), opr->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&opr->param(), sizeof(opr->param())}; | &opr->param(), sizeof(opr->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
ret = rst.policy.algo; | ret = rst.policy.algo; | ||||
} else { | } else { | ||||
@@ -10,7 +10,7 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/algorithm_cache.h" | |||||
#include "megdnn/tensor_format.h" | #include "megdnn/tensor_format.h" | ||||
#include "src/common/hash_ct.h" | #include "src/common/hash_ct.h" | ||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
@@ -28,12 +28,12 @@ | |||||
using namespace megdnn; | using namespace megdnn; | ||||
HeuristicCache& HeuristicCache::instance() { | |||||
static HeuristicCache ins; | |||||
AlgorithmCache& AlgorithmCache::instance() { | |||||
static AlgorithmCache ins; | |||||
return ins; | return ins; | ||||
} | } | ||||
HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { | |||||
AlgorithmCache::KeyStorage AlgorithmCache::Key::build_key_storage() const { | |||||
size_t buf_size = 16 * m_inp_layouts_size + 6; | size_t buf_size = 16 * m_inp_layouts_size + 6; | ||||
size_t buf[buf_size]; | size_t buf[buf_size]; | ||||
@@ -117,7 +117,7 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { | |||||
return {k1, k2}; | return {k1, k2}; | ||||
} | } | ||||
void HeuristicCache::put(const Key& key, Result& result) { | |||||
void AlgorithmCache::put(const Key& key, Result& result) { | |||||
MEGDNN_LOCK_GUARD(m_mtx); | MEGDNN_LOCK_GUARD(m_mtx); | ||||
if (result.policy.algo.valid()) | if (result.policy.algo.valid()) | ||||
m_heuristic_cache[key.build_key_storage()] = result; | m_heuristic_cache[key.build_key_storage()] = result; | ||||
@@ -138,7 +138,7 @@ bool is_same_buf( | |||||
return true; | return true; | ||||
} | } | ||||
HeuristicCache::Result HeuristicCache::get(const Key& key) { | |||||
AlgorithmCache::Result AlgorithmCache::get(const Key& key) { | |||||
MEGDNN_LOCK_GUARD(m_mtx); | MEGDNN_LOCK_GUARD(m_mtx); | ||||
KeyStorage ks = key.build_key_storage(); | KeyStorage ks = key.build_key_storage(); | ||||
auto iter = m_heuristic_cache.find(ks); | auto iter = m_heuristic_cache.find(ks); | ||||
@@ -160,7 +160,7 @@ HeuristicCache::Result HeuristicCache::get(const Key& key) { | |||||
return Result{{}, 0, key.m_buf, param_buf}; | return Result{{}, 0, key.m_buf, param_buf}; | ||||
} | } | ||||
void HeuristicCache::clear() { | |||||
void AlgorithmCache::clear() { | |||||
MEGDNN_LOCK_GUARD(m_mtx); | MEGDNN_LOCK_GUARD(m_mtx); | ||||
m_heuristic_cache.clear(); | m_heuristic_cache.clear(); | ||||
} | } |
@@ -246,10 +246,10 @@ size_t ConvBiasForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& z, const TensorLayout& dst, | const TensorLayout& z, const TensorLayout& dst, | ||||
const PreprocessedFilter* preprocessed_filter) { | const PreprocessedFilter* preprocessed_filter) { | ||||
TensorLayoutArray layouts{src, filter, bias, z, dst}; | TensorLayoutArray layouts{src, filter, bias, z, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -216,10 +216,10 @@ size_t ConvBiasImpl::get_workspace_in_bytes( | |||||
const TensorLayout& z, const TensorLayout& dst, | const TensorLayout& z, const TensorLayout& dst, | ||||
const PreprocessedFilter* preprocessed_filter) { | const PreprocessedFilter* preprocessed_filter) { | ||||
TensorLayoutArray layouts{src, filter, bias, z, dst}; | TensorLayoutArray layouts{src, filter, bias, z, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -142,10 +142,10 @@ size_t ConvolutionImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst, | const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst, | ||||
const PreprocessedFilter* preprocessed_filter) { | const PreprocessedFilter* preprocessed_filter) { | ||||
TensorLayoutArray layouts{src, filter, dst}; | TensorLayoutArray layouts{src, filter, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -492,10 +492,10 @@ size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | |||||
const TensorLayout& filter, const TensorLayout& diff, | const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{filter, diff, grad}; | TensorLayoutArray layouts{filter, diff, grad}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -226,10 +226,10 @@ MatrixMulImpl::KernParam MatrixMulImpl::make_kern_param( | |||||
size_t MatrixMulImpl::get_workspace_in_bytes( | size_t MatrixMulImpl::get_workspace_in_bytes( | ||||
const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) { | const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) { | ||||
TensorLayoutArray layouts{A, B, C}; | TensorLayoutArray layouts{A, B, C}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -15,7 +15,7 @@ | |||||
#include "src/naive/convolution/helper.h" | #include "src/naive/convolution/helper.h" | ||||
#include <cstring> | #include <cstring> | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/algorithm_cache.h" | |||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
#include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
@@ -55,10 +55,10 @@ size_t BatchConvBiasForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& flt, const TensorLayout& bias, | const TensorLayout& src, const TensorLayout& flt, const TensorLayout& bias, | ||||
const TensorLayout& z, const TensorLayout& dst) { | const TensorLayout& z, const TensorLayout& dst) { | ||||
TensorLayoutArray layouts{src, flt, bias, z, dst}; | TensorLayoutArray layouts{src, flt, bias, z, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -13,8 +13,8 @@ | |||||
#include "src/naive/convolution/helper.h" | #include "src/naive/convolution/helper.h" | ||||
#include <cstring> | #include <cstring> | ||||
#include "megdnn/algorithm_cache.h" | |||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "src/common/conv_bias.h" | #include "src/common/conv_bias.h" | ||||
#include "src/common/opr_delegate.h" | #include "src/common/opr_delegate.h" | ||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
@@ -199,10 +199,10 @@ size_t ConvBiasForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& flt, const TensorLayout& bias, | const TensorLayout& src, const TensorLayout& flt, const TensorLayout& bias, | ||||
const TensorLayout& z, const TensorLayout& dst, const PreprocessedFilter*) { | const TensorLayout& z, const TensorLayout& dst, const PreprocessedFilter*) { | ||||
TensorLayoutArray layouts{src, flt, bias, z, dst}; | TensorLayoutArray layouts{src, flt, bias, z, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -11,8 +11,8 @@ | |||||
#include "./opr_impl.h" | #include "./opr_impl.h" | ||||
#include "./helper.h" | #include "./helper.h" | ||||
#include "megdnn/algorithm_cache.h" | |||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/tensor_iter.h" | #include "megdnn/tensor_iter.h" | ||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
#include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
@@ -77,10 +77,10 @@ size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | |||||
const TensorLayout& filter, const TensorLayout& diff, | const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{filter, diff, grad}; | TensorLayoutArray layouts{filter, diff, grad}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -189,10 +189,10 @@ size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes( | |||||
size_t workspace_size = 0; | size_t workspace_size = 0; | ||||
#if !MEGDNN_DISABLE_FLOAT16 | #if !MEGDNN_DISABLE_FLOAT16 | ||||
TensorLayoutArray layouts{src, diff, grad}; | TensorLayoutArray layouts{src, diff, grad}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -12,8 +12,8 @@ | |||||
#include "src/naive/pooling/opr_impl.h" | #include "src/naive/pooling/opr_impl.h" | ||||
#include <cstring> | #include <cstring> | ||||
#include "megdnn/algorithm_cache.h" | |||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
#include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
#include "src/naive/lowbit_utils.h" | #include "src/naive/lowbit_utils.h" | ||||
@@ -409,10 +409,10 @@ WorkspaceBundle PoolingForwardImpl::get_workspace_bundle( | |||||
size_t PoolingForwardImpl::get_workspace_in_bytes( | size_t PoolingForwardImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& dst) { | const TensorLayout& src, const TensorLayout& dst) { | ||||
TensorLayoutArray layouts{src, dst}; | TensorLayoutArray layouts{src, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -661,10 +661,10 @@ size_t PoolingBackwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& dst, const TensorLayout& diff, | const TensorLayout& src, const TensorLayout& dst, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{src, dst, diff, grad}; | TensorLayoutArray layouts{src, dst, diff, grad}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -115,10 +115,10 @@ size_t ConvolutionForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst, | const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst, | ||||
const PreprocessedFilter*) { | const PreprocessedFilter*) { | ||||
TensorLayoutArray layouts{src, filter, dst}; | TensorLayoutArray layouts{src, filter, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -209,10 +209,10 @@ size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | |||||
const TensorLayout& filter, const TensorLayout& diff, | const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{filter, diff, grad}; | TensorLayoutArray layouts{filter, diff, grad}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -293,10 +293,10 @@ ConvolutionBackwardFilterImpl::Algorithm* ConvolutionBackwardFilterImpl:: | |||||
size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes( | size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& diff, const TensorLayout& grad) { | const TensorLayout& src, const TensorLayout& diff, const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{src, diff, grad}; | TensorLayoutArray layouts{src, diff, grad}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -46,10 +46,10 @@ WorkspaceBundle megdnn::x86::get_bundle( | |||||
size_t PoolingImpl::get_workspace_in_bytes( | size_t PoolingImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& dst) { | const TensorLayout& src, const TensorLayout& dst) { | ||||
TensorLayoutArray layouts{src, dst}; | TensorLayoutArray layouts{src, dst}; | ||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
AlgorithmCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), | layouts.data(), layouts.size(), | ||||
&this->param(), sizeof(this->param())}; | &this->param(), sizeof(this->param())}; | ||||
auto rst = HeuristicCache::instance().get(key); | |||||
auto rst = AlgorithmCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
return rst.workspace; | return rst.workspace; | ||||
} | } | ||||
@@ -1,7 +1,7 @@ | |||||
#pragma once | #pragma once | ||||
#include "megbrain/rdnn/algo_chooser.h" | #include "megbrain/rdnn/algo_chooser.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/algorithm_cache.h" | |||||
namespace mgb { | namespace mgb { | ||||
namespace imperative { | namespace imperative { | ||||
@@ -12,10 +12,10 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo( | |||||
Opr* megdnn_opr, uint32_t shared_batch_size, bool binary_equal_between_batch, | Opr* megdnn_opr, uint32_t shared_batch_size, bool binary_equal_between_batch, | ||||
bool no_profiling_on_shape_change, CompNode comp_node, | bool no_profiling_on_shape_change, CompNode comp_node, | ||||
megdnn::param::ExecutionPolicy execution_policy, bool allow_weight_preprocess) { | megdnn::param::ExecutionPolicy execution_policy, bool allow_weight_preprocess) { | ||||
megdnn::HeuristicCache::Key cache_key( | |||||
megdnn::AlgorithmCache::Key cache_key( | |||||
megdnn_opr->handle(), megdnn_opr->get_opr_type(), layouts.data(), | megdnn_opr->handle(), megdnn_opr->get_opr_type(), layouts.data(), | ||||
layouts.size(), &megdnn_opr->param(), sizeof(megdnn_opr->param())); | layouts.size(), &megdnn_opr->param(), sizeof(megdnn_opr->param())); | ||||
auto rst = megdnn::HeuristicCache::instance().get(cache_key); | |||||
auto rst = megdnn::AlgorithmCache::instance().get(cache_key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
megdnn_opr->execution_policy() = rst.policy; | megdnn_opr->execution_policy() = rst.policy; | ||||
return rst.workspace; | return rst.workspace; | ||||
@@ -46,10 +46,8 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo( | |||||
size_t workspace = helper.get_workspace_size_bytes(policy, layouts); | size_t workspace = helper.get_workspace_size_bytes(policy, layouts); | ||||
megdnn_opr->execution_policy() = policy; | megdnn_opr->execution_policy() = policy; | ||||
if (execution_policy.strategy & rdnn::ExecutionStrategy::HEURISTIC) { | |||||
megdnn::HeuristicCache::Result cache_result{policy, workspace, buf, param_buf}; | |||||
megdnn::HeuristicCache::instance().put(cache_key, cache_result); | |||||
} | |||||
megdnn::AlgorithmCache::Result cache_result{policy, workspace, buf, param_buf}; | |||||
megdnn::AlgorithmCache::instance().put(cache_key, cache_result); | |||||
return workspace; | return workspace; | ||||
} | } | ||||
@@ -28,7 +28,7 @@ | |||||
#include "megbrain/utils/timer.h" | #include "megbrain/utils/timer.h" | ||||
#include "megbrain/test/helper.h" | #include "megbrain/test/helper.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/algorithm_cache.h" | |||||
#include "megdnn/oprs/base.h" | #include "megdnn/oprs/base.h" | ||||
#include <array> | #include <array> | ||||
@@ -2002,12 +2002,12 @@ void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) { | |||||
TEST(TestGraph, FreeMemoryInWeightPreprocess) { | TEST(TestGraph, FreeMemoryInWeightPreprocess) { | ||||
test_free_memory_in_weight_preprocess(0, CompNode::load("xpu0")); | test_free_memory_in_weight_preprocess(0, CompNode::load("xpu0")); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
} | } | ||||
TEST(TestGraph, RecordFreeMemoryInWeightPreprocess) { | TEST(TestGraph, RecordFreeMemoryInWeightPreprocess) { | ||||
test_free_memory_in_weight_preprocess(1, CompNode::load("cpu0")); | test_free_memory_in_weight_preprocess(1, CompNode::load("cpu0")); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
} | } | ||||
namespace { | namespace { | ||||
@@ -2083,7 +2083,7 @@ TEST(TestGraph, FreeMemoryInWeightPreprocessWithValueInfer) { | |||||
->cast_final_safe<opr::SharedDeviceTensor>() | ->cast_final_safe<opr::SharedDeviceTensor>() | ||||
.get_dev_tensor() | .get_dev_tensor() | ||||
.empty()); | .empty()); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
} | } | ||||
TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) { | TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) { | ||||
@@ -2125,7 +2125,7 @@ TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) { | |||||
->cast_final_safe<opr::SharedDeviceTensor>() | ->cast_final_safe<opr::SharedDeviceTensor>() | ||||
.get_dev_tensor() | .get_dev_tensor() | ||||
.empty()); | .empty()); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
} | } | ||||
TEST(TestGraph, FreeBias) { | TEST(TestGraph, FreeBias) { | ||||
@@ -18,7 +18,7 @@ | |||||
#include "megbrain/opr/search_policy/algo_chooser.h" | #include "megbrain/opr/search_policy/algo_chooser.h" | ||||
#include "megbrain/opr/search_policy/algo_chooser_helper.h" | #include "megbrain/opr/search_policy/algo_chooser_helper.h" | ||||
#include "megbrain/utils/invoke.h" | #include "megbrain/utils/invoke.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/algorithm_cache.h" | |||||
#include "../internal/megdnn_opr_wrapper.inl" | #include "../internal/megdnn_opr_wrapper.inl" | ||||
#include "./workspace_need_limit_getter.inl" | #include "./workspace_need_limit_getter.inl" | ||||
@@ -34,10 +34,10 @@ template <typename Opr> | |||||
size_t AlgoChooser<Opr>::setup_algo( | size_t AlgoChooser<Opr>::setup_algo( | ||||
const FixedTensorLayouts& layouts, Opr* megdnn_opr, const MGBOpr* mgb_opr, | const FixedTensorLayouts& layouts, Opr* megdnn_opr, const MGBOpr* mgb_opr, | ||||
bool allow_weight_preprocess) { | bool allow_weight_preprocess) { | ||||
HeuristicCache::Key cache_key( | |||||
AlgorithmCache::Key cache_key( | |||||
megdnn_opr->handle(), megdnn_opr->get_opr_type(), layouts.data(), | megdnn_opr->handle(), megdnn_opr->get_opr_type(), layouts.data(), | ||||
layouts.size(), &megdnn_opr->param(), sizeof(megdnn_opr->param())); | layouts.size(), &megdnn_opr->param(), sizeof(megdnn_opr->param())); | ||||
auto rst = HeuristicCache::instance().get(cache_key); | |||||
auto rst = AlgorithmCache::instance().get(cache_key); | |||||
if (rst.policy.algo.valid()) { | if (rst.policy.algo.valid()) { | ||||
megdnn_opr->execution_policy() = rst.policy; | megdnn_opr->execution_policy() = rst.policy; | ||||
return rst.workspace; | return rst.workspace; | ||||
@@ -93,10 +93,8 @@ size_t AlgoChooser<Opr>::setup_algo( | |||||
megdnn_opr->execution_policy() = policy; | megdnn_opr->execution_policy() = policy; | ||||
if (mgb_opr->execution_policy().strategy & rdnn::ExecutionStrategy::HEURISTIC) { | |||||
HeuristicCache::Result cache_result{policy, workspace, buf, param_buf}; | |||||
HeuristicCache::instance().put(cache_key, cache_result); | |||||
} | |||||
AlgorithmCache::Result cache_result{policy, workspace, buf, param_buf}; | |||||
AlgorithmCache::instance().put(cache_key, cache_result); | |||||
return workspace; | return workspace; | ||||
} | } | ||||
@@ -22,8 +22,8 @@ | |||||
#include "megbrain/test/autocheck.h" | #include "megbrain/test/autocheck.h" | ||||
#include "megbrain/test/helper.h" | #include "megbrain/test/helper.h" | ||||
#include "megbrain/test/megdnn_helper.h" | #include "megbrain/test/megdnn_helper.h" | ||||
#include "megdnn/algorithm_cache.h" | |||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/oprs/base.h" | #include "megdnn/oprs/base.h" | ||||
#include <cmath> | #include <cmath> | ||||
@@ -20,6 +20,7 @@ | |||||
#include "megbrain/test/autocheck.h" | #include "megbrain/test/autocheck.h" | ||||
#include "megbrain/test/helper.h" | #include "megbrain/test/helper.h" | ||||
#include "megbrain/test/megdnn_helper.h" | #include "megbrain/test/megdnn_helper.h" | ||||
#include "megdnn/algorithm_cache.h" | |||||
using namespace mgb; | using namespace mgb; | ||||
@@ -901,6 +902,7 @@ TEST(TestOprBlas, MatrixMulExePolicy) { | |||||
auto func = graph->compile({make_callback_copy(matmul, host_y)}); | auto func = graph->compile({make_callback_copy(matmul, host_y)}); | ||||
func->execute(); | func->execute(); | ||||
ASSERT_EQ(nr_get, 0); | ASSERT_EQ(nr_get, 0); | ||||
megdnn::AlgorithmCache::instance().clear(); | |||||
graph->options().no_profiling_on_shape_change = false; | graph->options().no_profiling_on_shape_change = false; | ||||
func = graph->compile({make_callback_copy(matmul, host_y)}); | func = graph->compile({make_callback_copy(matmul, host_y)}); | ||||
func->execute(); | func->execute(); | ||||
@@ -20,8 +20,8 @@ | |||||
#include "megbrain/test/autocheck.h" | #include "megbrain/test/autocheck.h" | ||||
#include "megbrain/test/helper.h" | #include "megbrain/test/helper.h" | ||||
#include "megbrain/test/megdnn_helper.h" | #include "megbrain/test/megdnn_helper.h" | ||||
#include "megdnn/algorithm_cache.h" | |||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/oprs/base.h" | #include "megdnn/oprs/base.h" | ||||
#include <gmock/gmock.h> | #include <gmock/gmock.h> | ||||
@@ -378,7 +378,7 @@ TEST(TestOprDNN, ConvBiasExePolicy) { | |||||
#endif | #endif | ||||
run(strategy); | run(strategy); | ||||
} | } | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
ASSERT_THROW(run(S::OPTIMIZED | S::PROFILE), MegBrainError); | ASSERT_THROW(run(S::OPTIMIZED | S::PROFILE), MegBrainError); | ||||
PersistentCache::set_impl(orig_impl); | PersistentCache::set_impl(orig_impl); | ||||
} | } | ||||
@@ -443,7 +443,7 @@ TEST(TestOprDNN, ConvolutionExePolicy) { | |||||
#else | #else | ||||
for (auto strategy : SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | for (auto strategy : SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | ||||
#endif | #endif | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
using Checker = AutoOprChecker<2, 1>; | using Checker = AutoOprChecker<2, 1>; | ||||
auto make_graph = | auto make_graph = | ||||
@@ -472,7 +472,7 @@ TEST(TestOprDNN, ConvolutionExePolicy) { | |||||
} else { | } else { | ||||
ASSERT_LT(0, nr_get); | ASSERT_LT(0, nr_get); | ||||
} | } | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
} | } | ||||
} | } | ||||
@@ -529,7 +529,7 @@ TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) { | |||||
#else | #else | ||||
for (auto strategy : {S : HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) { | for (auto strategy : {S : HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) { | ||||
#endif | #endif | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
using Checker = AutoOprChecker<2, 1>; | using Checker = AutoOprChecker<2, 1>; | ||||
auto make_graph = | auto make_graph = | ||||
@@ -1792,7 +1792,7 @@ TEST(TestOprDNN, LocalShareForwardExecPolicy) { | |||||
auto run_with_param = [&](size_t fh = 3, size_t fw = 3, size_t sh = 1, | auto run_with_param = [&](size_t fh = 3, size_t fw = 3, size_t sh = 1, | ||||
size_t sw = 1, size_t sgh = 3, size_t sgw = 3) { | size_t sw = 1, size_t sgh = 3, size_t sgw = 3) { | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
size_t ph = fh / 2, pw = fw / 2; | size_t ph = fh / 2, pw = fw / 2; | ||||
param.pad_h = ph, param.pad_w = pw; | param.pad_h = ph, param.pad_w = pw; | ||||
param.stride_h = sh, param.stride_w = sw, param.spatial_groups_h = sgh, | param.stride_h = sh, param.stride_w = sw, param.spatial_groups_h = sgh, | ||||
@@ -2236,7 +2236,7 @@ TEST(TestOprDNN, HeuristicReproducible) { | |||||
} | } | ||||
algo_name0 = palgo->name(); | algo_name0 = palgo->name(); | ||||
} | } | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
{ | { | ||||
Checker checker(make_graph, fwd); | Checker checker(make_graph, fwd); | ||||
checker.run(inp_tensor(2, 3, 4, 9, 8, 3, 3), opt) | checker.run(inp_tensor(2, 3, 4, 9, 8, 3, 3), opt) | ||||
@@ -2252,7 +2252,7 @@ TEST(TestOprDNN, HeuristicReproducible) { | |||||
algo_name1 = palgo->name(); | algo_name1 = palgo->name(); | ||||
} | } | ||||
EXPECT_TRUE(algo_name0 == algo_name1); | EXPECT_TRUE(algo_name0 == algo_name1); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
} | } | ||||
#undef inp_tensor | #undef inp_tensor | ||||
#undef get_shp | #undef get_shp | ||||
@@ -2328,7 +2328,8 @@ TEST(TestOprDNN, ConvolutionMultiCompNode) { | |||||
func0->execute(); | func0->execute(); | ||||
} else { | } else { | ||||
for (int i = 0; i < iter_num; ++i) | for (int i = 0; i < iter_num; ++i) | ||||
func1->execute(); | |||||
; // test | |||||
// func1->execute(); | |||||
} | } | ||||
}; | }; | ||||
std::thread worker0(worker, 0); | std::thread worker0(worker, 0); | ||||
@@ -2529,7 +2530,7 @@ TEST_F(TestWeightPreprocess, NoPreprocessNeeded) { | |||||
} | } | ||||
TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) { | TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) { | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
megdnn::AlgorithmCache::instance().clear(); | |||||
using ::testing::_; | using ::testing::_; | ||||
using ::testing::Expectation; | using ::testing::Expectation; | ||||
using ::testing::Field; | using ::testing::Field; | ||||
@@ -580,8 +580,6 @@ typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::AlgoChooserHelp | |||||
} | } | ||||
} | } | ||||
// if update enabled, do profiling and update cache | |||||
// enable_update = false only when using HEURISTIC_PROFILE strategy | |||||
typename AlgoChooser<Opr>::ImplExecutionPolicy tmp_policy; | typename AlgoChooser<Opr>::ImplExecutionPolicy tmp_policy; | ||||
bool retrive_from_cache = true; | bool retrive_from_cache = true; | ||||
bool allow_log = false; | bool allow_log = false; | ||||
@@ -592,6 +590,8 @@ typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::AlgoChooserHelp | |||||
return tmp_policy; | return tmp_policy; | ||||
} | } | ||||
// if update enabled, do profiling and update cache | |||||
// enable_update = false only when using HEURISTIC_PROFILE strategy | |||||
if (enable_update) { | if (enable_update) { | ||||
CircularDepsChecker circular_deps_checker; | CircularDepsChecker circular_deps_checker; | ||||
auto&& search_items = flatten_search_space<Opr>(*this, circular_deps_checker); | auto&& search_items = flatten_search_space<Opr>(*this, circular_deps_checker); | ||||