@@ -0,0 +1,92 @@ | |||||
/** | |||||
* \file dnn/include/megdnn/heuristic_cache.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. | |||||
*/ | |||||
#pragma once | |||||
#include "megdnn/basic_types.h" | |||||
#include "megdnn/oprs/base.h" | |||||
#include <mutex> | |||||
#include <string> | |||||
#include <unordered_map> | |||||
namespace megdnn {

/*!
 * \brief process-wide cache mapping (handle, operator type, input layouts,
 * param bytes) to a previously selected execution policy and its workspace
 * size, so heuristic algorithm selection need not be repeated for identical
 * queries.
 *
 * Access through instance(); put()/get()/clear() lock m_mtx via
 * MEGDNN_LOCK_GUARD (on __DEPLOY_ON_XP_SP2__ builds m_mtx is a plain size_t,
 * so locking is presumably disabled there -- TODO confirm the guard macro).
 */
class HeuristicCache {
private:
    HeuristicCache() = default;

public:
    //! singleton accessor
    static HeuristicCache& instance();

    //! final key form stored in the map: two serialized strings
    struct KeyStorage {
        std::string category;  //!< platform / device / operator-type part
        std::string input;     //!< serialized layouts + raw param bytes
        bool operator==(const KeyStorage& k) const {
            return category == k.category && input == k.input;
        }
    };

    /*!
     * \brief lazily-serialized cache key.
     *
     * Holds raw, non-owning pointers to the caller's layouts and param; the
     * Key must not outlive them. build_key_storage() serializes the fields
     * into m_category/m_input (mutable so the const method can memoize).
     */
    class Key {
        Handle* m_handle;
        uint32_t m_opr_type;
        const TensorLayout* m_inp_layouts_ptr;
        size_t m_inp_layouts_size;
        const void* m_param_ptr;
        size_t m_param_size;
        mutable std::string m_category;
        mutable std::string m_input;

    public:
        Key(Handle* opr_handle, Algorithm::OprType opr_type,
            const TensorLayout* inp_layouts_ptr, size_t inp_layouts_size,
            const void* param_ptr = nullptr, size_t param_size = 0)
                : m_handle{opr_handle},
                  m_opr_type{static_cast<uint32_t>(opr_type)},
                  m_inp_layouts_ptr{inp_layouts_ptr},
                  m_inp_layouts_size{inp_layouts_size},
                  m_param_ptr{param_ptr},
                  m_param_size{param_size} {}

        //! serialize (and memoize) the key fields into a KeyStorage
        KeyStorage build_key_storage() const;
    };

    //! cached outcome of one heuristic query
    struct Result {
        ExecutionPolicy policy;
        size_t workspace;
    };

    //! insert \p result under \p key; results with an invalid algo are ignored
    void put(const Key& key, Result& result);

    //! look up \p key; returns a default Result (invalid algo) on miss
    Result get(const Key& key);

    //! drop all cached entries
    void clear();

private:
    //! hash_combine of the std::string hashes of both key parts
    struct Hash {
        size_t operator()(const KeyStorage& k) const {
            size_t h1 = std::hash<std::string>{}(k.category);
            size_t h2 = std::hash<std::string>{}(k.input);
            h1 ^= h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2);
            return h1;
        }
    };

    std::unordered_map<KeyStorage, Result, Hash> m_heuristic_cache;
#if __DEPLOY_ON_XP_SP2__
    size_t m_mtx;  //!< placeholder: no std::mutex on XP SP2 deployments
#else
    std::mutex m_mtx;
#endif
};

}  // namespace megdnn
@@ -42,6 +42,10 @@ public: | |||||
const TensorLayout& B, | const TensorLayout& B, | ||||
const TensorLayout& C) = 0; | const TensorLayout& C) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::BATCHED_MATRIX_MUL_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
void check_exec(const TensorLayout& A, const TensorLayout& B, | void check_exec(const TensorLayout& A, const TensorLayout& B, | ||||
const TensorLayout& C, size_t workspace_in_bytes); | const TensorLayout& C, size_t workspace_in_bytes); | ||||
@@ -76,6 +80,11 @@ public: | |||||
const TensorLayout& C) = 0; | const TensorLayout& C) = 0; | ||||
static size_t pack_size (const Param::Format format); | static size_t pack_size (const Param::Format format); | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::MATRIX_MUL_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
void check_exec(const TensorLayout& A, const TensorLayout& B, | void check_exec(const TensorLayout& A, const TensorLayout& B, | ||||
const TensorLayout& C, size_t workspace_in_bytes); | const TensorLayout& C, size_t workspace_in_bytes); | ||||
@@ -275,6 +275,10 @@ public: | |||||
const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
const TensorLayout& dst) = 0; | const TensorLayout& dst) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::CONVOLUTION_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec( | CanonizedFilterMeta check_exec( | ||||
const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
@@ -309,6 +313,10 @@ public: | |||||
void deduce_layout(const TensorLayout& filter, const TensorLayout& diff, | void deduce_layout(const TensorLayout& filter, const TensorLayout& diff, | ||||
TensorLayout& grad); | TensorLayout& grad); | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::CONVOLUTION_BACKWARD_DATA; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec(const TensorLayout& filter, | CanonizedFilterMeta check_exec(const TensorLayout& filter, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
@@ -338,6 +346,10 @@ public: | |||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) = 0; | const TensorLayout& grad) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::CONVOLUTION_BACKWARD_FILTER; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec(const TensorLayout& src, | CanonizedFilterMeta check_exec(const TensorLayout& src, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
@@ -505,6 +517,10 @@ public: | |||||
const ConvBiasForward::BiasMode bias_mode, | const ConvBiasForward::BiasMode bias_mode, | ||||
const param::ConvBias::NonlineMode nonline_mode); | const param::ConvBias::NonlineMode nonline_mode); | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::CONVBIAS_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec( | CanonizedFilterMeta check_exec( | ||||
const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
@@ -775,6 +791,10 @@ public: | |||||
virtual size_t get_workspace_in_bytes(const TensorLayout& src, | virtual size_t get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& dst) = 0; | const TensorLayout& dst) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::POOLING_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
void check_exec(const TensorLayout& src, const TensorLayout& dst, | void check_exec(const TensorLayout& src, const TensorLayout& dst, | ||||
size_t workspace_in_bytes); | size_t workspace_in_bytes); | ||||
@@ -801,6 +821,10 @@ public: | |||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) = 0; | const TensorLayout& grad) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::POOLING_BACKWARD; | |||||
} | |||||
protected: | protected: | ||||
void check_exec(const TensorLayout& src, const TensorLayout& dst, | void check_exec(const TensorLayout& src, const TensorLayout& dst, | ||||
const TensorLayout& diff, const TensorLayout& grad, | const TensorLayout& diff, const TensorLayout& grad, | ||||
@@ -1216,6 +1240,10 @@ public: | |||||
const TensorLayout& filter, | const TensorLayout& filter, | ||||
const TensorLayout& dst) = 0; | const TensorLayout& dst) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::CONVOLUTION3D_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec(const TensorLayout& src, | CanonizedFilterMeta check_exec(const TensorLayout& src, | ||||
const TensorLayout& filter, | const TensorLayout& filter, | ||||
@@ -1244,6 +1272,10 @@ public: | |||||
void deduce_layout(const TensorLayout& filter, const TensorLayout& diff, | void deduce_layout(const TensorLayout& filter, const TensorLayout& diff, | ||||
TensorLayout& grad); | TensorLayout& grad); | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::CONVOLUTION3D_BACKWARD_DATA; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec(const TensorLayout& filter, | CanonizedFilterMeta check_exec(const TensorLayout& filter, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
@@ -1268,6 +1300,10 @@ public: | |||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) = 0; | const TensorLayout& grad) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::CONVOLUTION3D_BACKWARD_FILTER; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec(const TensorLayout& src, | CanonizedFilterMeta check_exec(const TensorLayout& src, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
@@ -1308,6 +1344,10 @@ public: | |||||
const TensorLayout& filter, | const TensorLayout& filter, | ||||
const TensorLayout& dst) = 0; | const TensorLayout& dst) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::LOCAL_SHARE_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
void check_exec(const TensorLayout& src, const TensorLayout& filter, | void check_exec(const TensorLayout& src, const TensorLayout& filter, | ||||
const TensorLayout& dst, size_t workspace_in_bytes); | const TensorLayout& dst, size_t workspace_in_bytes); | ||||
@@ -1334,6 +1374,10 @@ public: | |||||
void deduce_layout(const TensorLayout& filter, const TensorLayout& diff, | void deduce_layout(const TensorLayout& filter, const TensorLayout& diff, | ||||
TensorLayout& grad); | TensorLayout& grad); | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::LOCAL_SHARE_BACKWARD_DATA; | |||||
} | |||||
protected: | protected: | ||||
void check_exec(const TensorLayout& filter, const TensorLayout& diff, | void check_exec(const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad, size_t workspace_in_bytes); | const TensorLayout& grad, size_t workspace_in_bytes); | ||||
@@ -1358,6 +1402,10 @@ public: | |||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) = 0; | const TensorLayout& grad) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::LOCAL_SHARE_BACKWARD_FILTER; | |||||
} | |||||
protected: | protected: | ||||
void check_exec(const TensorLayout& src, const TensorLayout& diff, | void check_exec(const TensorLayout& src, const TensorLayout& diff, | ||||
const TensorLayout& grad, size_t workspace_in_bytes); | const TensorLayout& grad, size_t workspace_in_bytes); | ||||
@@ -1479,6 +1527,10 @@ public: | |||||
const TensorLayout& mask, | const TensorLayout& mask, | ||||
const TensorLayout& dst) = 0; | const TensorLayout& dst) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::DEFORMABLE_CONV_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec(const TensorLayout& im, | CanonizedFilterMeta check_exec(const TensorLayout& im, | ||||
const TensorLayout& filter, | const TensorLayout& filter, | ||||
@@ -1520,6 +1572,10 @@ public: | |||||
const TensorLayout& mask, const TensorLayout& out_grad, | const TensorLayout& mask, const TensorLayout& out_grad, | ||||
TensorLayout& filter_grad); | TensorLayout& filter_grad); | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::DEFORMABLE_CONV_BACKWARD_FILTER; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec(const TensorLayout& im, | CanonizedFilterMeta check_exec(const TensorLayout& im, | ||||
const TensorLayout& offset, | const TensorLayout& offset, | ||||
@@ -1566,6 +1622,10 @@ public: | |||||
const TensorLayout& out_grad, TensorLayout& im_grad, | const TensorLayout& out_grad, TensorLayout& im_grad, | ||||
TensorLayout& offset_grad, TensorLayout& mask_grad); | TensorLayout& offset_grad, TensorLayout& mask_grad); | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::DEFORMABLE_CONV_BACKWARD_DATA; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec( | CanonizedFilterMeta check_exec( | ||||
const TensorLayout& im, const TensorLayout& filter, | const TensorLayout& im, const TensorLayout& filter, | ||||
@@ -1677,6 +1737,10 @@ public: | |||||
const TensorLayout& z, | const TensorLayout& z, | ||||
const TensorLayout& dst) = 0; | const TensorLayout& dst) = 0; | ||||
static Algorithm::OprType get_opr_type() { | |||||
return Algorithm::OprType::BATCH_CONV_FORWARD; | |||||
} | |||||
protected: | protected: | ||||
CanonizedFilterMeta check_exec(const TensorLayout& src, | CanonizedFilterMeta check_exec(const TensorLayout& src, | ||||
const TensorLayout& filter, | const TensorLayout& filter, | ||||
@@ -101,6 +101,15 @@ PoolingImpl::PoolingKernParam PoolingImpl::make_pooling_kern_param( | |||||
size_t PoolingImpl::get_workspace_in_bytes(const TensorLayout& src, | size_t PoolingImpl::get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
TensorLayoutArray layouts{src, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
auto param = make_pooling_kern_szie_param(this, src, dst); | auto param = make_pooling_kern_szie_param(this, src, dst); | ||||
auto algo = get_algorithm(this, src, dst); | auto algo = get_algorithm(this, src, dst); | ||||
if (!is_fallback_algo(algo)) { | if (!is_fallback_algo(algo)) { | ||||
@@ -6,7 +6,8 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
*/ | */ | ||||
#pragma once | #pragma once | ||||
@@ -17,10 +18,28 @@ | |||||
#include <vector> | #include <vector> | ||||
#include "megdnn/common.h" | #include "megdnn/common.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "utils.h" | #include "utils.h" | ||||
namespace megdnn { | namespace megdnn { | ||||
/*!
 * \brief query the workspace size for \p opr on the given layouts, consulting
 * the HeuristicCache before running algorithm selection.
 *
 * On a cache hit (a valid cached algo for this handle / opr type / layouts /
 * param) the recorded workspace size is returned directly; otherwise the
 * heuristic algorithm is selected and asked for its workspace requirement.
 */
template <class Opr, typename... Args>
size_t get_dnn_workspace(Opr* opr, Args&&... args) {
    TensorLayoutArray layouts{{args...}};
    HeuristicCache::Key key{opr->handle(), opr->get_opr_type(),
                            layouts.data(), layouts.size(), &opr->param(),
                            sizeof(opr->param())};
    auto rst = HeuristicCache::instance().get(key);
    if (rst.policy.algo.valid()) {
        return rst.workspace;
    }
    // NOTE(review): args is forwarded twice below; harmless while callers only
    // pass (const) TensorLayout lvalue references -- confirm no rvalue use.
    typename Opr::AlgoBase::SizeArgs size_args(opr,
                                               std::forward<Args>(args)...);
    return get_algorithm(opr, std::forward<Args>(args)...)
            ->get_workspace_in_bytes(size_args);
}
/*! | /*! | ||||
* \brief get user-configured algorithm, or heuristic algorithm | * \brief get user-configured algorithm, or heuristic algorithm | ||||
*/ | */ | ||||
@@ -31,9 +50,20 @@ typename Opr::AlgoBase* get_algorithm(Opr* opr, Args&&... args) { | |||||
if (set.valid()) { | if (set.valid()) { | ||||
ret = set; | ret = set; | ||||
} else { | } else { | ||||
ret = opr->get_algorithm_info_heuristic( | |||||
std::forward<Args>(args)..., std::numeric_limits<size_t>::max(), | |||||
AlgoAttribute::DEFAULT, AlgoAttribute::DEFAULT).desc; | |||||
TensorLayoutArray layouts{{args...}}; | |||||
HeuristicCache::Key key{opr->handle(), opr->get_opr_type(), | |||||
layouts.data(), layouts.size(), &opr->param(), | |||||
sizeof(opr->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
ret = rst.policy.algo; | |||||
} else { | |||||
ret = opr->get_algorithm_info_heuristic( | |||||
std::forward<Args>(args)..., | |||||
std::numeric_limits<size_t>::max(), | |||||
AlgoAttribute::DEFAULT, AlgoAttribute::DEFAULT) | |||||
.desc; | |||||
} | |||||
} | } | ||||
return static_cast<typename Opr::AlgoBase*>( | return static_cast<typename Opr::AlgoBase*>( | ||||
opr->get_algorithm_from_desc(ret)); | opr->get_algorithm_from_desc(ret)); | ||||
@@ -250,13 +250,9 @@ CanonizedFilterMeta DeformableConvBackwardData::check_exec( | |||||
megdnn_assert_eq_dtype(im, mask_grad); | megdnn_assert_eq_dtype(im, mask_grad); | ||||
// check layout | // check layout | ||||
megdnn_assert(im.shape == im_grad.shape, "invalid im_grad shape: %s", | |||||
megdnn_layout_msg(im_grad).c_str()); | |||||
megdnn_assert(offset.shape == offset_grad.shape, | |||||
"invalid offset_grad shape: %s", | |||||
megdnn_layout_msg(offset_grad).c_str()); | |||||
megdnn_assert(mask.shape == mask_grad.shape, "invalid mask_grad shape: %s", | |||||
megdnn_layout_msg(mask_grad).c_str()); | |||||
megdnn_assert_eq_shape(im, im_grad); | |||||
megdnn_assert_eq_shape(offset, offset_grad); | |||||
megdnn_assert_eq_shape(mask, mask_grad); | |||||
auto ret = make_canonized_filter_meta(im.ndim, filter, offset); | auto ret = make_canonized_filter_meta(im.ndim, filter, offset); | ||||
auto required_workspace_in_bytes = | auto required_workspace_in_bytes = | ||||
@@ -0,0 +1,142 @@ | |||||
/** | |||||
* \file dnn/src/common/heuristic_cache.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. | |||||
*/ | |||||
#include "megdnn/heuristic_cache.h" | |||||
#include "src/common/utils.h" | |||||
#include "src/naive/handle.h" | |||||
#if MEGDNN_WITH_CUDA | |||||
#include "src/cuda/utils.h" | |||||
#endif | |||||
#if MEGDNN_WITH_ROCM | |||||
#include "hcc_detail/hcc_defs_prologue.h" | |||||
#include "megcore_rocm.h" | |||||
#include "src/rocm/utils.h" | |||||
#endif | |||||
using namespace megdnn; | |||||
// Meyers singleton: function-local static gives thread-safe lazy
// initialization (C++11) of the single process-wide cache.
HeuristicCache& HeuristicCache::instance() {
    static HeuristicCache ins;
    return ins;
}
// Serialize the key fields into the (category, input) string pair.
// The result is memoized in the mutable m_category/m_input members, so
// repeated calls on the same Key do the work only once.
HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const {
    auto&& ctg = m_category;
    auto&& inp = m_input;
    // already built on a previous call -> reuse
    if (!m_category.empty() && !m_input.empty())
        return {ctg, inp};

    // ---- input part: each layout as "shape;stride;dtype;format|",
    //      then the raw param bytes appended verbatim ----
    inp.reserve(sizeof(TensorLayout) * 3 * m_inp_layouts_size + m_param_size);
    for (size_t i = 0; i < m_inp_layouts_size; i++) {
        auto&& ly = m_inp_layouts_ptr[i];
        for (size_t j = 0; j < ly.ndim; j++) {
            if (j)
                inp.push_back(',');
            inp.append(std::to_string(ly.shape[j]));
        }
        inp.push_back(';');
        for (size_t j = 0; j < ly.ndim; j++) {
            if (j)
                inp.push_back(',');
            inp.append(std::to_string(ly.stride[j]));
        }
        inp.push_back(';');
        inp.append(ly.dtype.name());
        inp.push_back(';');
        inp.append(ly.format.to_string().c_str());
        inp.push_back('|');
    }
    if (m_param_size) {
        // param struct is keyed byte-wise; assumes deterministic bytes
        // (no uninitialized padding) -- TODO confirm
        inp.append(reinterpret_cast<const char*>(m_param_ptr), m_param_size);
    }

    // ---- category part: platform id, device description, opr type ----
    ctg = "plat:";
    ctg.append(std::to_string(static_cast<uint32_t>(m_handle->type())));
    switch (m_handle->type()) {
#if MEGDNN_WITH_CUDA
        case Handle::HandleType::CUDA: {
            // include device name, compute capability and CUDA runtime major
            // version so entries do not leak across devices/toolchains
            int cuda_rt = -1;
            cuda_check(cudaRuntimeGetVersion(&cuda_rt));
            cuda_rt /= 1000;  // e.g. 10020 -> 10 (major version)
            auto&& handle = static_cast<megdnn::cuda::HandleImpl*>(m_handle);
            auto&& prop = handle->device_prop();
            ctg.append(ssprintf(";dev=%s;cap=%d.%d;runtime=%d;",
                                prop.name, prop.major, prop.minor, cuda_rt));
            break;
        }
#endif
#if MEGDNN_WITH_ROCM
        case Handle::HandleType::ROCM: {
            auto&& handle = static_cast<megdnn::rocm::HandleImpl*>(m_handle);
            auto&& prop = handle->device_prop();
            int drv = -1, hip_rt = -1;
            hip_check(hipDriverGetVersion(&drv));
            hip_check(hipRuntimeGetVersion(&hip_rt));
            ctg.append(ssprintf(";dev=%s;cap=%d.%d,drv=%d;runtime=%d;",
                                prop.name, prop.major, prop.minor, drv, hip_rt));
            break;
        }
#endif
        case Handle::HandleType::FALLBACK:
#if MEGDNN_X86
        case Handle::HandleType::X86:
#endif
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
        case Handle::HandleType::ARM_COMMON:
#endif
#if MEGDNN_AARCH64
        case Handle::HandleType::AARCH64:
#endif
#if MEGDNN_ARMV7
        case Handle::HandleType::ARMV7:
#endif
        {
            // CPU backends: dispatcher thread count is part of the key
            size_t nr_threads =
                    static_cast<megdnn::naive::HandleImpl*>(m_handle)
                            ->megcore_dispatcher()
                            ->nr_threads();
            ctg.append(";");
            ctg.append(std::to_string(nr_threads));
            ctg.append(";");
            break;
        }
        default:
            ctg.append(";");
    }
    ctg.append(std::to_string(m_opr_type));
    return {ctg, inp};
}
// Store \p result under \p key (thread-safe). Results whose algorithm is
// invalid are silently dropped, so failed selections are never cached.
void HeuristicCache::put(const Key& key, Result& result) {
    MEGDNN_LOCK_GUARD(m_mtx);
    if (result.policy.algo.valid())
        m_heuristic_cache[key.build_key_storage()] = result;
}
// Look up \p key (thread-safe). A miss yields a value-initialized Result,
// whose policy.algo is invalid -- callers test algo.valid() to detect it.
HeuristicCache::Result HeuristicCache::get(const Key& key) {
    MEGDNN_LOCK_GUARD(m_mtx);
    auto entry = m_heuristic_cache.find(key.build_key_storage());
    if (entry != m_heuristic_cache.end())
        return entry->second;
    return {};
}
// Remove every cached entry (thread-safe).
void HeuristicCache::clear() {
    MEGDNN_LOCK_GUARD(m_mtx);
    m_heuristic_cache.clear();
}
@@ -56,9 +56,7 @@ size_t BatchConvBiasForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
const TensorLayout& bias, const TensorLayout& z, | const TensorLayout& bias, const TensorLayout& z, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
AlgoBase::SizeArgs args(this, src, filter, bias, z, dst); | |||||
return get_algorithm(this, src, filter, bias, z, dst) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, filter, bias, z, dst); | |||||
} | } | ||||
void BatchConvBiasForwardImpl::exec(_megdnn_tensor_in src, | void BatchConvBiasForwardImpl::exec(_megdnn_tensor_in src, | ||||
@@ -66,10 +64,12 @@ void BatchConvBiasForwardImpl::exec(_megdnn_tensor_in src, | |||||
_megdnn_tensor_in bias, _megdnn_tensor_in z, | _megdnn_tensor_in bias, _megdnn_tensor_in z, | ||||
_megdnn_tensor_out dst, | _megdnn_tensor_out dst, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(src.layout, filter.layout, bias.layout, z.layout, dst.layout, | |||||
workspace.size); | |||||
AlgoBase::ExecArgs args(this, src, filter, bias, z, dst, workspace); | AlgoBase::ExecArgs args(this, src, filter, bias, z, dst, workspace); | ||||
auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout, | auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout, | ||||
z.layout, dst.layout); | z.layout, dst.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
const char* BatchConvBiasForwardImpl::get_algorithm_set_name() const { | const char* BatchConvBiasForwardImpl::get_algorithm_set_name() const { | ||||
@@ -33,13 +33,12 @@ void BatchedMatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | |||||
AlgoBase::ExecArgs args(this, A, B, C, workspace); | AlgoBase::ExecArgs args(this, A, B, C, workspace); | ||||
check_exec(A.layout, B.layout, C.layout, workspace.size); | check_exec(A.layout, B.layout, C.layout, workspace.size); | ||||
auto&& algo = megdnn::get_algorithm(this, A.layout, B.layout, C.layout); | auto&& algo = megdnn::get_algorithm(this, A.layout, B.layout, C.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
size_t BatchedMatrixMulForwardImpl::get_workspace_in_bytes( | size_t BatchedMatrixMulForwardImpl::get_workspace_in_bytes( | ||||
const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) { | const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) { | ||||
AlgoBase::SizeArgs args(this, A, B, C); | |||||
return megdnn::get_algorithm(this, A, B, C)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, A, B, C); | |||||
} | } | ||||
std::vector<Algorithm*> BatchedMatrixMulForwardImpl::get_all_algorithms( | std::vector<Algorithm*> BatchedMatrixMulForwardImpl::get_all_algorithms( | ||||
@@ -36,7 +36,7 @@ void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | |||||
preprocessed_filter); | preprocessed_filter); | ||||
auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout, | auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout, | ||||
z.layout, dst.layout); | z.layout, dst.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
}; | }; | ||||
std::vector<ConvBiasForward::Algorithm*> | std::vector<ConvBiasForward::Algorithm*> | ||||
@@ -228,6 +228,15 @@ size_t ConvBiasForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& bias, const TensorLayout& z, | const TensorLayout& bias, const TensorLayout& z, | ||||
const TensorLayout& dst, | const TensorLayout& dst, | ||||
const PreprocessedFilter* preprocessed_filter) { | const PreprocessedFilter* preprocessed_filter) { | ||||
TensorLayoutArray layouts{src, filter, bias, z, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
AlgoBase::SizeArgs args{ | AlgoBase::SizeArgs args{ | ||||
this, src, filter, bias, z, dst, preprocessed_filter}; | this, src, filter, bias, z, dst, preprocessed_filter}; | ||||
return get_algorithm(this, src, filter, bias, z, dst) | return get_algorithm(this, src, filter, bias, z, dst) | ||||
@@ -58,9 +58,7 @@ size_t ConvolutionForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& dst, | const TensorLayout& dst, | ||||
const PreprocessedFilter* preprocessed_filter) { | const PreprocessedFilter* preprocessed_filter) { | ||||
MEGDNN_MARK_USED_VAR(preprocessed_filter); | MEGDNN_MARK_USED_VAR(preprocessed_filter); | ||||
AlgoBase::SizeArgs args{this, src, filter, dst}; | |||||
return megdnn::get_algorithm(this, src, filter, dst) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, filter, dst); | |||||
} | } | ||||
void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, | void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, | ||||
@@ -72,7 +70,7 @@ void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, | |||||
preprocessed_filter); | preprocessed_filter); | ||||
AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | ||||
auto&& algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | auto&& algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
const char* ConvolutionForwardImpl::get_algorithm_set_name() const { | const char* ConvolutionForwardImpl::get_algorithm_set_name() const { | ||||
@@ -85,9 +83,10 @@ void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter, | |||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(filter.layout, diff.layout, grad.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | ||||
auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout); | auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
std::vector<ConvolutionBackwardDataImpl::Algorithm*> | std::vector<ConvolutionBackwardDataImpl::Algorithm*> | ||||
@@ -196,9 +195,7 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic( | |||||
size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | ||||
const TensorLayout& filter, const TensorLayout& diff, | const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, filter, diff, grad); | |||||
return get_algorithm(this, filter, diff, grad) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, filter, diff, grad); | |||||
} | } | ||||
const char* ConvolutionBackwardDataImpl::get_algorithm_set_name() const { | const char* ConvolutionBackwardDataImpl::get_algorithm_set_name() const { | ||||
@@ -211,9 +208,10 @@ void ConvolutionBackwardFilterImpl::exec(_megdnn_tensor_in src, | |||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(src.layout, diff.layout, grad.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | ||||
auto algo = get_algorithm(this, src.layout, diff.layout, grad.layout); | auto algo = get_algorithm(this, src.layout, diff.layout, grad.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
std::vector<ConvolutionBackwardFilterImpl::Algorithm*> | std::vector<ConvolutionBackwardFilterImpl::Algorithm*> | ||||
@@ -324,9 +322,7 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic( | |||||
size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes( | size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& diff, | const TensorLayout& src, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, src, diff, grad); | |||||
return get_algorithm(this, src, diff, grad) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, diff, grad); | |||||
} | } | ||||
const char* ConvolutionBackwardFilterImpl::get_algorithm_set_name() const { | const char* ConvolutionBackwardFilterImpl::get_algorithm_set_name() const { | ||||
@@ -111,18 +111,17 @@ Convolution3DForwardImpl::get_all_algorithms(const TensorLayout& src, | |||||
size_t Convolution3DForwardImpl::get_workspace_in_bytes( | size_t Convolution3DForwardImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
AlgoBase::SizeArgs args(this, src, filter, dst); | |||||
return get_algorithm(this, src, filter, dst) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, filter, dst); | |||||
} | } | ||||
void Convolution3DForwardImpl::exec(_megdnn_tensor_in src, | void Convolution3DForwardImpl::exec(_megdnn_tensor_in src, | ||||
_megdnn_tensor_in filter, | _megdnn_tensor_in filter, | ||||
_megdnn_tensor_out dst, | _megdnn_tensor_out dst, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(src.layout, filter.layout, dst.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | ||||
auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
const char* Convolution3DForwardImpl::get_algorithm_set_name() const { | const char* Convolution3DForwardImpl::get_algorithm_set_name() const { | ||||
@@ -133,9 +132,10 @@ void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter, | |||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(filter.layout, diff.layout, grad.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | ||||
auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout); | auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
std::vector<Convolution3DBackwardDataImpl::Algorithm*> | std::vector<Convolution3DBackwardDataImpl::Algorithm*> | ||||
@@ -200,9 +200,7 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||||
size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes( | size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes( | ||||
const TensorLayout& filter, const TensorLayout& diff, | const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, filter, diff, grad); | |||||
return get_algorithm(this, filter, diff, grad) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, filter, diff, grad); | |||||
} | } | ||||
const char* Convolution3DBackwardDataImpl::get_algorithm_set_name() const { | const char* Convolution3DBackwardDataImpl::get_algorithm_set_name() const { | ||||
@@ -213,10 +211,11 @@ void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src, | |||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(src.layout, diff.layout, grad.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | ||||
auto algo = | auto algo = | ||||
get_algorithm(this, src.layout, diff.layout, grad.layout); | get_algorithm(this, src.layout, diff.layout, grad.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
std::vector<Convolution3DBackwardFilterImpl::Algorithm*> | std::vector<Convolution3DBackwardFilterImpl::Algorithm*> | ||||
@@ -281,9 +280,7 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||||
size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes( | size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& diff, | const TensorLayout& src, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, src, diff, grad); | |||||
return get_algorithm(this, src, diff, grad) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, diff, grad); | |||||
} | } | ||||
const char* Convolution3DBackwardFilterImpl::get_algorithm_set_name() const { | const char* Convolution3DBackwardFilterImpl::get_algorithm_set_name() const { | ||||
@@ -36,8 +36,7 @@ size_t Fwd::get_workspace_in_bytes(const TensorLayout& im, | |||||
const TensorLayout& offset, | const TensorLayout& offset, | ||||
const TensorLayout& mask, | const TensorLayout& mask, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
auto algo = get_algorithm(this, im, filter, offset, mask, dst); | |||||
return algo->get_workspace_in_bytes({this, im, filter, offset, mask, dst}); | |||||
return get_dnn_workspace(this, im, filter, offset, mask, dst); | |||||
} | } | ||||
std::vector<AlgoFwd*> Fwd::get_all_algorithms(const TensorLayout& /* im */, | std::vector<AlgoFwd*> Fwd::get_all_algorithms(const TensorLayout& /* im */, | ||||
@@ -96,13 +95,13 @@ const char* Fwd::get_algorithm_set_name() const { | |||||
void Fwd::exec(_megdnn_tensor_in im, _megdnn_tensor_in filter, | void Fwd::exec(_megdnn_tensor_in im, _megdnn_tensor_in filter, | ||||
_megdnn_tensor_in offset, _megdnn_tensor_in mask, | _megdnn_tensor_in offset, _megdnn_tensor_in mask, | ||||
_megdnn_tensor_out out, _megdnn_workspace workspace) { | _megdnn_tensor_out out, _megdnn_workspace workspace) { | ||||
check_exec(im.layout, filter.layout, offset.layout, mask.layout, out.layout, | |||||
workspace.size); | |||||
auto algo = get_algorithm(this, im.layout, filter.layout, offset.layout, | auto algo = get_algorithm(this, im.layout, filter.layout, offset.layout, | ||||
mask.layout, out.layout); | mask.layout, out.layout); | ||||
AlgoBase::ExecArgs args(this, im, filter, offset, mask, out, workspace); | AlgoBase::ExecArgs args(this, im, filter, offset, mask, out, workspace); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
return; | |||||
algo->exec(args); | |||||
} | } | ||||
/* ============== BwdFlt Implementation ============== */ | /* ============== BwdFlt Implementation ============== */ | ||||
@@ -152,21 +151,23 @@ AlgoBwdFlt* BwdFlt::get_algorithm_heuristic( | |||||
size_t BwdFlt::get_workspace_in_bytes( | size_t BwdFlt::get_workspace_in_bytes( | ||||
const TensorLayout& im, const TensorLayout& offset, const TensorLayout& mask, | const TensorLayout& im, const TensorLayout& offset, const TensorLayout& mask, | ||||
const TensorLayout& out_grad, const TensorLayout& filter_grad) { | const TensorLayout& out_grad, const TensorLayout& filter_grad) { | ||||
auto algo = get_algorithm(this, im, offset, mask, out_grad, filter_grad); | |||||
return algo->get_workspace_in_bytes({this, im, offset, mask, out_grad, filter_grad}); | |||||
return get_dnn_workspace(this, im, offset, mask, out_grad, filter_grad); | |||||
} | } | ||||
const char* BwdFlt::get_algorithm_set_name() const { | const char* BwdFlt::get_algorithm_set_name() const { | ||||
return "DEFORMABLE_CONV_BWD_FILTER_CUDA"; | return "DEFORMABLE_CONV_BWD_FILTER_CUDA"; | ||||
}; | }; | ||||
void BwdFlt::exec(_megdnn_tensor_in im, _megdnn_tensor_in offset, _megdnn_tensor_in mask, | |||||
_megdnn_tensor_in out_grad, _megdnn_tensor_out filter_grad, | |||||
_megdnn_workspace workspace) { | |||||
AlgoBase::ExecArgs args(this, im, offset, mask, out_grad, filter_grad, workspace); | |||||
auto algo = get_algorithm(this, im.layout, offset.layout, mask.layout, out_grad.layout, | |||||
filter_grad.layout); | |||||
algo->check_workspace(args, workspace).exec(args); | |||||
void BwdFlt::exec(_megdnn_tensor_in im, _megdnn_tensor_in offset, | |||||
_megdnn_tensor_in mask, _megdnn_tensor_in out_grad, | |||||
_megdnn_tensor_out filter_grad, _megdnn_workspace workspace) { | |||||
check_exec(im.layout, offset.layout, mask.layout, out_grad.layout, | |||||
filter_grad.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, im, offset, mask, out_grad, filter_grad, | |||||
workspace); | |||||
auto algo = get_algorithm(this, im.layout, offset.layout, mask.layout, | |||||
out_grad.layout, filter_grad.layout); | |||||
algo->exec(args); | |||||
} | } | ||||
/* ============== BwdData Implementation ============== */ | /* ============== BwdData Implementation ============== */ | ||||
@@ -222,10 +223,8 @@ size_t BwdData::get_workspace_in_bytes( | |||||
const TensorLayout& offset, const TensorLayout& mask, | const TensorLayout& offset, const TensorLayout& mask, | ||||
const TensorLayout& out_grad, const TensorLayout& im_grad, | const TensorLayout& out_grad, const TensorLayout& im_grad, | ||||
const TensorLayout& offset_grad, const TensorLayout& mask_grad) { | const TensorLayout& offset_grad, const TensorLayout& mask_grad) { | ||||
auto algo = get_algorithm(this, im, filter, offset, mask, out_grad, | |||||
im_grad, offset_grad, mask_grad); | |||||
return algo->get_workspace_in_bytes({this, im, filter, offset, mask, out_grad, | |||||
im_grad, offset_grad, mask_grad}); | |||||
return get_dnn_workspace(this, im, filter, offset, mask, out_grad, im_grad, | |||||
offset_grad, mask_grad); | |||||
} | } | ||||
const char* BwdData::get_algorithm_set_name() const { | const char* BwdData::get_algorithm_set_name() const { | ||||
@@ -233,16 +232,19 @@ const char* BwdData::get_algorithm_set_name() const { | |||||
}; | }; | ||||
void BwdData::exec(_megdnn_tensor_in im, _megdnn_tensor_in filter, | void BwdData::exec(_megdnn_tensor_in im, _megdnn_tensor_in filter, | ||||
_megdnn_tensor_in offset, _megdnn_tensor_in mask, | |||||
_megdnn_tensor_in out_grad, _megdnn_tensor_out im_grad, | |||||
_megdnn_tensor_out offset_grad, _megdnn_tensor_out mask_grad, | |||||
_megdnn_workspace workspace) { | |||||
_megdnn_tensor_in offset, _megdnn_tensor_in mask, | |||||
_megdnn_tensor_in out_grad, _megdnn_tensor_out im_grad, | |||||
_megdnn_tensor_out offset_grad, _megdnn_tensor_out mask_grad, | |||||
_megdnn_workspace workspace) { | |||||
check_exec(im.layout, filter.layout, offset.layout, mask.layout, | |||||
out_grad.layout, im_grad.layout, offset_grad.layout, | |||||
mask_grad.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, im, filter, offset, mask, out_grad, im_grad, | AlgoBase::ExecArgs args(this, im, filter, offset, mask, out_grad, im_grad, | ||||
offset_grad, mask_grad, workspace); | offset_grad, mask_grad, workspace); | ||||
auto algo = get_algorithm(this, im.layout, filter.layout, offset.layout, | auto algo = get_algorithm(this, im.layout, filter.layout, offset.layout, | ||||
mask.layout, out_grad.layout, im_grad.layout, | mask.layout, out_grad.layout, im_grad.layout, | ||||
offset_grad.layout, mask_grad.layout); | offset_grad.layout, mask_grad.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -59,17 +59,17 @@ LocalShareForwardImpl::get_all_algorithms(const TensorLayout& src, | |||||
size_t LocalShareForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | size_t LocalShareForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& filter, | const TensorLayout& filter, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
AlgoBase::SizeArgs args(this, src, filter, dst); | |||||
return get_algorithm(this, src, filter, dst)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, filter, dst); | |||||
} | } | ||||
void LocalShareForwardImpl::exec(_megdnn_tensor_in src, | void LocalShareForwardImpl::exec(_megdnn_tensor_in src, | ||||
_megdnn_tensor_in filter, | _megdnn_tensor_in filter, | ||||
_megdnn_tensor_out dst, | _megdnn_tensor_out dst, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(src.layout, filter.layout, dst.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | ||||
auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
const char* LocalShareForwardImpl::get_algorithm_set_name() const { | const char* LocalShareForwardImpl::get_algorithm_set_name() const { | ||||
@@ -112,8 +112,7 @@ LocalShareBackwardDataImpl::get_all_algorithms(const TensorLayout& filter, | |||||
size_t LocalShareBackwardDataImpl::get_workspace_in_bytes(const TensorLayout& filter, | size_t LocalShareBackwardDataImpl::get_workspace_in_bytes(const TensorLayout& filter, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, filter, diff, grad); | |||||
return get_algorithm(this, filter, diff, grad)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, filter, diff, grad); | |||||
} | } | ||||
void LocalShareBackwardDataImpl::exec(_megdnn_tensor_in filter, | void LocalShareBackwardDataImpl::exec(_megdnn_tensor_in filter, | ||||
@@ -166,8 +165,7 @@ LocalShareBackwardFilterImpl::get_all_algorithms(const TensorLayout& src, | |||||
size_t LocalShareBackwardFilterImpl::get_workspace_in_bytes(const TensorLayout& src, | size_t LocalShareBackwardFilterImpl::get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, src, diff, grad); | |||||
return get_algorithm(this, src, diff, grad)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, diff, grad); | |||||
} | } | ||||
void LocalShareBackwardFilterImpl::exec(_megdnn_tensor_in src, | void LocalShareBackwardFilterImpl::exec(_megdnn_tensor_in src, | ||||
@@ -59,8 +59,7 @@ MatrixMulForwardImpl::Algorithm* MatrixMulForwardImpl::get_algorithm_heuristic( | |||||
size_t MatrixMulForwardImpl::get_workspace_in_bytes(const TensorLayout& A, | size_t MatrixMulForwardImpl::get_workspace_in_bytes(const TensorLayout& A, | ||||
const TensorLayout& B, | const TensorLayout& B, | ||||
const TensorLayout& C) { | const TensorLayout& C) { | ||||
AlgoBase::SizeArgs args{this, A, B, C}; | |||||
return megdnn::get_algorithm(this, A, B, C)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, A, B, C); | |||||
} | } | ||||
void MatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | void MatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | ||||
@@ -69,7 +68,7 @@ void MatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | |||||
check_exec(A.layout, B.layout, C.layout, workspace.size); | check_exec(A.layout, B.layout, C.layout, workspace.size); | ||||
AlgoBase::ExecArgs args(this, A, B, C, workspace); | AlgoBase::ExecArgs args(this, A, B, C, workspace); | ||||
auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout); | auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
} // namespace cuda | } // namespace cuda | ||||
@@ -21,8 +21,7 @@ namespace cuda { | |||||
size_t PoolingForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | size_t PoolingForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
AlgoBase::SizeArgs args(this, src, dst); | |||||
return get_algorithm(this, src, dst)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, dst); | |||||
} | } | ||||
const char* PoolingForwardImpl::get_algorithm_set_name() const { | const char* PoolingForwardImpl::get_algorithm_set_name() const { | ||||
@@ -117,9 +116,7 @@ size_t PoolingBackwardImpl::get_workspace_in_bytes(const TensorLayout& src, | |||||
const TensorLayout& dst, | const TensorLayout& dst, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, src, dst, diff, grad); | |||||
return get_algorithm(this, src, dst, diff, grad) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, dst, diff, grad); | |||||
} | } | ||||
} // namespace cuda | } // namespace cuda | ||||
@@ -44,8 +44,7 @@ BatchedMatrixMulForwardImpl::get_algorithm_heuristic( | |||||
size_t BatchedMatrixMulForwardImpl::get_workspace_in_bytes( | size_t BatchedMatrixMulForwardImpl::get_workspace_in_bytes( | ||||
const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) { | const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) { | ||||
AlgoBase::SizeArgs args{this, A, B, C}; | |||||
return megdnn::get_algorithm(this, A, B, C)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, A, B, C); | |||||
} | } | ||||
void BatchedMatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | void BatchedMatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | ||||
@@ -54,7 +53,7 @@ void BatchedMatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | |||||
check_exec(A.layout, B.layout, C.layout, workspace.size); | check_exec(A.layout, B.layout, C.layout, workspace.size); | ||||
AlgoBase::ExecArgs args(this, A, B, C, workspace); | AlgoBase::ExecArgs args(this, A, B, C, workspace); | ||||
auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout); | auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -224,6 +224,15 @@ size_t ConvBiasImpl::get_workspace_in_bytes( | |||||
const TensorLayout& bias, const TensorLayout& z, | const TensorLayout& bias, const TensorLayout& z, | ||||
const TensorLayout& dst, | const TensorLayout& dst, | ||||
const PreprocessedFilter* preprocessed_filter) { | const PreprocessedFilter* preprocessed_filter) { | ||||
TensorLayoutArray layouts{src, filter, bias, z, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
auto fparam = make_ncb_kern_size_param(src, filter, bias, dst, | auto fparam = make_ncb_kern_size_param(src, filter, bias, dst, | ||||
preprocessed_filter); | preprocessed_filter); | ||||
auto&& algo = get_algorithm(fparam); | auto&& algo = get_algorithm(fparam); | ||||
@@ -146,6 +146,15 @@ size_t ConvolutionImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
const TensorLayout& dst, | const TensorLayout& dst, | ||||
const PreprocessedFilter* preprocessed_filter) { | const PreprocessedFilter* preprocessed_filter) { | ||||
TensorLayoutArray layouts{src, filter, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
auto fparam = | auto fparam = | ||||
make_ncb_kern_size_param(src, filter, dst, preprocessed_filter); | make_ncb_kern_size_param(src, filter, dst, preprocessed_filter); | ||||
auto&& algo = get_algorithm(fparam); | auto&& algo = get_algorithm(fparam); | ||||
@@ -494,6 +503,15 @@ void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter, | |||||
size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | ||||
const TensorLayout& filter, const TensorLayout& diff, | const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{filter, diff, grad}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
if (param().format == param::Convolution::Format::NHWCD4 || | if (param().format == param::Convolution::Format::NHWCD4 || | ||||
param().format == param::Convolution::Format::NCHW4 || | param().format == param::Convolution::Format::NCHW4 || | ||||
(param().format == param::Convolution::Format::NCHW && | (param().format == param::Convolution::Format::NCHW && | ||||
@@ -219,6 +219,15 @@ MatrixMulImpl::KernParam MatrixMulImpl::make_kern_param( | |||||
size_t MatrixMulImpl::get_workspace_in_bytes(const TensorLayout& A, | size_t MatrixMulImpl::get_workspace_in_bytes(const TensorLayout& A, | ||||
const TensorLayout& B, | const TensorLayout& B, | ||||
const TensorLayout& C) { | const TensorLayout& C) { | ||||
TensorLayoutArray layouts{A, B, C}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
if (auto algo = get_algorithm_heuristic( | if (auto algo = get_algorithm_heuristic( | ||||
A, B, C, std::numeric_limits<size_t>::max(), | A, B, C, std::numeric_limits<size_t>::max(), | ||||
AlgoAttribute::DEFAULT, AlgoAttribute::DEFAULT)) { | AlgoAttribute::DEFAULT, AlgoAttribute::DEFAULT)) { | ||||
@@ -15,6 +15,7 @@ | |||||
#include "src/naive/convolution/helper.h" | #include "src/naive/convolution/helper.h" | ||||
#include <cstring> | #include <cstring> | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
#include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
@@ -56,6 +57,14 @@ size_t BatchConvBiasForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& flt, | const TensorLayout& src, const TensorLayout& flt, | ||||
const TensorLayout& bias, const TensorLayout& z, | const TensorLayout& bias, const TensorLayout& z, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
TensorLayoutArray layouts{src, flt, bias, z, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
return get_workspace_bundle(nullptr, src, flt, bias, z, dst) | return get_workspace_bundle(nullptr, src, flt, bias, z, dst) | ||||
.total_size_in_bytes(); | .total_size_in_bytes(); | ||||
} | } | ||||
@@ -13,6 +13,7 @@ | |||||
#include "src/naive/convolution/helper.h" | #include "src/naive/convolution/helper.h" | ||||
#include <cstring> | #include <cstring> | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "src/common/conv_bias.h" | #include "src/common/conv_bias.h" | ||||
#include "src/common/opr_delegate.h" | #include "src/common/opr_delegate.h" | ||||
@@ -201,6 +202,15 @@ size_t ConvBiasForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | |||||
const TensorLayout& z, | const TensorLayout& z, | ||||
const TensorLayout& dst, | const TensorLayout& dst, | ||||
const PreprocessedFilter*) { | const PreprocessedFilter*) { | ||||
TensorLayoutArray layouts{src, flt, bias, z, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
size_t float_workspace_size = 0; | size_t float_workspace_size = 0; | ||||
if (z.ndim > 0 && z.dtype.category() != DTypeCategory::FLOAT) { | if (z.ndim > 0 && z.dtype.category() != DTypeCategory::FLOAT) { | ||||
@@ -11,7 +11,7 @@ | |||||
#include "./opr_impl.h" | #include "./opr_impl.h" | ||||
#include "./helper.h" | #include "./helper.h" | ||||
#include "src/naive/handle.h" | |||||
#include "megdnn/heuristic_cache.h" | |||||
#include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
@@ -78,6 +78,15 @@ void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, | |||||
size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes(const TensorLayout& filter, | size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes(const TensorLayout& filter, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{filter, diff, grad}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
size_t workspace_size = 0; | size_t workspace_size = 0; | ||||
auto flt_dt = filter.dtype.enumv(); | auto flt_dt = filter.dtype.enumv(); | ||||
auto grad_dt = grad.dtype.enumv(); | auto grad_dt = grad.dtype.enumv(); | ||||
@@ -191,6 +200,15 @@ size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes( | |||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
size_t workspace_size = 0; | size_t workspace_size = 0; | ||||
#if !MEGDNN_DISABLE_FLOAT16 | #if !MEGDNN_DISABLE_FLOAT16 | ||||
TensorLayoutArray layouts{src, diff, grad}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
auto src_dt = src.dtype.enumv(); | auto src_dt = src.dtype.enumv(); | ||||
auto grad_dt = grad.dtype.enumv(); | auto grad_dt = grad.dtype.enumv(); | ||||
auto diff_dt = diff.dtype.enumv(); | auto diff_dt = diff.dtype.enumv(); | ||||
@@ -12,6 +12,7 @@ | |||||
#include "src/naive/pooling/opr_impl.h" | #include "src/naive/pooling/opr_impl.h" | ||||
#include <cstring> | #include <cstring> | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
#include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
@@ -402,6 +403,14 @@ WorkspaceBundle PoolingForwardImpl::get_workspace_bundle( | |||||
size_t PoolingForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | size_t PoolingForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
TensorLayoutArray layouts{src, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
return get_workspace_bundle(nullptr, src, dst).total_size_in_bytes(); | return get_workspace_bundle(nullptr, src, dst).total_size_in_bytes(); | ||||
} | } | ||||
namespace { | namespace { | ||||
@@ -652,6 +661,14 @@ WorkspaceBundle PoolingBackwardImpl::get_workspace_bundle( | |||||
size_t PoolingBackwardImpl::get_workspace_in_bytes( | size_t PoolingBackwardImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& dst, | const TensorLayout& src, const TensorLayout& dst, | ||||
const TensorLayout& diff, const TensorLayout& grad) { | const TensorLayout& diff, const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{src, dst, diff, grad}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
return get_workspace_bundle(nullptr, src, dst, diff, grad) | return get_workspace_bundle(nullptr, src, dst, diff, grad) | ||||
.total_size_in_bytes(); | .total_size_in_bytes(); | ||||
} | } | ||||
@@ -47,8 +47,7 @@ BatchedMatrixMulForwardImpl::get_algorithm_heuristic( | |||||
size_t BatchedMatrixMulForwardImpl::get_workspace_in_bytes( | size_t BatchedMatrixMulForwardImpl::get_workspace_in_bytes( | ||||
const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) { | const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) { | ||||
AlgoBase::SizeArgs args{this, A, B, C}; | |||||
return megdnn::get_algorithm(this, A, B, C)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, A, B, C); | |||||
} | } | ||||
void BatchedMatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | void BatchedMatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | ||||
@@ -57,7 +56,7 @@ void BatchedMatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | |||||
check_exec(A.layout, B.layout, C.layout, workspace.size); | check_exec(A.layout, B.layout, C.layout, workspace.size); | ||||
AlgoBase::ExecArgs args(this, A, B, C, workspace); | AlgoBase::ExecArgs args(this, A, B, C, workspace); | ||||
auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout); | auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -112,19 +112,30 @@ ConvolutionForwardImpl::get_all_algorithms(const TensorLayout& src, | |||||
size_t ConvolutionForwardImpl::get_workspace_in_bytes( | size_t ConvolutionForwardImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
const TensorLayout& dst, const PreprocessedFilter*) { | const TensorLayout& dst, const PreprocessedFilter*) { | ||||
TensorLayoutArray layouts{src, filter, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
AlgoBase::SizeArgs args(this, src, filter, dst); | AlgoBase::SizeArgs args(this, src, filter, dst); | ||||
return get_algorithm(this, src, args.filter_meta, dst) | |||||
return get_algorithm(this, src, filter, dst) | |||||
->get_workspace_in_bytes(args); | ->get_workspace_in_bytes(args); | ||||
} | } | ||||
void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, | void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, | ||||
_megdnn_tensor_in filter, | _megdnn_tensor_in filter, | ||||
_megdnn_tensor_out dst, | _megdnn_tensor_out dst, | ||||
const PreprocessedFilter*, | |||||
const PreprocessedFilter* preprocessed_filter, | |||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(src.layout, filter.layout, dst.layout, workspace.size, | |||||
preprocessed_filter); | |||||
AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | ||||
auto algo = get_algorithm(this, src.layout, args.filter_meta, dst.layout); | |||||
algo->check_workspace(args, workspace).exec(args); | |||||
auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | |||||
algo->exec(args); | |||||
} | } | ||||
const char* ConvolutionForwardImpl::get_algorithm_set_name() const { | const char* ConvolutionForwardImpl::get_algorithm_set_name() const { | ||||
@@ -137,9 +148,10 @@ void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter, | |||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(filter.layout, diff.layout, grad.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | ||||
auto algo = get_algorithm(this, args.filter_meta, diff.layout, grad.layout); | |||||
algo->check_workspace(args, workspace).exec(args); | |||||
auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout); | |||||
algo->exec(args); | |||||
} | } | ||||
std::vector<ConvolutionBackwardDataImpl::Algorithm*> | std::vector<ConvolutionBackwardDataImpl::Algorithm*> | ||||
@@ -192,8 +204,17 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic( | |||||
size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes( | ||||
const TensorLayout& filter, const TensorLayout& diff, | const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{filter, diff, grad}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
AlgoBase::SizeArgs args(this, filter, diff, grad); | AlgoBase::SizeArgs args(this, filter, diff, grad); | ||||
return get_algorithm(this, args.filter_meta, diff, grad) | |||||
return get_algorithm(this, filter, diff, grad) | |||||
->get_workspace_in_bytes(args); | ->get_workspace_in_bytes(args); | ||||
} | } | ||||
@@ -207,10 +228,11 @@ void ConvolutionBackwardFilterImpl::exec(_megdnn_tensor_in src, | |||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
check_exec(src.layout, diff.layout, grad.layout, workspace.size); | |||||
AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | ||||
auto algo = | auto algo = | ||||
get_algorithm(this, src.layout, diff.layout, args.grad_filter_meta); | |||||
algo->check_workspace(args, workspace).exec(args); | |||||
get_algorithm(this, src.layout, diff.layout, grad.layout); | |||||
algo->exec(args); | |||||
} | } | ||||
std::vector<ConvolutionBackwardFilterImpl::Algorithm*> | std::vector<ConvolutionBackwardFilterImpl::Algorithm*> | ||||
@@ -264,8 +286,17 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic( | |||||
size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes( | size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes( | ||||
const TensorLayout& src, const TensorLayout& diff, | const TensorLayout& src, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
TensorLayoutArray layouts{src, diff, grad}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
AlgoBase::SizeArgs args(this, src, diff, grad); | AlgoBase::SizeArgs args(this, src, diff, grad); | ||||
return get_algorithm(this, src, diff, args.grad_filter_meta) | |||||
return get_algorithm(this, src, diff, grad) | |||||
->get_workspace_in_bytes(args); | ->get_workspace_in_bytes(args); | ||||
} | } | ||||
@@ -24,7 +24,7 @@ public: | |||||
const PreprocessedFilter* preprocessed_filter, | const PreprocessedFilter* preprocessed_filter, | ||||
_megdnn_workspace workspace) override; | _megdnn_workspace workspace) override; | ||||
AlgorithmInfo get_algorithm_info_heuristic( | AlgorithmInfo get_algorithm_info_heuristic( | ||||
const TensorLayout& src, const CanonizedFilterMeta& filter, | |||||
const TensorLayout& src, const TensorLayout& filter, | |||||
const TensorLayout& dst, size_t workspace_limit_in_bytes, | const TensorLayout& dst, size_t workspace_limit_in_bytes, | ||||
const AlgoAttribute& positive_attr, | const AlgoAttribute& positive_attr, | ||||
const AlgoAttribute& negative_attr) { | const AlgoAttribute& negative_attr) { | ||||
@@ -95,7 +95,7 @@ public: | |||||
void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff, | void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, _megdnn_workspace workspace) override; | _megdnn_tensor_out grad, _megdnn_workspace workspace) override; | ||||
AlgorithmInfo get_algorithm_info_heuristic( | AlgorithmInfo get_algorithm_info_heuristic( | ||||
const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||||
const TensorLayout& filter, const TensorLayout& diff, | |||||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | const TensorLayout& grad, size_t workspace_limit_in_bytes, | ||||
const AlgoAttribute& positive_attr, | const AlgoAttribute& positive_attr, | ||||
const AlgoAttribute& negative_attr) { | const AlgoAttribute& negative_attr) { | ||||
@@ -145,7 +145,7 @@ public: | |||||
_megdnn_tensor_out grad, _megdnn_workspace workspace) override; | _megdnn_tensor_out grad, _megdnn_workspace workspace) override; | ||||
AlgorithmInfo get_algorithm_info_heuristic( | AlgorithmInfo get_algorithm_info_heuristic( | ||||
const TensorLayout& src, const TensorLayout& diff, | const TensorLayout& src, const TensorLayout& diff, | ||||
const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes, | |||||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | const AlgoAttribute& positive_attr, | ||||
const AlgoAttribute& negative_attr) { | const AlgoAttribute& negative_attr) { | ||||
return get_algorithm_heuristic(src, diff, grad, | return get_algorithm_heuristic(src, diff, grad, | ||||
@@ -44,8 +44,7 @@ MatrixMulForwardImpl::Algorithm* MatrixMulForwardImpl::get_algorithm_heuristic( | |||||
size_t MatrixMulForwardImpl::get_workspace_in_bytes(const TensorLayout& A, | size_t MatrixMulForwardImpl::get_workspace_in_bytes(const TensorLayout& A, | ||||
const TensorLayout& B, | const TensorLayout& B, | ||||
const TensorLayout& C) { | const TensorLayout& C) { | ||||
AlgoBase::SizeArgs args{this, A, B, C}; | |||||
return megdnn::get_algorithm(this, A, B, C)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, A, B, C); | |||||
} | } | ||||
void MatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | void MatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | ||||
@@ -54,7 +53,7 @@ void MatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B, | |||||
check_exec(A.layout, B.layout, C.layout, workspace.size); | check_exec(A.layout, B.layout, C.layout, workspace.size); | ||||
AlgoBase::ExecArgs args(this, A, B, C, workspace); | AlgoBase::ExecArgs args(this, A, B, C, workspace); | ||||
auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout); | auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout); | ||||
algo->check_workspace(args, workspace).exec(args); | |||||
algo->exec(args); | |||||
} | } | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -19,8 +19,7 @@ namespace rocm { | |||||
size_t PoolingForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | size_t PoolingForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
AlgoBase::SizeArgs args(this, src, dst); | |||||
return get_algorithm(this, src, dst)->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, dst); | |||||
} | } | ||||
const char* PoolingForwardImpl::get_algorithm_set_name() const { | const char* PoolingForwardImpl::get_algorithm_set_name() const { | ||||
@@ -69,9 +68,7 @@ size_t PoolingBackwardImpl::get_workspace_in_bytes(const TensorLayout& src, | |||||
const TensorLayout& dst, | const TensorLayout& dst, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, src, dst, diff, grad); | |||||
return get_algorithm(this, src, dst, diff, grad) | |||||
->get_workspace_in_bytes(args); | |||||
return get_dnn_workspace(this, src, dst, diff, grad); | |||||
}; | }; | ||||
const char* PoolingBackwardImpl::get_algorithm_set_name() const { | const char* PoolingBackwardImpl::get_algorithm_set_name() const { | ||||
@@ -46,6 +46,15 @@ WorkspaceBundle megdnn::x86::get_bundle(const TensorLayout& src, | |||||
size_t PoolingImpl::get_workspace_in_bytes(const TensorLayout& src, | size_t PoolingImpl::get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
TensorLayoutArray layouts{src, dst}; | |||||
HeuristicCache::Key key{this->handle(), this->get_opr_type(), | |||||
layouts.data(), layouts.size(), &this->param(), | |||||
sizeof(this->param())}; | |||||
auto rst = HeuristicCache::instance().get(key); | |||||
if (rst.policy.algo.valid()) { | |||||
return rst.workspace; | |||||
} | |||||
auto algo = get_algorithm(this, src, dst); | auto algo = get_algorithm(this, src, dst); | ||||
if (!is_fallback_algo(algo)) { | if (!is_fallback_algo(algo)) { | ||||
if (is_supported(SIMDType::SSE) && src.dtype == dtype::Float32() && | if (is_supported(SIMDType::SSE) && src.dtype == dtype::Float32() && | ||||
@@ -29,6 +29,7 @@ | |||||
#include "megbrain/plugin/profiler.h" | #include "megbrain/plugin/profiler.h" | ||||
#include "megbrain/test/helper.h" | #include "megbrain/test/helper.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/oprs/base.h" | #include "megdnn/oprs/base.h" | ||||
#include <atomic> | #include <atomic> | ||||
@@ -2075,10 +2076,12 @@ void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) { | |||||
TEST(TestGraph, FreeMemoryInWeightPreprocess) { | TEST(TestGraph, FreeMemoryInWeightPreprocess) { | ||||
test_free_memory_in_weight_preprocess(0, CompNode::load("xpu0")); | test_free_memory_in_weight_preprocess(0, CompNode::load("xpu0")); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
} | } | ||||
TEST(TestGraph, RecordFreeMemoryInWeightPreprocess) { | TEST(TestGraph, RecordFreeMemoryInWeightPreprocess) { | ||||
test_free_memory_in_weight_preprocess(1, CompNode::load("cpu0")); | test_free_memory_in_weight_preprocess(1, CompNode::load("cpu0")); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
} | } | ||||
namespace { | namespace { | ||||
@@ -2157,6 +2160,7 @@ TEST(TestGraph, FreeMemoryInWeightPreprocessWithValueInfer) { | |||||
->cast_final_safe<opr::SharedDeviceTensor>() | ->cast_final_safe<opr::SharedDeviceTensor>() | ||||
.get_dev_tensor() | .get_dev_tensor() | ||||
.empty()); | .empty()); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
} | } | ||||
TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) { | TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) { | ||||
@@ -2200,6 +2204,7 @@ TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) { | |||||
->cast_final_safe<opr::SharedDeviceTensor>() | ->cast_final_safe<opr::SharedDeviceTensor>() | ||||
.get_dev_tensor() | .get_dev_tensor() | ||||
.empty()); | .empty()); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
} | } | ||||
TEST(TestGraph, FreeBias) { | TEST(TestGraph, FreeBias) { | ||||
@@ -24,6 +24,7 @@ | |||||
//! TODO: here has to be know some megdnn::opr when there is produced midout.h | //! TODO: here has to be know some megdnn::opr when there is produced midout.h | ||||
//! fix it if there is another graceful way. | //! fix it if there is another graceful way. | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/opr_param_defs.h" | #include "megdnn/opr_param_defs.h" | ||||
#include "megdnn/oprs.h" | #include "megdnn/oprs.h" | ||||
#include "megdnn/oprs/base.h" | #include "megdnn/oprs/base.h" | ||||
@@ -1156,6 +1157,15 @@ template <typename Opr> | |||||
size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts, | size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts, | ||||
Opr* megdnn_opr, const MGBOpr* mgb_opr, | Opr* megdnn_opr, const MGBOpr* mgb_opr, | ||||
bool allow_weight_preprocess) { | bool allow_weight_preprocess) { | ||||
HeuristicCache::Key cache_key( | |||||
megdnn_opr->handle(), megdnn_opr->get_opr_type(), layouts.data(), | |||||
layouts.size(), &megdnn_opr->param(), sizeof(megdnn_opr->param())); | |||||
auto rst = HeuristicCache::instance().get(cache_key); | |||||
if (rst.policy.algo.valid()) { | |||||
megdnn_opr->execution_policy() = rst.policy; | |||||
return rst.workspace; | |||||
} | |||||
if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) { | if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) { | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -1192,6 +1202,11 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts, | |||||
mgb_log_debug("%s", ret.c_str()); | mgb_log_debug("%s", ret.c_str()); | ||||
megdnn_opr->execution_policy() = policy; | megdnn_opr->execution_policy() = policy; | ||||
if (mgb_opr->execution_policy().strategy & ExecutionStrategy::HEURISTIC) { | |||||
HeuristicCache::Result cache_result{policy, workspace}; | |||||
HeuristicCache::instance().put(cache_key, cache_result); | |||||
} | |||||
return workspace; | return workspace; | ||||
} | } | ||||
@@ -22,6 +22,7 @@ | |||||
#include "megbrain/opr/tensor_manip.h" | #include "megbrain/opr/tensor_manip.h" | ||||
#include "megdnn/oprs/base.h" | #include "megdnn/oprs/base.h" | ||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include <cmath> | #include <cmath> | ||||
#include <random> | #include <random> | ||||
@@ -337,6 +338,7 @@ void test_no_profiling_on_shape_change(const TensorShapeArray& inps0, | |||||
TEST(TestOprDNN, FastrunNoProfilingOnShapeChange) { | TEST(TestOprDNN, FastrunNoProfilingOnShapeChange) { | ||||
REQUIRE_GPU(1); | REQUIRE_GPU(1); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
test_no_profiling_on_shape_change<opr::Convolution>( | test_no_profiling_on_shape_change<opr::Convolution>( | ||||
{{12, 3, 36, 36}, {4, 3, 3, 3}}, {{32, 3, 28, 28}, {4, 3, 3, 3}}); | {{12, 3, 36, 36}, {4, 3, 3, 3}}, {{32, 3, 28, 28}, {4, 3, 3, 3}}); | ||||
@@ -21,6 +21,7 @@ | |||||
#include "megbrain/gopt/inference.h" | #include "megbrain/gopt/inference.h" | ||||
#include "megbrain/opr/tensor_manip.h" | #include "megbrain/opr/tensor_manip.h" | ||||
#include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
#include "megdnn/heuristic_cache.h" | |||||
#include "megdnn/oprs/base.h" | #include "megdnn/oprs/base.h" | ||||
#include <gmock/gmock.h> | #include <gmock/gmock.h> | ||||
@@ -396,6 +397,7 @@ TEST(TestOprDNN, ConvBiasExePolicy) { | |||||
#endif | #endif | ||||
run(strategy); | run(strategy); | ||||
} | } | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
ASSERT_THROW(run(S::OPTIMIZED | S::PROFILE), MegBrainError); | ASSERT_THROW(run(S::OPTIMIZED | S::PROFILE), MegBrainError); | ||||
PersistentCache::set_impl(orig_impl); | PersistentCache::set_impl(orig_impl); | ||||
} | } | ||||
@@ -460,6 +462,7 @@ TEST(TestOprDNN, ConvolutionExePolicy) { | |||||
for (auto strategy : | for (auto strategy : | ||||
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | ||||
#endif | #endif | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
using Checker = AutoOprChecker<2, 1>; | using Checker = AutoOprChecker<2, 1>; | ||||
auto make_graph = [&](const Checker::SymInpArray& inputs) | auto make_graph = [&](const Checker::SymInpArray& inputs) | ||||
@@ -489,6 +492,7 @@ TEST(TestOprDNN, ConvolutionExePolicy) { | |||||
} else { | } else { | ||||
ASSERT_LT(0, nr_get); | ASSERT_LT(0, nr_get); | ||||
} | } | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
} | } | ||||
} | } | ||||
@@ -544,6 +548,7 @@ TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) { | |||||
#else | #else | ||||
for (auto strategy: {S:HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) { | for (auto strategy: {S:HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) { | ||||
#endif | #endif | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
using Checker = AutoOprChecker<2, 1>; | using Checker = AutoOprChecker<2, 1>; | ||||
auto make_graph = [&](const Checker::SymInpArray& inputs) | auto make_graph = [&](const Checker::SymInpArray& inputs) | ||||
@@ -1835,6 +1840,7 @@ TEST(TestOprDNN, LocalShareForwardExecPolicy) { | |||||
auto run_with_param = [&](size_t fh = 3, size_t fw = 3, size_t sh = 1, | auto run_with_param = [&](size_t fh = 3, size_t fw = 3, size_t sh = 1, | ||||
size_t sw = 1, size_t sgh = 3, | size_t sw = 1, size_t sgh = 3, | ||||
size_t sgw = 3) { | size_t sgw = 3) { | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
size_t ph = fh / 2, pw = fw / 2; | size_t ph = fh / 2, pw = fw / 2; | ||||
param.pad_h = ph, param.pad_w = pw; | param.pad_h = ph, param.pad_w = pw; | ||||
param.stride_h = sh, param.stride_w = sw, | param.stride_h = sh, param.stride_w = sw, | ||||
@@ -2289,6 +2295,7 @@ TEST(TestOprDNN, HeuristicReproducible) { | |||||
} | } | ||||
algo_name0 = palgo->name(); | algo_name0 = palgo->name(); | ||||
} | } | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
{ | { | ||||
Checker checker(make_graph, fwd); | Checker checker(make_graph, fwd); | ||||
checker.run(inp_tensor(2, 3, 4, 9, 8, 3, 3), opt) | checker.run(inp_tensor(2, 3, 4, 9, 8, 3, 3), opt) | ||||
@@ -2306,6 +2313,7 @@ TEST(TestOprDNN, HeuristicReproducible) { | |||||
algo_name1 = palgo->name(); | algo_name1 = palgo->name(); | ||||
} | } | ||||
EXPECT_TRUE(algo_name0 == algo_name1); | EXPECT_TRUE(algo_name0 == algo_name1); | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
} | } | ||||
#undef inp_tensor | #undef inp_tensor | ||||
#undef get_shp | #undef get_shp | ||||
@@ -2585,6 +2593,7 @@ TEST_F(TestWeightPreprocess, NoPreprocessNeeded) { | |||||
} | } | ||||
TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) { | TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) { | ||||
megdnn::HeuristicCache::instance().clear(); | |||||
using ::testing::_; | using ::testing::_; | ||||
using ::testing::Return; | using ::testing::Return; | ||||
using ::testing::Field; | using ::testing::Field; | ||||