@@ -54,7 +54,10 @@ if(MGE_WITH_CUDA)
add_library(cutlass INTERFACE)
target_include_directories(
cutlass
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>)
INTERFACE
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/tools/util/include>)
add_library(cudnn-frontend INTERFACE)
target_include_directories(
cudnn-frontend
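# --- Illustrative sketch (not part of this diff) ------------------------------
# cutlass and cudnn-frontend above are header-only INTERFACE targets: they carry
# no sources, only usage requirements such as the $<BUILD_INTERFACE:...> include
# paths, which this change extends to also cover cutlass/tools/util/include.
# A consumer inherits those paths simply by linking the targets; the target and
# file names below are hypothetical and only show the usage pattern.
add_library(example_cuda_ops STATIC example_conv.cpp)
target_link_libraries(example_cuda_ops PRIVATE cutlass cudnn-frontend)
# -------------------------------------------------------------------------------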
@@ -31,7 +31,7 @@ public:
}
};
class Key {
struct Key {
Handle* m_handle;
uint32_t m_opr_type;
const TensorLayout* m_inp_layouts_ptr;
@@ -15,7 +15,7 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
non_cudnn_algos.push_back(&batched_matmul);
non_cudnn_algos.push_back(&int1_simple);
#if CUDNN_VERSION > 8004
#if CUDNN_VERSION >= 8020
all_algos.push_back(&cudnn_conv_v8);
all_algos.push_back(&cudnn_conv_bias_activation_v8);
#endif
@@ -173,10 +173,10 @@ public:
bool is_cudnn() const override { return true; }
size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
const SizeArgs& args) const override;
void exec_preprocess(const ExecArgs& args) const override;
// size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
// SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
// const SizeArgs& args) const override;
// void exec_preprocess(const ExecArgs& args) const override;
protected:
virtual size_t cudnn_get_workspace_in_bytes(const SizeArgs& args) const = 0;
@@ -237,7 +237,7 @@ private:
CudnnAlgoPack::Attr m_attr;
};
#if CUDNN_VERSION > 8004
#if CUDNN_VERSION >= 8020
class ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationV8 final
: public AlgoCUDNNConvBiasActivationBase {
public:
@@ -414,7 +414,7 @@ private:
CudnnAlgoPack::Attr m_attr;
};
#if CUDNN_VERSION > 8004
#if CUDNN_VERSION >= 8020
class ConvBiasForwardImpl::AlgoCUDNNConvV8 final : public AlgoCUDNNConvBase {
public:
AlgoCUDNNConvV8() : AlgoCUDNNConvBase() {
@@ -1247,7 +1247,7 @@ public:
AlgoGroupConvGeneral group;
AlgoBFloat16 bfloat16;
AlgoSimpleInt1 int1_simple;
#if CUDNN_VERSION > 8004
#if CUDNN_VERSION >= 8020
AlgoCUDNNConvV8 cudnn_conv_v8;
AlgoCUDNNConvBiasActivationV8 cudnn_conv_bias_activation_v8;
#endif
@@ -1,14 +1,3 @@
/**
* \file dnn/src/cuda/conv_bias/cudnn_conv_base.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/common/conv_bias.h"
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/utils.h"
@@ -1,14 +1,3 @@
/**
* \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/oprs/general.h"
#include "./algo.h"
@@ -26,19 +15,21 @@ size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::get_workspace_in_by
const SizeArgs& args) const {
auto workspace_size = cudnn_get_workspace_in_bytes(args);
auto&& param = args.opr->param();
if (args.preprocessed_filter == nullptr) {
if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
// cudnn require bias to be float when executing CONFIG_INT
// convert bias to float if bias is not float at first
workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
}
if (param.format == param::ConvBias::Format::NCHW32) {
workspace_size += args.filter_layout->span().dist_byte() +
args.bias_layout->span().dist_byte();
}
// if (args.preprocessed_filter == nullptr) {
if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
// cudnn require bias to be float when executing CONFIG_INT
// convert bias to float if bias is not float at first
workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
}
// #if CUDNN_VERSION >= 7500
// auto&& param = args.opr->param();
// if (param.format == param::ConvBias::Format::NCHW32) {
// workspace_size += args.filter_layout->span().dist_byte() +
// args.bias_layout->span().dist_byte();
// }
// #endif
// }
return workspace_size;
}
@@ -56,55 +47,62 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
TensorND filter_tensor;
TensorND bias_tensor;
auto&& param = args.opr->param();
if (args.preprocessed_filter != nullptr) {
bias_tensor = TensorND{
args.bias_tensor->layout,
args.preprocessed_filter->tensors[0].raw_ptr()};
if (param.format == Param::Format::NCHW32) {
megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
filter_tensor = TensorND{
args.filter_tensor->layout,
args.preprocessed_filter->tensors[1].raw_ptr()};
} else {
filter_tensor = *args.filter_tensor;
}
} else {
if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
auto cvt = args.handle->create_operator<TypeCvt>();
auto float_bias_layout = *args.bias_layout;
auto converted_bias_layout = *args.bias_layout;
converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
float_bias_layout.dtype = dtype::Float32();
auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
megdnn_assert(args.workspace.size >= bias_size_in_bytes);
cvt->exec(
{args.bias_tensor->raw_ptr(), converted_bias_layout},
TensorND{workspace_ptr, float_bias_layout});
bias_ptr = workspace_ptr;
workspace_ptr += bias_size_in_bytes;
workspace_size -= bias_size_in_bytes;
}
if (param.format == Param::Format::NCHW32) {
size_t reorder_workspace_size =
args.filter_tensor->layout.span().dist_byte() +
args.bias_tensor->layout.span().dist_byte();
auto reorder_filter_ptr = workspace_ptr;
auto reorder_bias_ptr =
workspace_ptr + args.filter_tensor->layout.span().dist_byte();
cudnn_reorder_filer_and_bias_nchw32(
cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
filter_tensor = TensorND(args.filter_tensor->layout, reorder_filter_ptr);
bias_ptr = reorder_bias_ptr;
workspace_ptr += reorder_workspace_size;
workspace_size -= reorder_workspace_size;
} else {
filter_tensor = *args.filter_tensor;
}
// if (args.preprocessed_filter != nullptr) {
// bias_tensor = TensorND{
// args.bias_tensor->layout,
// args.preprocessed_filter->tensors[0].raw_ptr()};
// // #if CUDNN_VERSION >= 7500
// // auto&& param = args.opr->param();
// // if (param.format == Param::Format::NCHW32) {
// // megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
// // filter_tensor = TensorND{
// // args.filter_tensor->layout,
// // args.preprocessed_filter->tensors[1].raw_ptr()};
// // }
// // #else
// filter_tensor = *args.filter_tensor;
// // #endif
// } else {
if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
auto cvt = args.handle->create_operator<TypeCvt>();
auto float_bias_layout = *args.bias_layout;
auto converted_bias_layout = *args.bias_layout;
converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
float_bias_layout.dtype = dtype::Float32();
auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
megdnn_assert(args.workspace.size >= bias_size_in_bytes);
cvt->exec(
{args.bias_tensor->raw_ptr(), converted_bias_layout},
TensorND{workspace_ptr, float_bias_layout});
bias_ptr = workspace_ptr;
workspace_ptr += bias_size_in_bytes;
workspace_size -= bias_size_in_bytes;
}
// #if CUDNN_VERSION >= 7500
// auto&& param = args.opr->param();
// if (param.format == Param::Format::NCHW32) {
// size_t reorder_workspace_size =
// args.filter_tensor->layout.span().dist_byte() +
// args.bias_tensor->layout.span().dist_byte();
// auto reorder_filter_ptr = workspace_ptr;
// auto reorder_bias_ptr =
// workspace_ptr +
// args.filter_tensor->layout.span().dist_byte();
// cudnn_reorder_filter_and_bias_nchw32(
// cudnn_handle(args.opr->handle()),
// args.filter_tensor->raw_ptr(), args.filter_meta,
// bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
// filter_tensor = TensorND(args.filter_tensor->layout,
// reorder_filter_ptr); bias_ptr = reorder_bias_ptr; workspace_ptr
// += reorder_workspace_size; workspace_size -=
// reorder_workspace_size;
// }
// #else
filter_tensor = *args.filter_tensor;
// #endif
// }
bias_tensor = TensorND{args.bias_tensor->layout, bias_ptr};
ExecArgs exec_args{
@@ -153,58 +151,64 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
}
}
size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
get_preprocess_workspace_in_bytes(const SizeArgs& args) const {
auto&& param = args.opr->param();
if (param.format == Param::Format::NCHW32) {
return args.bias_layout->span().dist_byte();
}
return 0_z;
}
SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
deduce_preprocessed_filter_layout(const SizeArgs& args) const {
auto&& param = args.opr->param();
if (param.format == Param::Format::NCHW32) {
return {args.bias_layout->collapse_contiguous(),
args.filter_layout->collapse_contiguous()};
} else {
return {args.bias_layout->collapse_contiguous()};
}
}
void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
const ExecArgs& args) const {
float alpha, beta;
std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
args.src_tensor->layout, args.dst_tensor->layout,
args.filter_tensor->layout, args.bias_tensor->layout,
args.z_tensor->layout);
MEGDNN_MARK_USED_VAR(beta);
auto workspace_ptr = args.workspace.raw_ptr;
auto workspace_size = args.workspace.size;
auto bias_ptr = workspace_size > 0 ? workspace_ptr
: args.preprocessed_filter->tensors[0].raw_ptr();
if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
auto cvt = args.handle->create_operator<TypeCvt>();
auto float_bias_layout = *args.bias_layout;
auto converted_bias_layout = *args.bias_layout;
converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
float_bias_layout.dtype = dtype::Float32();
cvt->exec(
{args.bias_tensor->raw_ptr(), converted_bias_layout},
TensorND{bias_ptr, float_bias_layout});
}
if (args.opr->param().format == Param::Format::NCHW32) {
auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
cudnn_reorder_filer_and_bias_nchw32(
cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
}
}
// size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
// get_preprocess_workspace_in_bytes(const SizeArgs&) const {
// #if CUDNN_VERSION >= 7500
// auto&& param = args.opr->param();
// if (param.format == Param::Format::NCHW32) {
// return args.bias_layout->span().dist_byte();
// }
// #endif
// return 0_z;
// }
// SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
// deduce_preprocessed_filter_layout(const SizeArgs& args) const {
// #if CUDNN_VERSION >= 7500
// auto&& param = args.opr->param();
// if (param.format == Param::Format::NCHW32) {
// return {args.bias_layout->collapse_contiguous(),
// args.filter_layout->collapse_contiguous()};
// }
// #endif
// return {args.bias_layout->collapse_contiguous()};
// }
// void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
// const ExecArgs& args) const {
// float alpha, beta;
// std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
// args.src_tensor->layout, args.dst_tensor->layout,
// args.filter_tensor->layout, args.bias_tensor->layout,
// args.z_tensor->layout);
// MEGDNN_MARK_USED_VAR(beta);
// auto workspace_ptr = args.workspace.raw_ptr;
// auto workspace_size = args.workspace.size;
// auto bias_ptr = workspace_size > 0 ? workspace_ptr
// :
// args.preprocessed_filter->tensors[0].raw_ptr();
// if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
// args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
// auto cvt = args.handle->create_operator<TypeCvt>();
// auto float_bias_layout = *args.bias_layout;
// auto converted_bias_layout = *args.bias_layout;
// converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
// float_bias_layout.dtype = dtype::Float32();
// cvt->exec(
// {args.bias_tensor->raw_ptr(), converted_bias_layout},
// TensorND{bias_ptr, float_bias_layout});
// }
// #if CUDNN_VERSION >= 7500
// if (args.opr->param().format == Param::Format::NCHW32) {
// auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
// auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
// cudnn_reorder_filter_and_bias_nchw32(
// cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
// args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
// }
// #endif
// }
// vim: syntax=cpp.doxygen
@@ -1,14 +1,3 @@
/**
* \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/oprs/general.h"
#include "./algo.h"
@@ -17,7 +6,7 @@
#include "src/cuda/cudnn_wrapper_v8.h"
#include "src/cuda/utils.h"
#if CUDNN_VERSION >= 8004
#if CUDNN_VERSION >= 8020
using namespace megdnn;
using namespace cuda;
using namespace conv_bias;
@@ -1,20 +1,9 @@
/**
* \file dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/common/conv_bias.h"
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/cudnn_wrapper_v8.h"
#include "src/cuda/utils.h"
#if CUDNN_VERSION >= 8004
#if CUDNN_VERSION >= 8020
using namespace megdnn;
using namespace cuda;
using namespace conv_bias;
@@ -239,7 +239,8 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
return {alpha, beta};
}
void cudnn_reorder_filer_and_bias_nchw32(
#if CUDNN_VERSION >= 7500
void cudnn_reorder_filter_and_bias_nchw32(
const cudnnHandle_t& handle, const void* filter_ptr,
const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
void* reordered_bias_ptr) {
@@ -250,6 +251,8 @@ void cudnn_reorder_filer_and_bias_nchw32(
handle, filter_desc.desc, CUDNN_DEFAULT_REORDER, filter_ptr,
reordered_filter_ptr, reorder_bias, bias_ptr, reordered_bias_ptr));
}
#endif
} // namespace conv_bias
} // namespace cuda
} // namespace megdnn
@@ -117,11 +117,12 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
const TensorLayout& x, const TensorLayout& y, const TensorLayout& w,
const TensorLayout& b, const TensorLayout& z);
void cudnn_reorder_filer_and_bias_nchw32(
#if CUDNN_VERSION >= 7500
void cudnn_reorder_filter_and_bias_nchw32(
const cudnnHandle_t& handle, const void* filter_ptr,
const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
void* reordered_bias_ptr);
#endif
} // namespace conv_bias
} // namespace cuda
} // namespace megdnn
@@ -47,7 +47,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
const AlgoAttribute& positive_attr, const AlgoAttribute& negative_attr) {
using namespace conv_bias;
AlgoBase::SizeArgs args{this, src, filter, bias, z, dst};
#if CUDNN_VERSION >= 8004
#if CUDNN_VERSION >= 8020
if (sm_algo_pack.cudnn_conv_v8.is_available_attribute(
args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
return &sm_algo_pack.cudnn_conv_v8;
@@ -32,12 +32,10 @@ public:
const char* get_algorithm_set_name() const override;
class AlgoBase;
class AlgoCUDNNConvBiasActivation;
class AlgoChanwise;
class AlgoChanwiseSmall;
class AlgoDepthwiseLargeFilter;
class AlgoChanwise8x8x32;
class AlgoCUDNNConv;
class AlgoFallbackNCHWQS8;
class AlgoInplaceMatmul;
class AlgoMatmul;
@@ -67,8 +65,10 @@ public:
class AlgoFloat32NCHWFMAImplicitBatchedGemm;
class AlgoFloat16NCHWHMMAImplicitBatchedGemm;
class AlgoCUDNNConvBase;
class AlgoCUDNNConv;
class AlgoCUDNNConvBiasActivationBase;
#if CUDNN_VERSION > 8004
class AlgoCUDNNConvBiasActivation;
#if CUDNN_VERSION >= 8020
class AlgoCUDNNConvV8;
class AlgoCUDNNConvBiasActivationV8;
#endif
@@ -1,13 +1,4 @@
/**
* \file dnn/src/cuda/cudnn_wrapper_v8.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#if CUDNN_VERSION >= 8020
#include "src/cuda/cudnn_wrapper_v8.h"
#include "src/cuda/cudnn_wrapper.h"
@@ -19,7 +10,7 @@
#include "cudnn_frontend_EngineConfigGenerator.h"
#include "megdnn/heuristic_cache.h"
#include "megdnn/algorithm_cache.h"
using namespace megdnn;
using namespace cuda;
@@ -240,9 +231,9 @@ auto make_activation_descriptor(
// high-level api for convolution execution
struct StaticData {
using Key = megdnn::HeuristicCache::Key;
using KeyStorage = megdnn::HeuristicCache::KeyStorage;
using KeyHash = megdnn::HeuristicCache::Hash;
using Key = megdnn::AlgorithmCache::Key;
using KeyStorage = megdnn::AlgorithmCache::KeyStorage;
using KeyHash = megdnn::AlgorithmCache::Hash;
using Result = cudnn_frontend::ExecutionPlan;
using CudnnFrontendExecutionPlanCache =
std::unordered_map<KeyStorage, Result, KeyHash>;
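// --- Illustrative sketch (not part of this diff) ------------------------------
// The change above only swaps megdnn::HeuristicCache for megdnn::AlgorithmCache
// as the key provider of the cudnn-frontend execution-plan cache. The generic
// lookup-or-insert shape of such a cache is sketched below with standard-library
// types only; PlanCache, KeyStorage, Plan and the mutex are assumptions for
// illustration, not MegDNN or cudnn-frontend APIs.
#include <mutex>
#include <optional>
#include <string>
#include <unordered_map>
#include <utility>

struct PlanCache {
    // Stand-in for AlgorithmCache::KeyStorage: a serialized problem description
    // (layouts, opr param, handle id) plus a precomputed hash.
    struct KeyStorage {
        std::string bytes;
        size_t hash = 0;
        bool operator==(const KeyStorage& rhs) const { return bytes == rhs.bytes; }
    };
    struct Hash {
        size_t operator()(const KeyStorage& k) const { return k.hash; }
    };
    // Stand-in for cudnn_frontend::ExecutionPlan; a tag string is enough to
    // show the control flow.
    using Plan = std::string;

    // Return the cached plan for this key, if one was stored earlier.
    std::optional<Plan> get(const KeyStorage& key) {
        std::lock_guard<std::mutex> lock{m_mtx};
        auto it = m_map.find(key);
        if (it == m_map.end())
            return std::nullopt;
        return it->second;
    }

    // Record a freshly built plan so later queries with the same key reuse it.
    void put(KeyStorage key, Plan plan) {
        std::lock_guard<std::mutex> lock{m_mtx};
        m_map.emplace(std::move(key), std::move(plan));
    }

private:
    std::mutex m_mtx;
    std::unordered_map<KeyStorage, Plan, Hash> m_map;
};
// -------------------------------------------------------------------------------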
@@ -682,4 +673,5 @@ void megdnn::cuda::run_conv_bias_act_with_plan(
handle, plan.get_raw_desc(), variant_pack.get_raw_desc()));
}
#endif
// vim: syntax=cpp.doxygen
@@ -1,15 +1,6 @@
/**
* \file dnn/src/cuda/cudnn_wrapper_v8.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#if CUDNN_VERSION >= 8020
#include "megdnn/basic_types.h"
#include "megdnn/oprs/nn.h"
#include "src/common/utils.h"
@@ -67,4 +58,5 @@ void run_conv_bias_act_with_plan(
} // namespace cuda
} // namespace megdnn
#endif
// vim: syntax=cpp.doxygen
@@ -58,11 +58,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
For example `export CUDA_CACHE_MAXSIZE=2147483647` and `export CUDA_CACHE_PATH=/data/.cuda_cache`)");
}
#endif
size_t free, tot;
cudaMemGetInfo(&free, &tot);
printf("before cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
(tot - free) / 1024.0 / 1024.0);
cudnn_check(cudnnCreate(&m_cudnn_handle));
cublas_check(cublasCreate(&m_cublas_handle));
#if CUDA_VERSION >= 10010
@@ -74,11 +69,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
cudnn_check(cudnnSetStream(m_cudnn_handle, stream()));
cublas_check(cublasSetStream(m_cublas_handle, stream()));
#if CUDNN_VERSION >= 8004
// cudnn_check(cudnnOpsInferVersionCheck());
// cudnn_check(cudnnCnnInferVersionCheck());
#endif
// Note that all cublas scalars (alpha, beta) and scalar results such as dot
// output resides at device side.
cublas_check(cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_DEVICE));
@@ -92,11 +82,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
cudaMemcpyHostToDevice, stream()));
cuda_check(cudaStreamSynchronize(stream()));
cudaMemGetInfo(&free, &tot);
printf("after cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
(tot - free) / 1024.0 / 1024.0);
// check tk1
m_is_tegra_k1 = (strcmp(m_device_prop->name, "GK20A") == 0);
m_cusolver_handle = nullptr;
@@ -1,13 +1,3 @@
/**
* \file dnn/test/cuda/conv_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/dtype.h"
#include "test/cuda/fixture.h"
@@ -26,7 +16,7 @@ using namespace megdnn;
using namespace test;
using namespace conv_bias;
#if CUDNN_VERSION >= 8004
#if CUDNN_VERSION >= 8020
TEST_F(CUDA, CONV_V8_FLOAT) {
Checker<ConvBiasForward> checker(handle_cuda());
checker.set_before_exec_callback( | |||