@@ -54,7 +54,10 @@ if(MGE_WITH_CUDA)
   add_library(cutlass INTERFACE)
   target_include_directories(
     cutlass
-    INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>)
+    INTERFACE
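+      # the CUTLASS utility headers (cutlass/util/*.h) live under
+      # tools/util/include rather than the main include/ tree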
+      $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>
+      $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/tools/util/include>)
   add_library(cudnn-frontend INTERFACE)
   target_include_directories(
     cudnn-frontend
@@ -31,7 +31,7 @@ public:
     }
 };

-class Key {
+struct Key {
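+    // cache key: identifies an entry by the owning handle, the operator
+    // type and the input layouts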
     Handle* m_handle;
     uint32_t m_opr_type;
     const TensorLayout* m_inp_layouts_ptr;
@@ -15,7 +15,7 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
     non_cudnn_algos.push_back(&batched_matmul);
     non_cudnn_algos.push_back(&int1_simple);

-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
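+    // the v8 algos are built on the cudnn-frontend graph API, so they are
+    // only registered when cuDNN is at least 8.2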
     all_algos.push_back(&cudnn_conv_v8);
     all_algos.push_back(&cudnn_conv_bias_activation_v8);
 #endif
@@ -173,10 +173,10 @@ public:
     bool is_cudnn() const override { return true; }

-    size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
-    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
-            const SizeArgs& args) const override;
-    void exec_preprocess(const ExecArgs& args) const override;
+    // size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
+    // SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+    //         const SizeArgs& args) const override;
+    // void exec_preprocess(const ExecArgs& args) const override;

 protected:
     virtual size_t cudnn_get_workspace_in_bytes(const SizeArgs& args) const = 0;
@@ -237,7 +237,7 @@ private:
     CudnnAlgoPack::Attr m_attr;
 };

-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
 class ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationV8 final
         : public AlgoCUDNNConvBiasActivationBase {
 public:
@@ -414,7 +414,7 @@ private:
     CudnnAlgoPack::Attr m_attr;
 };

-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
 class ConvBiasForwardImpl::AlgoCUDNNConvV8 final : public AlgoCUDNNConvBase {
 public:
     AlgoCUDNNConvV8() : AlgoCUDNNConvBase() {
@@ -1247,7 +1247,7 @@ public:
     AlgoGroupConvGeneral group;
     AlgoBFloat16 bfloat16;
     AlgoSimpleInt1 int1_simple;
-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
     AlgoCUDNNConvV8 cudnn_conv_v8;
     AlgoCUDNNConvBiasActivationV8 cudnn_conv_bias_activation_v8;
 #endif
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_base.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
 #include "src/cuda/utils.h"
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "megdnn/oprs/general.h"

 #include "./algo.h"
@@ -26,19 +15,21 @@ size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::get_workspace_in_bytes(
         const SizeArgs& args) const {
     auto workspace_size = cudnn_get_workspace_in_bytes(args);
-    auto&& param = args.opr->param();
-    if (args.preprocessed_filter == nullptr) {
-        if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-            args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-            // cudnn require bias to be float when executing CONFIG_INT
-            // convert bias to float if bias is not float at first
-            workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
-        }
-        if (param.format == param::ConvBias::Format::NCHW32) {
-            workspace_size += args.filter_layout->span().dist_byte() +
-                              args.bias_layout->span().dist_byte();
-        }
+    // if (args.preprocessed_filter == nullptr) {
+    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+        // cuDNN requires a float bias when executing CONFIG_INT, so reserve
+        // workspace to convert the bias to float first
+        workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
     }
+    // #if CUDNN_VERSION >= 7500
+    //     auto&& param = args.opr->param();
+    //     if (param.format == param::ConvBias::Format::NCHW32) {
+    //         workspace_size += args.filter_layout->span().dist_byte() +
+    //                           args.bias_layout->span().dist_byte();
+    //     }
+    // #endif
+    // }
     return workspace_size;
 }
@@ -56,55 +47,62 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
     TensorND filter_tensor;
     TensorND bias_tensor;

-    auto&& param = args.opr->param();
-    if (args.preprocessed_filter != nullptr) {
-        bias_tensor = TensorND{
-                args.bias_tensor->layout,
-                args.preprocessed_filter->tensors[0].raw_ptr()};
-        if (param.format == Param::Format::NCHW32) {
-            megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
-            filter_tensor = TensorND{
-                    args.filter_tensor->layout,
-                    args.preprocessed_filter->tensors[1].raw_ptr()};
-        } else {
-            filter_tensor = *args.filter_tensor;
-        }
-    } else {
-        if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-            args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-            auto cvt = args.handle->create_operator<TypeCvt>();
-            auto float_bias_layout = *args.bias_layout;
-            auto converted_bias_layout = *args.bias_layout;
-            converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
-            float_bias_layout.dtype = dtype::Float32();
-            auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
-            megdnn_assert(args.workspace.size >= bias_size_in_bytes);
-            cvt->exec(
-                    {args.bias_tensor->raw_ptr(), converted_bias_layout},
-                    TensorND{workspace_ptr, float_bias_layout});
-            bias_ptr = workspace_ptr;
-            workspace_ptr += bias_size_in_bytes;
-            workspace_size -= bias_size_in_bytes;
-        }
-        if (param.format == Param::Format::NCHW32) {
-            size_t reorder_workspace_size =
-                    args.filter_tensor->layout.span().dist_byte() +
-                    args.bias_tensor->layout.span().dist_byte();
-            auto reorder_filter_ptr = workspace_ptr;
-            auto reorder_bias_ptr =
-                    workspace_ptr + args.filter_tensor->layout.span().dist_byte();
-            cudnn_reorder_filer_and_bias_nchw32(
-                    cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
-                    args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
-            filter_tensor = TensorND(args.filter_tensor->layout, reorder_filter_ptr);
-            bias_ptr = reorder_bias_ptr;
-            workspace_ptr += reorder_workspace_size;
-            workspace_size -= reorder_workspace_size;
-        } else {
-            filter_tensor = *args.filter_tensor;
-        }
+    // if (args.preprocessed_filter != nullptr) {
+    //     bias_tensor = TensorND{
+    //             args.bias_tensor->layout,
+    //             args.preprocessed_filter->tensors[0].raw_ptr()};
+    // // #if CUDNN_VERSION >= 7500
+    // //     auto&& param = args.opr->param();
+    // //     if (param.format == Param::Format::NCHW32) {
+    // //         megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
+    // //         filter_tensor = TensorND{
+    // //                 args.filter_tensor->layout,
+    // //                 args.preprocessed_filter->tensors[1].raw_ptr()};
+    // //     }
+    // // #else
+    //     filter_tensor = *args.filter_tensor;
+    // // #endif
+    // } else {
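+    // cuDNN wants a float bias for the int configs, so convert the bias into
+    // workspace memory before building the exec args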
+    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+        auto cvt = args.handle->create_operator<TypeCvt>();
+        auto float_bias_layout = *args.bias_layout;
+        auto converted_bias_layout = *args.bias_layout;
+        converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
+        float_bias_layout.dtype = dtype::Float32();
+        auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
+        megdnn_assert(args.workspace.size >= bias_size_in_bytes);
+        cvt->exec(
+                {args.bias_tensor->raw_ptr(), converted_bias_layout},
+                TensorND{workspace_ptr, float_bias_layout});
+        bias_ptr = workspace_ptr;
+        workspace_ptr += bias_size_in_bytes;
+        workspace_size -= bias_size_in_bytes;
     }
+    // #if CUDNN_VERSION >= 7500
+    //     auto&& param = args.opr->param();
+    //     if (param.format == Param::Format::NCHW32) {
+    //         size_t reorder_workspace_size =
+    //                 args.filter_tensor->layout.span().dist_byte() +
+    //                 args.bias_tensor->layout.span().dist_byte();
+    //         auto reorder_filter_ptr = workspace_ptr;
+    //         auto reorder_bias_ptr =
+    //                 workspace_ptr + args.filter_tensor->layout.span().dist_byte();
+    //         cudnn_reorder_filter_and_bias_nchw32(
+    //                 cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
+    //                 args.filter_meta, bias_ptr, reorder_filter_ptr,
+    //                 reorder_bias_ptr);
+    //         filter_tensor = TensorND(args.filter_tensor->layout, reorder_filter_ptr);
+    //         bias_ptr = reorder_bias_ptr;
+    //         workspace_ptr += reorder_workspace_size;
+    //         workspace_size -= reorder_workspace_size;
+    //     }
+    // #else
+    filter_tensor = *args.filter_tensor;
+    // #endif
+    // }

     bias_tensor = TensorND{args.bias_tensor->layout, bias_ptr};
     ExecArgs exec_args{
@@ -153,58 +151,64 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
     }
 }

-size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
-        get_preprocess_workspace_in_bytes(const SizeArgs& args) const {
-    auto&& param = args.opr->param();
-    if (param.format == Param::Format::NCHW32) {
-        return args.bias_layout->span().dist_byte();
-    }
-    return 0_z;
-}
-
-SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
-        deduce_preprocessed_filter_layout(const SizeArgs& args) const {
-    auto&& param = args.opr->param();
-    if (param.format == Param::Format::NCHW32) {
-        return {args.bias_layout->collapse_contiguous(),
-                args.filter_layout->collapse_contiguous()};
-    } else {
-        return {args.bias_layout->collapse_contiguous()};
-    }
-}
-
-void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
-        const ExecArgs& args) const {
-    float alpha, beta;
-    std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
-            args.src_tensor->layout, args.dst_tensor->layout,
-            args.filter_tensor->layout, args.bias_tensor->layout,
-            args.z_tensor->layout);
-    MEGDNN_MARK_USED_VAR(beta);
-    auto workspace_ptr = args.workspace.raw_ptr;
-    auto workspace_size = args.workspace.size;
-    auto bias_ptr = workspace_size > 0 ? workspace_ptr
-                                       : args.preprocessed_filter->tensors[0].raw_ptr();
-    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-        auto cvt = args.handle->create_operator<TypeCvt>();
-        auto float_bias_layout = *args.bias_layout;
-        auto converted_bias_layout = *args.bias_layout;
-        converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
-        float_bias_layout.dtype = dtype::Float32();
-        cvt->exec(
-                {args.bias_tensor->raw_ptr(), converted_bias_layout},
-                TensorND{bias_ptr, float_bias_layout});
-    }
-    if (args.opr->param().format == Param::Format::NCHW32) {
-        auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
-        auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
-        cudnn_reorder_filer_and_bias_nchw32(
-                cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
-                args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
-    }
-}
+// size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
+//         get_preprocess_workspace_in_bytes(const SizeArgs& args) const {
+// #if CUDNN_VERSION >= 7500
+//     auto&& param = args.opr->param();
+//     if (param.format == Param::Format::NCHW32) {
+//         return args.bias_layout->span().dist_byte();
+//     }
+// #endif
+//     return 0_z;
+// }
+//
+// SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
+//         deduce_preprocessed_filter_layout(const SizeArgs& args) const {
+// #if CUDNN_VERSION >= 7500
+//     auto&& param = args.opr->param();
+//     if (param.format == Param::Format::NCHW32) {
+//         return {args.bias_layout->collapse_contiguous(),
+//                 args.filter_layout->collapse_contiguous()};
+//     }
+// #endif
+//     return {args.bias_layout->collapse_contiguous()};
+// }
+//
+// void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
+//         const ExecArgs& args) const {
+//     float alpha, beta;
+//     std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
+//             args.src_tensor->layout, args.dst_tensor->layout,
+//             args.filter_tensor->layout, args.bias_tensor->layout,
+//             args.z_tensor->layout);
+//     MEGDNN_MARK_USED_VAR(beta);
+//     auto workspace_ptr = args.workspace.raw_ptr;
+//     auto workspace_size = args.workspace.size;
+//     auto bias_ptr = workspace_size > 0
+//                           ? workspace_ptr
+//                           : args.preprocessed_filter->tensors[0].raw_ptr();
+//     if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+//         args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+//         auto cvt = args.handle->create_operator<TypeCvt>();
+//         auto float_bias_layout = *args.bias_layout;
+//         auto converted_bias_layout = *args.bias_layout;
+//         converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
+//         float_bias_layout.dtype = dtype::Float32();
+//         cvt->exec(
+//                 {args.bias_tensor->raw_ptr(), converted_bias_layout},
+//                 TensorND{bias_ptr, float_bias_layout});
+//     }
+// #if CUDNN_VERSION >= 7500
+//     if (args.opr->param().format == Param::Format::NCHW32) {
+//         auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
+//         auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
+//         cudnn_reorder_filter_and_bias_nchw32(
+//                 cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
+//                 args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
+//     }
+// #endif
+// }

 // vim: syntax=cpp.doxygen
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "megdnn/oprs/general.h"

 #include "./algo.h"
@@ -17,7 +6,7 @@
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/utils.h"

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
@@ -1,20 +1,9 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/utils.h"

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
@@ -239,7 +239,8 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
     return {alpha, beta};
 }

-void cudnn_reorder_filer_and_bias_nchw32(
+#if CUDNN_VERSION >= 7500
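+// assumption: the cuDNN filter/bias reorder entry points used below exist
+// from cuDNN 7.5 on, matching this guard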
+void cudnn_reorder_filter_and_bias_nchw32(
         const cudnnHandle_t& handle, const void* filter_ptr,
         const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
         void* reordered_bias_ptr) {
@@ -250,6 +251,8 @@ void cudnn_reorder_filer_and_bias_nchw32(
             handle, filter_desc.desc, CUDNN_DEFAULT_REORDER, filter_ptr,
             reordered_filter_ptr, reorder_bias, bias_ptr, reordered_bias_ptr));
 }
+#endif

 }  // namespace conv_bias
 }  // namespace cuda
 }  // namespace megdnn
@@ -117,11 +117,12 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
         const TensorLayout& x, const TensorLayout& y, const TensorLayout& w,
         const TensorLayout& b, const TensorLayout& z);

-void cudnn_reorder_filer_and_bias_nchw32(
+#if CUDNN_VERSION >= 7500
+void cudnn_reorder_filter_and_bias_nchw32(
         const cudnnHandle_t& handle, const void* filter_ptr,
         const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
         void* reordered_bias_ptr);
+#endif

 }  // namespace conv_bias
 }  // namespace cuda
 }  // namespace megdnn
@@ -47,7 +47,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
         const AlgoAttribute& positive_attr, const AlgoAttribute& negative_attr) {
     using namespace conv_bias;
     AlgoBase::SizeArgs args{this, src, filter, bias, z, dst};
-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
     if (sm_algo_pack.cudnn_conv_v8.is_available_attribute(
                 args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
         return &sm_algo_pack.cudnn_conv_v8;
@@ -32,12 +32,10 @@ public:
     const char* get_algorithm_set_name() const override;

     class AlgoBase;
-    class AlgoCUDNNConvBiasActivation;
     class AlgoChanwise;
     class AlgoChanwiseSmall;
     class AlgoDepthwiseLargeFilter;
     class AlgoChanwise8x8x32;
-    class AlgoCUDNNConv;
     class AlgoFallbackNCHWQS8;
     class AlgoInplaceMatmul;
     class AlgoMatmul;
@@ -67,8 +65,10 @@ public:
     class AlgoFloat32NCHWFMAImplicitBatchedGemm;
     class AlgoFloat16NCHWHMMAImplicitBatchedGemm;
     class AlgoCUDNNConvBase;
+    class AlgoCUDNNConv;
     class AlgoCUDNNConvBiasActivationBase;
-#if CUDNN_VERSION > 8004
+    class AlgoCUDNNConvBiasActivation;
+#if CUDNN_VERSION >= 8020
     class AlgoCUDNNConvV8;
     class AlgoCUDNNConvBiasActivationV8;
 #endif
@@ -1,13 +1,4 @@
-/**
- * \file dnn/src/cuda/cudnn_wrapper_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
+#if CUDNN_VERSION >= 8020
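+// NOTE: assumes CUDNN_VERSION is already defined when this file is compiled;
+// the cudnn-frontend wrappers below require cuDNN >= 8.2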
#include "src/cuda/cudnn_wrapper_v8.h" | #include "src/cuda/cudnn_wrapper_v8.h" | ||||
#include "src/cuda/cudnn_wrapper.h" | #include "src/cuda/cudnn_wrapper.h" | ||||
@@ -19,7 +10,7 @@
 #include "cudnn_frontend_EngineConfigGenerator.h"

-#include "megdnn/heuristic_cache.h"
+#include "megdnn/algorithm_cache.h"

 using namespace megdnn;
 using namespace cuda;
@@ -240,9 +231,9 @@ auto make_activation_descriptor(
 // high-level api for convolution execution
 struct StaticData {
-    using Key = megdnn::HeuristicCache::Key;
-    using KeyStorage = megdnn::HeuristicCache::KeyStorage;
-    using KeyHash = megdnn::HeuristicCache::Hash;
+    using Key = megdnn::AlgorithmCache::Key;
+    using KeyStorage = megdnn::AlgorithmCache::KeyStorage;
+    using KeyHash = megdnn::AlgorithmCache::Hash;
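+    // cudnn-frontend execution plans are memoized in the map below, keyed the
+    // same way as megdnn's generic algorithm cache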
     using Result = cudnn_frontend::ExecutionPlan;
     using CudnnFrontendExecutionPlanCache =
             std::unordered_map<KeyStorage, Result, KeyHash>;
@@ -682,4 +673,5 @@ void megdnn::cuda::run_conv_bias_act_with_plan(
             handle, plan.get_raw_desc(), variant_pack.get_raw_desc()));
 }
+#endif

 // vim: syntax=cpp.doxygen
@@ -1,15 +1,6 @@
-/**
- * \file dnn/src/cuda/cudnn_wrapper_v8.h
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #pragma once
+#if CUDNN_VERSION >= 8020

 #include "megdnn/basic_types.h"
 #include "megdnn/oprs/nn.h"
 #include "src/common/utils.h"
@@ -67,4 +58,5 @@ void run_conv_bias_act_with_plan(
 }  // namespace cuda
 }  // namespace megdnn
+#endif

 // vim: syntax=cpp.doxygen
@@ -58,11 +58,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
 For example `export CUDA_CACHE_MAXSIZE=2147483647` and `export CUDA_CACHE_PATH=/data/.cuda_cache`)");
     }
 #endif
-    size_t free, tot;
-    cudaMemGetInfo(&free, &tot);
-    printf("before cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
-           free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
-           (tot - free) / 1024.0 / 1024.0);
     cudnn_check(cudnnCreate(&m_cudnn_handle));
     cublas_check(cublasCreate(&m_cublas_handle));
 #if CUDA_VERSION >= 10010
@@ -74,11 +69,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
     cudnn_check(cudnnSetStream(m_cudnn_handle, stream()));
     cublas_check(cublasSetStream(m_cublas_handle, stream()));

-#if CUDNN_VERSION >= 8004
-    // cudnn_check(cudnnOpsInferVersionCheck());
-    // cudnn_check(cudnnCnnInferVersionCheck());
-#endif
-
     // Note that all cublas scalars (alpha, beta) and scalar results such as dot
     // output resides at device side.
     cublas_check(cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_DEVICE));
@@ -92,11 +82,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
             cudaMemcpyHostToDevice, stream()));
     cuda_check(cudaStreamSynchronize(stream()));

-    cudaMemGetInfo(&free, &tot);
-    printf("after cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
-           free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
-           (tot - free) / 1024.0 / 1024.0);
-
     // check tk1
     m_is_tegra_k1 = (strcmp(m_device_prop->name, "GK20A") == 0);
     m_cusolver_handle = nullptr;
@@ -1,13 +1,3 @@
-/**
- * \file dnn/test/cuda/conv_bias.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #include "megdnn/dtype.h"
 #include "test/cuda/fixture.h"
@@ -26,7 +16,7 @@ using namespace megdnn;
 using namespace test;
 using namespace conv_bias;

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 TEST_F(CUDA, CONV_V8_FLOAT) {
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(