GitOrigin-RevId: 7882f9c68c
release-1.5
@@ -37,14 +37,13 @@ all: ${PARAM_DEFS} ${ELEMWISE_IMPL} ${CUDA_CONV_IMPL} $(CUDA_MATMUL_IMPL) | |||
../src/cuda/elemwise_multi_type/kimpl: gen_elemwise_multi_type_kern_impls.py | |||
./$^ --type cuda $@ | |||
../src/cuda/conv_bias/int8/kimpl: gen_cuda_conv_bias_kern_impls.py gen_cutlass_conv_bias_kern_impls.py cutlass_generator/generator.py | |||
../src/cuda/conv_bias/int8/kimpl: gen_cuda_conv_bias_kern_impls.py cutlass_generator | |||
./gen_cuda_conv_bias_kern_impls.py --type dp4a $@ | |||
./gen_cutlass_conv_bias_kern_impls.py --type dp4a $@ | |||
python3 ./cutlass_generator/generator.py --operations all --type simt $@ | |||
../src/cuda/conv_bias/int8_imma/kimpl: gen_cuda_conv_bias_kern_impls.py gen_cutlass_conv_bias_kern_impls.py | |||
../src/cuda/conv_bias/int8_imma/kimpl: gen_cuda_conv_bias_kern_impls.py cutlass_generator | |||
./gen_cuda_conv_bias_kern_impls.py --type imma $@ | |||
./gen_cutlass_conv_bias_kern_impls.py --type imma $@ | |||
python3 ./cutlass_generator/generator.py --operations conv2d --type tensorop8816 $@ | |||
../src/cuda/batch_conv_bias/int8/kimpl: gen_cuda_batch_conv_bias_kern_impls.py | |||
./$^ --type dp4a $@ | |||
@@ -807,9 +807,9 @@ void megdnn::cuda::cutlass_wrapper:: | |||
const int32_t* d_bias, const uint8_t* d_z, uint8_t* d_dst, | |||
int* workspace, const convolution::ConvParam& param, | |||
uint32_t nonlinear_mode, float alpha, float beta, float gamma, | |||
float delta, float theta, float scale, uint8_t src_zero_point, | |||
const GemmCoord& threadblock_shape, const GemmCoord& warp_shape, | |||
cudaStream_t stream) { | |||
float delta, float theta, float /* scale */, | |||
uint8_t src_zero_point, const GemmCoord& threadblock_shape, | |||
const GemmCoord& warp_shape, cudaStream_t stream) { | |||
#define DISPATCH_KERNEL_WITH_TILE_SHAPE(threadblock_m_, threadblock_n_, \ | |||
threadblock_k_, warp_m_, warp_n_, \ | |||
warp_k_) \ | |||
@@ -878,15 +878,6 @@ void megdnn::cuda::cutlass_wrapper:: | |||
0, delta, theta}; | |||
DISPATCH_KERNEL; | |||
} | |||
case NonlineMode::H_SWISH: { | |||
using EpilogueOp = cutlass::epilogue::thread:: | |||
BiasAddLinearCombinationHSwishClamp< | |||
ElementOutput, 16, ElementAccumulator, ElementBias, | |||
ElementCompute>; | |||
typename EpilogueOp::Params epilogue{alpha, beta, gamma, | |||
scale, delta, theta}; | |||
DISPATCH_KERNEL; | |||
} | |||
default: | |||
megdnn_assert(false, | |||
"unsupported nonlinear mode for conv bias operator"); | |||
@@ -960,8 +951,7 @@ void megdnn::cuda::cutlass_wrapper:: | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, \ | |||
cutlass::conv::threadblock:: \ | |||
ConvolutionFpropNCxHWxThreadblockSwizzle, \ | |||
stages_, 4, aligned_, true, \ | |||
cutlass::arch::OpMultiplyAddSaturate>; \ | |||
stages_, 4, aligned_, true, cutlass::arch::OpMultiplyAdd>; \ | |||
typename Convolution::ConvolutionParameter conv_param( \ | |||
param.n, param.hi, param.wi, param.ci, param.co, param.fh, \ | |||
param.fw, param.ho, param.wo, param.ph, param.pw, param.sh, \ | |||
@@ -1,65 +0,0 @@ | |||
/** | |||
* \file | |||
* dnn/src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "cutlass/convolution/device/convolution.h" | |||
#include "src/cuda/conv_bias/cutlass_convolution_wrapper.cuh" | |||
using namespace megdnn; | |||
using namespace cuda; | |||
using namespace cutlass_wrapper; | |||
template <typename Convolution> | |||
void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param) { | |||
typename Convolution::TensorRefSrc tensor_src{ | |||
const_cast<typename Convolution::ElementSrc*>(d_src), | |||
Convolution::LayoutSrc::packed( | |||
{conv_param.N, conv_param.H, conv_param.W, conv_param.C})}; | |||
typename Convolution::TensorRefFilter tensor_filter{ | |||
const_cast<typename Convolution::ElementFilter*>(d_filter), | |||
Convolution::LayoutFilter::packed( | |||
{conv_param.K, conv_param.R, conv_param.S, conv_param.C})}; | |||
typename Convolution::TensorRefBias tensor_bias{ | |||
const_cast<typename Convolution::ElementBias*>(d_bias), | |||
Convolution::LayoutBias::packed({1, 1, 1, conv_param.K})}; | |||
typename Convolution::TensorRefDst tensor_z{ | |||
const_cast<typename Convolution::ElementDst*>(d_z), | |||
Convolution::LayoutDst::packed( | |||
{conv_param.N, conv_param.P, conv_param.Q, conv_param.K})}; | |||
typename Convolution::TensorRefDst tensor_dst{ | |||
d_dst, | |||
Convolution::LayoutDst::packed( | |||
{conv_param.N, conv_param.P, conv_param.Q, conv_param.K})}; | |||
typename Convolution::Arguments arguments{conv_param, | |||
tensor_src.non_const_ref(), | |||
tensor_filter.non_const_ref(), | |||
tensor_bias.non_const_ref(), | |||
tensor_z.non_const_ref(), | |||
tensor_dst.non_const_ref(), | |||
epilogue, | |||
{}, | |||
{}, | |||
extra_param}; | |||
Convolution conv_op; | |||
cutlass_check(conv_op.initialize(arguments, workspace)); | |||
cutlass_check(conv_op(stream)); | |||
after_kernel_launch(); | |||
} | |||
// vim: syntax=cuda.doxygen |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
cutlass::int4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::int4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
cutlass::int4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::int4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
cutlass::int4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::int4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
cutlass::int4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::int4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
cutlass::int4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::int4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
cutlass::int4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::int4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
cutlass::uint4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::uint4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
cutlass::uint4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::uint4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
cutlass::uint4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::uint4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
cutlass::uint4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::uint4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
cutlass::uint4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::uint4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int4_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<64>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>; | |||
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
cutlass::uint4b_t, 16, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter, cutlass::uint4b_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 32, 32, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_s4_i8832fprop_hswish_s4_128x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// s4 fprop, NC64HW64 src/dst + C64RSK64 filter, HSwish-clamped bias epilogue,
// Sm75 tensor ops, 128x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp<
        cutlass::int4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::int4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_s4_i8832fprop_hswish_s4_256x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// s4 fprop, NC64HW64 src/dst + C64RSK64 filter, HSwish-clamped bias epilogue,
// Sm75 tensor ops, 256x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp<
        cutlass::int4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::int4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_s4_i8832fprop_identity_s4_128x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// s4 fprop, NC64HW64 src/dst + C64RSK64 filter, clamped bias epilogue (identity),
// Sm75 tensor ops, 128x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp<
        cutlass::int4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::int4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_s4_i8832fprop_identity_s4_256x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// s4 fprop, NC64HW64 src/dst + C64RSK64 filter, clamped bias epilogue (identity),
// Sm75 tensor ops, 256x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp<
        cutlass::int4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::int4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_s4_i8832fprop_relu_s4_128x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// s4 fprop, NC64HW64 src/dst + C64RSK64 filter, ReLU-clamped bias epilogue,
// Sm75 tensor ops, 128x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp<
        cutlass::int4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::int4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_s4_i8832fprop_relu_s4_256x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// s4 fprop, NC64HW64 src/dst + C64RSK64 filter, ReLU-clamped bias epilogue,
// Sm75 tensor ops, 256x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp<
        cutlass::int4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::int4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::int4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_u4_i8832fprop_identity_u4_s4_128x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// u4 src/dst with s4 filter, NC64HW64 layouts, clamped bias epilogue (identity),
// Sm75 tensor ops, 128x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp<
        cutlass::uint4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::uint4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_u4_i8832fprop_identity_u4_s4_256x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// u4 src/dst with s4 filter, NC64HW64 layouts, clamped bias epilogue (identity),
// Sm75 tensor ops, 256x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp<
        cutlass::uint4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::uint4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_u4_i8832fprop_relu_u4_s4_128x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// u4 src/dst with s4 filter, NC64HW64 layouts, ReLU-clamped bias epilogue,
// Sm75 tensor ops, 128x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp<
        cutlass::uint4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::uint4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -0,0 +1,55 @@ | |||
#if !MEGDNN_TEGRA_X1
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/implicit_gemm_conv_bias_cutlass_wrapper.cuinl"
// kernel instance "cutlass_tensorop_u4_i8832fprop_relu_u4_s4_256x128x128_64x64x128_2_nc64hw64" generated by cutlass generator
// u4 src/dst with s4 filter, NC64HW64 layouts, ReLU-clamped bias epilogue,
// Sm75 tensor ops, 256x128x128 threadblock / 64x64x128 warp tile, 2 stages.
using LayoutSrc = cutlass::layout::TensorNCxHWx<64>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<64>;
using LayoutDst = cutlass::layout::TensorNCxHWx<64>;
using ThreadBlockShape = cutlass::gemm::GemmShape<256, 128, 128>;
using WarpShape = cutlass::gemm::GemmShape<64, 64, 128>;
using InstructionShape = cutlass::gemm::GemmShape<8, 8, 32>;
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp<
        cutlass::uint4b_t, 16, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
        cutlass::uint4b_t, LayoutSrc, cutlass::int4b_t, LayoutFilter,
        cutlass::uint4b_t, LayoutDst, int32_t, LayoutDst, int32_t,
        cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassTensorOp,
        cutlass::arch::Sm75, ThreadBlockShape, WarpShape, InstructionShape,
        EpilogueOp,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 32, 32, true, cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1 +0,0 @@ | |||
../implicit_gemm_conv_bias_cutlass_wrapper.cuinl |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// int8 NC4HW4 fprop, HSwish-clamped bias epilogue, Sm61 SIMT,
// 128x128x32 threadblock / 64x32x32 warp tile, 2 stages.
using Convolution = cutlass::conv::device::Convolution<
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int8_t, cutlass::layout::TensorCxRSKx<4>,
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int32_t, cutlass::layout::TensorNCxHWx<4>, int32_t,
        cutlass::conv::ConvType::kConvolution,
        cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
        cutlass::gemm::GemmShape<128, 128, 32>,
        cutlass::gemm::GemmShape<64, 32, 32>,
        cutlass::gemm::GemmShape<1, 1, 4>,
        cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp<
                int8_t, 4, int32_t, int32_t, float>,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 4, 16, true,
        cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// int8 NC4HW4 fprop, clamped bias epilogue (identity), Sm61 SIMT,
// 128x128x32 threadblock / 64x32x32 warp tile, 2 stages.
using Convolution = cutlass::conv::device::Convolution<
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int8_t, cutlass::layout::TensorCxRSKx<4>,
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int32_t, cutlass::layout::TensorNCxHWx<4>, int32_t,
        cutlass::conv::ConvType::kConvolution,
        cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
        cutlass::gemm::GemmShape<128, 128, 32>,
        cutlass::gemm::GemmShape<64, 32, 32>,
        cutlass::gemm::GemmShape<1, 1, 4>,
        cutlass::epilogue::thread::BiasAddLinearCombinationClamp<
                int8_t, 4, int32_t, int32_t, float>,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 4, 16, true,
        cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// int8 NC4HW4 fprop, ReLU-clamped bias epilogue, Sm61 SIMT,
// 128x128x32 threadblock / 64x32x32 warp tile, 2 stages.
using Convolution = cutlass::conv::device::Convolution<
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int8_t, cutlass::layout::TensorCxRSKx<4>,
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int32_t, cutlass::layout::TensorNCxHWx<4>, int32_t,
        cutlass::conv::ConvType::kConvolution,
        cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
        cutlass::gemm::GemmShape<128, 128, 32>,
        cutlass::gemm::GemmShape<64, 32, 32>,
        cutlass::gemm::GemmShape<1, 1, 4>,
        cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp<
                int8_t, 4, int32_t, int32_t, float>,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 4, 16, true,
        cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// int8 NC4HW4 fprop, HSwish-clamped bias epilogue, Sm61 SIMT,
// 128x32x32 threadblock / 64x32x32 warp tile, 2 stages.
using Convolution = cutlass::conv::device::Convolution<
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int8_t, cutlass::layout::TensorCxRSKx<4>,
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int32_t, cutlass::layout::TensorNCxHWx<4>, int32_t,
        cutlass::conv::ConvType::kConvolution,
        cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
        cutlass::gemm::GemmShape<128, 32, 32>,
        cutlass::gemm::GemmShape<64, 32, 32>,
        cutlass::gemm::GemmShape<1, 1, 4>,
        cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp<
                int8_t, 4, int32_t, int32_t, float>,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 4, 16, true,
        cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// int8 NC4HW4 fprop, clamped bias epilogue (identity), Sm61 SIMT,
// 128x32x32 threadblock / 64x32x32 warp tile, 2 stages.
using Convolution = cutlass::conv::device::Convolution<
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int8_t, cutlass::layout::TensorCxRSKx<4>,
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int32_t, cutlass::layout::TensorNCxHWx<4>, int32_t,
        cutlass::conv::ConvType::kConvolution,
        cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
        cutlass::gemm::GemmShape<128, 32, 32>,
        cutlass::gemm::GemmShape<64, 32, 32>,
        cutlass::gemm::GemmShape<1, 1, 4>,
        cutlass::epilogue::thread::BiasAddLinearCombinationClamp<
                int8_t, 4, int32_t, int32_t, float>,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 4, 16, true,
        cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// int8 NC4HW4 fprop, ReLU-clamped bias epilogue, Sm61 SIMT,
// 128x32x32 threadblock / 64x32x32 warp tile, 2 stages.
using Convolution = cutlass::conv::device::Convolution<
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int8_t, cutlass::layout::TensorCxRSKx<4>,
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int32_t, cutlass::layout::TensorNCxHWx<4>, int32_t,
        cutlass::conv::ConvType::kConvolution,
        cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
        cutlass::gemm::GemmShape<128, 32, 32>,
        cutlass::gemm::GemmShape<64, 32, 32>,
        cutlass::gemm::GemmShape<1, 1, 4>,
        cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp<
                int8_t, 4, int32_t, int32_t, float>,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 4, 16, true,
        cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// int8 NC4HW4 fprop, HSwish-clamped bias epilogue, Sm61 SIMT,
// 128x64x32 threadblock / 64x32x32 warp tile, 2 stages.
using Convolution = cutlass::conv::device::Convolution<
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int8_t, cutlass::layout::TensorCxRSKx<4>,
        int8_t, cutlass::layout::TensorNCxHWx<4>,
        int32_t, cutlass::layout::TensorNCxHWx<4>, int32_t,
        cutlass::conv::ConvType::kConvolution,
        cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
        cutlass::gemm::GemmShape<128, 64, 32>,
        cutlass::gemm::GemmShape<64, 32, 32>,
        cutlass::gemm::GemmShape<1, 1, 4>,
        cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp<
                int8_t, 4, int32_t, int32_t, float>,
        cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
        2, 4, 16, true,
        cutlass::arch::OpMultiplyAddSaturate>;
// explicit instantiation of the host-side launcher for this configuration
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
1, 4, 8, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
1, 4, 8, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
1, 4, 8, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 4, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 4, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 4, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
1, 4, 8, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
1, 4, 8, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 128, 16>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
1, 4, 8, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 4, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 4, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using WarpShape = cutlass::gemm::GemmShape<16, 64, 8>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 4, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, false, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<32, 64, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<4>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<64, 64, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<32>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<32>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<32>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 128, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<32>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1 | |||
// generated by gen_cuda_conv_bias_int8_kern_impls.py | |||
// ignore warning of cutlass | |||
#pragma GCC diagnostic push | |||
#pragma GCC diagnostic ignored "-Wunused-parameter" | |||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" | |||
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl" | |||
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>; | |||
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>; | |||
using LayoutDst = cutlass::layout::TensorNCxHWx<32>; | |||
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 32, 32>; | |||
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>; | |||
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>; | |||
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp< | |||
int8_t, 4, int32_t, int32_t, float>; | |||
using Convolution = cutlass::conv::device::Convolution< | |||
int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t, | |||
LayoutDst, int32_t, LayoutDst, int32_t, | |||
cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61, | |||
ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, | |||
cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle, | |||
2, 4, 16, true, | |||
cutlass::arch::OpMultiplyAddSaturate>; | |||
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>( | |||
const typename Convolution::ElementSrc* d_src, | |||
const typename Convolution::ElementFilter* d_filter, | |||
const typename Convolution::ElementBias* d_bias, | |||
const typename Convolution::ElementDst* d_z, | |||
typename Convolution::ElementDst* d_dst, | |||
int* workspace, | |||
typename Convolution::ConvolutionParameter const& conv_param, | |||
typename Convolution::EpilogueOutputOp::Params const& epilogue, | |||
cudaStream_t stream, typename Convolution::ExtraParam extra_param); | |||
#pragma GCC diagnostic pop | |||
#endif |
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// Tensor layouts: NCxHWx<4> source, CxRSKx<4> filter, NCxHWx<32> destination.
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>;
using LayoutDst = cutlass::layout::TensorNCxHWx<32>;
// Tile configuration: 128x32x32 threadblock, 64x32x32 warp.
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 32, 32>;
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>;
// 1x1x4 instruction shape with OpMultiplyAddSaturate on OpClassSimt —
// presumably the dp4a path; confirm against the generator script.
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>;
// Epilogue: bias add + linear combination + ReLU, clamped to the int8 range.
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationReluClamp<
    int8_t, 4, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
    int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t,
    LayoutDst, int32_t, LayoutDst, int32_t,
    cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
    ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp,
    cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
    2, 4, 16, true,
    cutlass::arch::OpMultiplyAddSaturate>;
// Explicit instantiation of the MegDNN wrapper for this kernel configuration,
// so the template body in the .cuinl header is compiled exactly once here.
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// Tensor layouts: NCxHWx<4> source, CxRSKx<4> filter, NCxHWx<32> destination.
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>;
using LayoutDst = cutlass::layout::TensorNCxHWx<32>;
// Tile configuration: 128x64x32 threadblock, 64x32x32 warp.
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 64, 32>;
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>;
// 1x1x4 instruction shape with OpMultiplyAddSaturate on OpClassSimt —
// presumably the dp4a path; confirm against the generator script.
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>;
// Epilogue: bias add + linear combination + H-Swish, clamped to the int8 range.
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationHSwishClamp<
    int8_t, 4, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
    int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t,
    LayoutDst, int32_t, LayoutDst, int32_t,
    cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
    ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp,
    cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
    2, 4, 16, true,
    cutlass::arch::OpMultiplyAddSaturate>;
// Explicit instantiation of the MegDNN wrapper for this kernel configuration,
// so the template body in the .cuinl header is compiled exactly once here.
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif
@@ -1,36 +0,0 @@ | |||
#if !MEGDNN_TEGRA_X1
// generated by gen_cuda_conv_bias_int8_kern_impls.py
// ignore warning of cutlass
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include "src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl"
// Tensor layouts: NCxHWx<4> source, CxRSKx<4> filter, NCxHWx<32> destination.
using LayoutSrc = cutlass::layout::TensorNCxHWx<4>;
using LayoutFilter = cutlass::layout::TensorCxRSKx<4>;
using LayoutDst = cutlass::layout::TensorNCxHWx<32>;
// Tile configuration: 128x64x32 threadblock, 64x32x32 warp.
using ThreadBlockShape = cutlass::gemm::GemmShape<128, 64, 32>;
using WarpShape = cutlass::gemm::GemmShape<64, 32, 32>;
// 1x1x4 instruction shape with OpMultiplyAddSaturate on OpClassSimt —
// presumably the dp4a path; confirm against the generator script.
using InstructionShape = cutlass::gemm::GemmShape<1, 1, 4>;
// Epilogue: bias add + linear combination, clamped to the int8 range
// (identity activation).
using EpilogueOp = cutlass::epilogue::thread::BiasAddLinearCombinationClamp<
    int8_t, 4, int32_t, int32_t, float>;
using Convolution = cutlass::conv::device::Convolution<
    int8_t, LayoutSrc, int8_t, LayoutFilter, int8_t,
    LayoutDst, int32_t, LayoutDst, int32_t,
    cutlass::conv::ConvType::kConvolution, cutlass::arch::OpClassSimt, cutlass::arch::Sm61,
    ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp,
    cutlass::conv::threadblock::ConvolutionFpropNCxHWxThreadblockSwizzle,
    2, 4, 16, true,
    cutlass::arch::OpMultiplyAddSaturate>;
// Explicit instantiation of the MegDNN wrapper for this kernel configuration,
// so the template body in the .cuinl header is compiled exactly once here.
template void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper<Convolution>(
        const typename Convolution::ElementSrc* d_src,
        const typename Convolution::ElementFilter* d_filter,
        const typename Convolution::ElementBias* d_bias,
        const typename Convolution::ElementDst* d_z,
        typename Convolution::ElementDst* d_dst,
        int* workspace,
        typename Convolution::ConvolutionParameter const& conv_param,
        typename Convolution::EpilogueOutputOp::Params const& epilogue,
        cudaStream_t stream, typename Convolution::ExtraParam extra_param);
#pragma GCC diagnostic pop
#endif