/** * \file dnn/src/cuda/matrix_mul/opr_impl.h * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") * * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #pragma once #include "megdnn/oprs.h" #include namespace megdnn { namespace cuda { class MatrixMulForwardImpl : public MatrixMulForward { public: using MatrixMulForward::MatrixMulForward; void exec(_megdnn_tensor_in A, _megdnn_tensor_in B, _megdnn_tensor_out C, _megdnn_workspace workspace) override; size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, const TensorLayout&) override; bool is_thread_safe() const override { return true; } const char* get_algorithm_set_name() const override { return "CUDA MATMUL"; } class AlgoBase; class AlgoCuBlas; #if CUDA_VERSION >= 10000 class AlgoUInt4x4x32WMMA; #endif #if CUDA_VERSION >= 10010 class AlgoCuBlasLt; #endif class AlgoNaive; #if !MEGDNN_DISABLE_FLOAT16 class AlgoBFloat16; #endif #if CUDA_VERSION >= 9020 class AlgoFloat32SIMT; class AlgoFloat32SIMTSplitK; class AlgoFloat32SIMTGemvBatchedStrided; #endif class AlgoPack; static const AlgoPack& algo_pack() { return sm_algo_pack; } Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override; protected: std::vector get_all_algorithms(const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) override; Algorithm* get_algorithm_heuristic( const TensorLayout& A, const TensorLayout& B, const TensorLayout& C, size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr, const AlgoAttribute& negative_attr) override; private: static AlgoPack sm_algo_pack; }; } // namespace cuda } // namespace megdnn // vim: syntax=cpp.doxygen