GitOrigin-RevId: ffd6431299
tags/v1.6.0-rc1
@@ -67,7 +67,6 @@ option(MGE_WITH_ROCM "Enable ROCM support" OFF) | |||||
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF) | option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF) | ||||
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF) | option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF) | ||||
if(MSVC OR WIN32) | if(MSVC OR WIN32) | ||||
message(STATUS "windows force cudnn static link") | message(STATUS "windows force cudnn static link") | ||||
set(MGE_WITH_CUDNN_SHARED OFF) | set(MGE_WITH_CUDNN_SHARED OFF) | ||||
@@ -332,7 +331,6 @@ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MGE_COMMON_LINKER_ | |||||
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}") | set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}") | ||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}") | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}") | ||||
if(NOT MGE_WITH_JIT) | if(NOT MGE_WITH_JIT) | ||||
if(MGE_WITH_HALIDE) | if(MGE_WITH_HALIDE) | ||||
message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled") | message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled") | ||||
@@ -728,7 +726,6 @@ if (MGE_WITH_ROCM) | |||||
include(cmake/rocm.cmake) | include(cmake/rocm.cmake) | ||||
endif () | endif () | ||||
if(MGE_WITH_ATLAS) | if(MGE_WITH_ATLAS) | ||||
add_subdirectory(dnn/atlas-stub) | add_subdirectory(dnn/atlas-stub) | ||||
list(APPEND MGE_ATLAS_LIBS atlas-stub) | list(APPEND MGE_ATLAS_LIBS atlas-stub) | ||||
@@ -736,7 +733,6 @@ if(MGE_WITH_ATLAS) | |||||
set(MGB_ATLAS ${MGE_WITH_ATLAS}) | set(MGB_ATLAS ${MGE_WITH_ATLAS}) | ||||
endif() | endif() | ||||
find_program(CCACHE_BIN ccache) | find_program(CCACHE_BIN ccache) | ||||
if(CCACHE_BIN) | if(CCACHE_BIN) | ||||
set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN}) | set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN}) | ||||
@@ -834,12 +830,10 @@ endif() | |||||
set(MGB_CUDA ${MGE_WITH_CUDA}) | set(MGB_CUDA ${MGE_WITH_CUDA}) | ||||
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA}) | set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA}) | ||||
#ROCM | #ROCM | ||||
set(MGB_ROCM ${MGE_WITH_ROCM}) | set(MGB_ROCM ${MGE_WITH_ROCM}) | ||||
set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM}) | set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM}) | ||||
# CAMBRICON | # CAMBRICON | ||||
set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON}) | set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON}) | ||||
set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON}) | set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON}) | ||||
@@ -1029,7 +1023,6 @@ if(MGE_BUILD_SDK) | |||||
add_subdirectory(sdk/load-and-run) | add_subdirectory(sdk/load-and-run) | ||||
endif() | endif() | ||||
if(MGE_BUILD_IMPERATIVE_RT) | if(MGE_BUILD_IMPERATIVE_RT) | ||||
add_subdirectory(imperative) | add_subdirectory(imperative) | ||||
message(STATUS "Enable imperative python wrapper runtime") | message(STATUS "Enable imperative python wrapper runtime") | ||||
@@ -1117,4 +1110,3 @@ if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND("${CUDNN_VERSION}" VERSION_GREATER | |||||
message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | ||||
message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | ||||
endif() | endif() | ||||
@@ -1,3 +1,4 @@ | |||||
include(ExternalProject) | include(ExternalProject) | ||||
find_package(LLVM 6.0 REQUIRED CONFIG) | find_package(LLVM 6.0 REQUIRED CONFIG) | ||||
@@ -38,7 +38,6 @@ list(APPEND OPR_PARAM_DEFS_OUTS | |||||
) | ) | ||||
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | ||||
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h") | install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h") | ||||
add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS}) | add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS}) | ||||
@@ -56,7 +55,6 @@ endforeach() | |||||
add_dependencies(opr_param_defs _opr_param_defs) | add_dependencies(opr_param_defs _opr_param_defs) | ||||
install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) | install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) | ||||
if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
add_library(cutlass INTERFACE) | add_library(cutlass INTERFACE) | ||||
target_include_directories(cutlass | target_include_directories(cutlass | ||||
@@ -13,7 +13,6 @@ | |||||
#if !defined(__CUDACC__) && !defined(__HIPCC__) | #if !defined(__CUDACC__) && !defined(__HIPCC__) | ||||
#endif // !defined(__CUDACC__) | #endif // !defined(__CUDACC__) | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -90,7 +90,6 @@ class Handle { | |||||
std::unique_ptr<opr> create_rocm_operator(); | std::unique_ptr<opr> create_rocm_operator(); | ||||
#endif | #endif | ||||
virtual ~Handle(); | virtual ~Handle(); | ||||
/*! | /*! | ||||
@@ -137,11 +137,9 @@ if(MGE_WITH_CUDA) | |||||
gen_cutlass_kimpl(conv2d tensorop8832) | gen_cutlass_kimpl(conv2d tensorop8832) | ||||
file(GLOB_RECURSE CUTLASS_SOURCES ${CUTLASS_GEN_DIR}/*.cu) | file(GLOB_RECURSE CUTLASS_SOURCES ${CUTLASS_GEN_DIR}/*.cu) | ||||
list(APPEND SOURCES ${CUTLASS_SOURCES}) | list(APPEND SOURCES ${CUTLASS_SOURCES}) | ||||
list(APPEND SOURCES ${CUSOURCES}) | list(APPEND SOURCES ${CUSOURCES}) | ||||
endif() | endif() | ||||
if(MGE_WITH_CAMBRICON) | if(MGE_WITH_CAMBRICON) | ||||
file(GLOB_RECURSE SOURCES_ cambricon/*.cpp) | file(GLOB_RECURSE SOURCES_ cambricon/*.cpp) | ||||
list(APPEND SOURCES ${SOURCES_}) | list(APPEND SOURCES ${SOURCES_}) | ||||
@@ -161,7 +159,6 @@ if(MGE_WITH_ATLAS) | |||||
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) | list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) | ||||
endif() | endif() | ||||
add_definitions(${LIBMEGDNN_DEF}) | add_definitions(${LIBMEGDNN_DEF}) | ||||
add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES}) | add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES}) | ||||
@@ -186,7 +183,6 @@ if(MGE_WITH_ROCM) | |||||
${AMDOCL_LIBRARY_DIR}) | ${AMDOCL_LIBRARY_DIR}) | ||||
endif() | endif() | ||||
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") | if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") | ||||
if(MGE_ENABLE_CPUINFO) | if(MGE_ENABLE_CPUINFO) | ||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>) | target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>) | ||||
@@ -15,5 +15,4 @@ | |||||
#pragma message "Mangling is disabled." | #pragma message "Mangling is disabled." | ||||
#endif // MEGDNN_ENABLE_MANGLING | #endif // MEGDNN_ENABLE_MANGLING | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -31,13 +31,10 @@ | |||||
#include "src/aarch64/handle.h" | #include "src/aarch64/handle.h" | ||||
#endif | #endif | ||||
#if MEGDNN_WITH_CUDA | #if MEGDNN_WITH_CUDA | ||||
#include "src/cuda/handle.h" | #include "src/cuda/handle.h" | ||||
#endif | #endif | ||||
#if MEGDNN_WITH_CAMBRICON | #if MEGDNN_WITH_CAMBRICON | ||||
#include "src/cambricon/handle.h" | #include "src/cambricon/handle.h" | ||||
#endif | #endif | ||||
@@ -128,7 +125,6 @@ std::unique_ptr<Handle> Handle::make(megcoreComputingHandle_t computing_handle, | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
void Handle::set_destructor(const thin_function<void()>& d) { | void Handle::set_destructor(const thin_function<void()>& d) { | ||||
megdnn_assert(!m_destructor, "destructor can be set only once"); | megdnn_assert(!m_destructor, "destructor can be set only once"); | ||||
m_destructor = d; | m_destructor = d; | ||||
@@ -17,8 +17,6 @@ | |||||
#include "src/cuda/megcore/cuda_computing_context.hpp" | #include "src/cuda/megcore/cuda_computing_context.hpp" | ||||
#endif | #endif | ||||
#if MEGDNN_WITH_ROCM | #if MEGDNN_WITH_ROCM | ||||
#include "src/rocm/megcore/computing_context.hpp" | #include "src/rocm/megcore/computing_context.hpp" | ||||
#endif | #endif | ||||
@@ -880,7 +880,6 @@ void remap(const Mat<T>& src, Mat<T>& dst, Mat<short>& map1, Mat<ushort>& map2, | |||||
for (; x1 <= bcols - 8; x1 += 8) | for (; x1 <= bcols - 8; x1 += 8) | ||||
vst1q_u16(A + x1, | vst1q_u16(A + x1, | ||||
vandq_u16(vld1q_u16(sA + x1), v_scale)); | vandq_u16(vld1q_u16(sA + x1), v_scale)); | ||||
#endif | #endif | ||||
for (; x1 < bcols; ++x1) | for (; x1 < bcols; ++x1) | ||||
A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2 - 1)); | A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2 - 1)); | ||||
@@ -287,7 +287,6 @@ void ConvBiasForwardImpl::AlgoPack::fill_dp4a_algos() { | |||||
int8_nchw4_dotprod.emplace_back(AlgoParam{16, 64, 8, 16, 64, 8, 2}); | int8_nchw4_dotprod.emplace_back(AlgoParam{16, 64, 8, 16, 64, 8, 2}); | ||||
} | } | ||||
ConvBiasForwardImpl::AlgoBase* | ConvBiasForwardImpl::AlgoBase* | ||||
ConvBiasForwardImpl::AlgoPack::cudnn_conv_from_enum( | ConvBiasForwardImpl::AlgoPack::cudnn_conv_from_enum( | ||||
cudnnConvolutionFwdAlgo_t algo) { | cudnnConvolutionFwdAlgo_t algo) { | ||||
@@ -1037,7 +1037,6 @@ private: | |||||
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | ||||
}; | }; | ||||
class ConvBiasForwardImpl::AlgoPack : NonCopyableObj { | class ConvBiasForwardImpl::AlgoPack : NonCopyableObj { | ||||
private: | private: | ||||
AlgoBase::Mapper m_all_algos_map; | AlgoBase::Mapper m_all_algos_map; | ||||
@@ -10,7 +10,6 @@ | |||||
*/ | */ | ||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
namespace { | namespace { | ||||
template <bool is_xcorr, typename dtype> | template <bool is_xcorr, typename dtype> | ||||
@@ -34,7 +34,6 @@ if(MGE_WITH_CAMBRICON) | |||||
list(APPEND SOURCES ${SOURCES_}) | list(APPEND SOURCES ${SOURCES_}) | ||||
endif() | endif() | ||||
if(MGE_WITH_ATLAS) | if(MGE_WITH_ATLAS) | ||||
file(GLOB_RECURSE SOURCES_ atlas/*.cpp) | file(GLOB_RECURSE SOURCES_ atlas/*.cpp) | ||||
list(APPEND SOURCES ${SOURCES_}) | list(APPEND SOURCES ${SOURCES_}) | ||||
@@ -45,8 +44,6 @@ if (MGE_WITH_ROCM) | |||||
list (APPEND SOURCES ${SOURCES_}) | list (APPEND SOURCES ${SOURCES_}) | ||||
endif() | endif() | ||||
add_executable(megdnn_test ${SOURCES}) | add_executable(megdnn_test ${SOURCES}) | ||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") | ||||
target_link_libraries(megdnn_test gtest) | target_link_libraries(megdnn_test gtest) | ||||
@@ -60,7 +57,6 @@ if(MGE_WITH_ATLAS) | |||||
target_link_libraries(megdnn_test atlas-stub) | target_link_libraries(megdnn_test atlas-stub) | ||||
endif() | endif() | ||||
target_include_directories(megdnn_test | target_include_directories(megdnn_test | ||||
PRIVATE | PRIVATE | ||||
${PROJECT_SOURCE_DIR}/third_party/midout/src | ${PROJECT_SOURCE_DIR}/third_party/midout/src | ||||
@@ -494,7 +494,6 @@ std::vector<TestArg> get_int8_nchw44_args(size_t kernel_size, size_t pack_size, | |||||
return args; | return args; | ||||
} | } | ||||
std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) { | std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) { | ||||
std::vector<TestArg> args; | std::vector<TestArg> args; | ||||
param::ConvBias cur_param; | param::ConvBias cur_param; | ||||
@@ -530,7 +529,6 @@ std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) { | |||||
return args; | return args; | ||||
} | } | ||||
std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) { | std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) { | ||||
std::vector<TestArg> args; | std::vector<TestArg> args; | ||||
param::ConvBias cur_param; | param::ConvBias cur_param; | ||||
@@ -974,7 +972,6 @@ void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel, | |||||
} | } | ||||
#endif // MEGDNN_WITH_BENCHMARK | #endif // MEGDNN_WITH_BENCHMARK | ||||
std::vector<conv_bias::TestArg> get_conv_bias_args( | std::vector<conv_bias::TestArg> get_conv_bias_args( | ||||
std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias, | std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias, | ||||
bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) { | bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) { | ||||
@@ -1188,7 +1185,6 @@ void check_conv_bias_preprocess(std::vector<conv_bias::TestArg> args, | |||||
} | } | ||||
} | } | ||||
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, Handle* handle, | void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, Handle* handle, | ||||
RNG* rng, float epsilon, DType type0, DType type1, | RNG* rng, float epsilon, DType type0, DType type1, | ||||
DType type2, DType type3, const char* algo_name) { | DType type2, DType type3, const char* algo_name) { | ||||
@@ -93,7 +93,6 @@ void check_conv_bias(std::vector<megdnn::test::conv_bias::TestArg> args, | |||||
void checker_conv_bias_int8x8x16( | void checker_conv_bias_int8x8x16( | ||||
std::vector<megdnn::test::conv_bias::TestArg> args, | std::vector<megdnn::test::conv_bias::TestArg> args, | ||||
megdnn::Handle* handle, const char* algo_name); | megdnn::Handle* handle, const char* algo_name); | ||||
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, | void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, | ||||
Handle* handle, RNG* rng, float epsilon, | Handle* handle, RNG* rng, float epsilon, | ||||
DType type0, DType type1, DType type2, | DType type0, DType type1, DType type2, | ||||
@@ -1145,7 +1145,6 @@ TEST(SmallVectorTest, SwapMoveOnly) { | |||||
} | } | ||||
} | } | ||||
} | } | ||||
} // anonymous namespace | } // anonymous namespace | ||||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -40,7 +40,6 @@ TensorLayout make_layout(std::initializer_list<size_t> shape, | |||||
} | } | ||||
} // anonymous namespace | } // anonymous namespace | ||||
#if MEGDNN_64_BIT | #if MEGDNN_64_BIT | ||||
TEST(BASIC_TYPES, TOTAL_NR_ELEMS) { | TEST(BASIC_TYPES, TOTAL_NR_ELEMS) { | ||||
TensorShape shp{1u<<31, 1u<<31}; | TensorShape shp{1u<<31, 1u<<31}; | ||||
@@ -340,5 +339,4 @@ TEST(BASIC_TYPES, TENSOR_LAYOUT_FMT_LOW_BITS_VALID) { | |||||
LowbitsAlignedToBytesTensorFormat::make(4_z)), | LowbitsAlignedToBytesTensorFormat::make(4_z)), | ||||
MegDNNError); | MegDNNError); | ||||
} | } | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -697,7 +697,6 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) { | |||||
conv_bias::get_int8_chwn4_args_small_batch(1)); | conv_bias::get_int8_chwn4_args_small_batch(1)); | ||||
} | } | ||||
TEST_F(CUDA, FALLBACK_CONV_QS8) { | TEST_F(CUDA, FALLBACK_CONV_QS8) { | ||||
require_compute_capability_eq(7, 5); | require_compute_capability_eq(7, 5); | ||||
Checker<ConvBiasForward> checker(handle_cuda()); | Checker<ConvBiasForward> checker(handle_cuda()); | ||||
@@ -1100,7 +1099,6 @@ TEST_F(CUDA, BENCHMARK_CONV_BIAS_INT8_NCHW4_NCHW) { | |||||
run({{16, 16, 46, 80, 4}, {32, 16, 3, 3, 4}, {1, 32, 1, 1}}); | run({{16, 16, 46, 80, 4}, {32, 16, 3, 3, 4}, {1, 32, 1, 1}}); | ||||
} | } | ||||
#if CUDA_VERSION >= 10020 | #if CUDA_VERSION >= 10020 | ||||
TEST_F(CUDA, BENCHMARK_CUTLASS_CONV_BIAS_INT8_NCHW32) { | TEST_F(CUDA, BENCHMARK_CUTLASS_CONV_BIAS_INT8_NCHW32) { | ||||
require_compute_capability(7, 5); | require_compute_capability(7, 5); | ||||
@@ -32,7 +32,6 @@ TYPED_TEST(CUDA_ELEMWISE_MULTI_TYPE, run) { | |||||
elemwise_multi_type::run_test<TypeParam>(this->handle_cuda()); | elemwise_multi_type::run_test<TypeParam>(this->handle_cuda()); | ||||
} | } | ||||
using Mode = ElemwiseMultiType::Param::Mode; | using Mode = ElemwiseMultiType::Param::Mode; | ||||
static void run_test(int arity, Checker<ElemwiseMultiType>& checker, Mode mode) { | static void run_test(int arity, Checker<ElemwiseMultiType>& checker, Mode mode) { | ||||
for (auto type : std::vector<std::pair<DType, DType>>{ | for (auto type : std::vector<std::pair<DType, DType>>{ | ||||
@@ -22,7 +22,6 @@ | |||||
using namespace megdnn; | using namespace megdnn; | ||||
using namespace test; | using namespace test; | ||||
TEST_F(CUDA, SLEEP) { | TEST_F(CUDA, SLEEP) { | ||||
auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>(); | auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>(); | ||||
@@ -53,6 +52,5 @@ TEST_F(CUDA, SLEEP) { | |||||
} | } | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen | ||||
@@ -75,7 +75,6 @@ TEST_F(FALLBACK, CONV_BIAS_FORWARD) { | |||||
.execs({src_shape, filter_shape, bias_shape, {}, {}}) | .execs({src_shape, filter_shape, bias_shape, {}, {}}) | ||||
.execs({src_shape, filter_shape, bias_shape_channel, {}, {}}); | .execs({src_shape, filter_shape, bias_shape_channel, {}, {}}); | ||||
} | } | ||||
} | } | ||||
std::vector<conv_bias::TestArg> get_conv_bias_args( | std::vector<conv_bias::TestArg> get_conv_bias_args( | ||||
@@ -236,7 +235,6 @@ TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD_QUANTIZED) { | |||||
"FALLBACK_NAIVE"); | "FALLBACK_NAIVE"); | ||||
} | } | ||||
#if MEGDNN_WITH_BENCHMARK | #if MEGDNN_WITH_BENCHMARK | ||||
TEST_F(FALLBACK, BENCHMARK_CONVBIAS) { | TEST_F(FALLBACK, BENCHMARK_CONVBIAS) { | ||||
constexpr size_t RUNS = 10; | constexpr size_t RUNS = 10; | ||||
@@ -139,3 +139,52 @@ def batch_conv_bias_activation( | |||||
) | ) | ||||
(outputs,) = apply(op, inp, weight, bias) | (outputs,) = apply(op, inp, weight, bias) | ||||
return outputs | return outputs | ||||
def conv_transpose2d(
    inp: Tensor,
    weight: Tensor,
    bias: Tensor = None,
    dtype=None,
    stride: Union[int, Tuple[int, int]] = 1,
    padding: Union[int, Tuple[int, int]] = 0,
    dilation: Union[int, Tuple[int, int]] = 1,
    groups: int = 1,
    conv_mode="cross_correlation",
    compute_mode="default",
) -> Tensor:
    r"""Quantized 2D transposed convolution.

    Builds a ``builtin.ConvolutionBackwardData`` op from the stride, padding
    and dilation settings and applies it to ``(weight, inp)``.

    :param inp: input tensor; passed as the second operand of the op.
    :param weight: convolution kernel; passed as the first operand of the op.
    :param bias: must be ``None``; a bias is not supported yet.
    :param dtype: forwarded unchanged to the op as ``dtype``
        (presumably the quantized output dtype -- confirm with callers).
    :param stride: int or pair, expanded through ``_pair_nonzero``.
    :param padding: int or pair, expanded through ``_pair``.
    :param dilation: int or pair, expanded through ``_pair_nonzero``.
    :param groups: must be ``1``; grouped transposed conv is not supported yet.
    :param conv_mode: only cross-correlation is accepted, given either as a
        string (case-insensitive) or as an object whose ``name`` is
        ``"CROSS_CORRELATION"``.
    :param compute_mode: only the default mode is accepted, given either as a
        string (case-insensitive) or as an object whose ``name`` is
        ``"DEFAULT"``.
    :raises NotImplementedError: if ``groups != 1`` or ``bias`` is not ``None``.
    :return: the single output of the applied op.
    """

    def _mode_is(mode, expected: str) -> bool:
        # Only call ``.lower()`` on real strings so that enum-like values
        # reach the ``.name`` comparison instead of raising AttributeError.
        if isinstance(mode, str) and mode.lower() == expected:
            return True
        return getattr(mode, "name", None) == expected.upper()

    assert _mode_is(
        conv_mode, "cross_correlation"
    ), "unsupported conv_mode: {!r}".format(conv_mode)
    assert _mode_is(compute_mode, "default"), "unsupported compute_mode: {!r}".format(
        compute_mode
    )

    if groups != 1:
        raise NotImplementedError(
            "group quantized transposed conv2d is not supported yet."
        )
    if bias is not None:
        raise NotImplementedError(
            "bias of quantized transposed conv2d is not supported yet."
        )

    pad_h, pad_w = _pair(padding)
    stride_h, stride_w = _pair_nonzero(stride)
    dilate_h, dilate_w = _pair_nonzero(dilation)

    # should be replaced by Op with bias such as ConvolutionBackwardDataBias
    op = builtin.ConvolutionBackwardData(
        stride_h=stride_h,
        stride_w=stride_w,
        pad_h=pad_h,
        pad_w=pad_w,
        dilate_h=dilate_h,
        dilate_w=dilate_w,
        strategy=get_execution_strategy(),
        dtype=dtype,
        compute_mode=compute_mode,
        mode=conv_mode,
    )
    # Operand order is (weight, inp): the kernel comes first for this op.
    (output,) = apply(op, weight, inp)
    return output
@@ -651,11 +651,11 @@ class ConvTranspose2d(_ConvNd): | |||||
# Assume format is NCHW | # Assume format is NCHW | ||||
return (1, self.out_channels, 1, 1) | return (1, self.out_channels, 1, 1) | ||||
def forward(self, inp): | |||||
def calc_conv_transpose2d(self, inp, weight, bias): | |||||
return conv_transpose2d( | return conv_transpose2d( | ||||
inp, | inp, | ||||
self.weight, | |||||
self.bias, | |||||
weight, | |||||
bias, | |||||
self.stride, | self.stride, | ||||
self.padding, | self.padding, | ||||
self.dilation, | self.dilation, | ||||
@@ -664,6 +664,9 @@ class ConvTranspose2d(_ConvNd): | |||||
self.compute_mode, | self.compute_mode, | ||||
) | ) | ||||
def forward(self, inp): | |||||
return self.calc_conv_transpose2d(inp, self.weight, self.bias) | |||||
class LocalConv2d(Conv2d): | class LocalConv2d(Conv2d): | ||||
r""" | r""" | ||||
@@ -7,7 +7,7 @@ | |||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
from .batch_matmul_activation import BatchMatMulActivation | from .batch_matmul_activation import BatchMatMulActivation | ||||
from .concat import Concat | from .concat import Concat | ||||
from .conv import Conv2d, ConvRelu2d | |||||
from .conv import Conv2d, ConvRelu2d, ConvTranspose2d | |||||
from .conv_bn import ConvBn2d, ConvBnRelu2d | from .conv_bn import ConvBn2d, ConvBnRelu2d | ||||
from .elemwise import Elemwise | from .elemwise import Elemwise | ||||
from .linear import Linear | from .linear import Linear | ||||
@@ -57,3 +57,42 @@ class ConvRelu2d(Conv2d): | |||||
def forward(self, inp): | def forward(self, inp): | ||||
return self.apply_quant_activation(F.relu(self.calc_conv_qat(inp))) | return self.apply_quant_activation(F.relu(self.calc_conv_qat(inp))) | ||||
class ConvTranspose2d(Float.ConvTranspose2d, QATModule):
    r"""
    Quantization-aware-training flavour of :class:`~.module.ConvTranspose2d`,
    built on :class:`~.QATModule`.
    Can be used together with :class:`~.Observer` and :class:`~.FakeQuantize`.
    """

    def calc_conv_transpose2d_qat(self, inp):
        # The weight goes through its quant hook first, because the bias hook
        # takes the already-processed weight as an argument.
        quant_weight = self.apply_quant_weight(self.weight)
        quant_bias = self.apply_quant_bias(self.bias, inp, quant_weight)
        return self.calc_conv_transpose2d(inp, quant_weight, quant_bias)

    @classmethod
    def from_float_module(cls, float_module: Float.ConvTranspose2d):
        r"""
        Build a :class:`~.QATModule` from a float :class:`~.Module`.

        The new instance is constructed with the float module's
        hyper-parameters and name, and is then assigned the float module's
        ``weight`` and ``bias`` objects directly.
        """
        ctor_args = (
            float_module.in_channels,
            float_module.out_channels,
            float_module.kernel_size,
            float_module.stride,
            float_module.padding,
            float_module.dilation,
            float_module.groups,
            float_module.bias is not None,
            float_module.conv_mode,
            float_module.compute_mode,
        )
        converted = cls(*ctor_args, name=float_module.name)
        converted.weight = float_module.weight
        converted.bias = float_module.bias
        return converted

    def forward(self, inp):
        conv_out = self.calc_conv_transpose2d_qat(inp)
        return self.apply_quant_activation(conv_out)
@@ -7,7 +7,7 @@ | |||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
from .batch_matmul_activation import BatchMatMulActivation | from .batch_matmul_activation import BatchMatMulActivation | ||||
from .concat import Concat | from .concat import Concat | ||||
from .conv import Conv2d, ConvRelu2d | |||||
from .conv import Conv2d, ConvRelu2d, ConvTranspose2d | |||||
from .conv_bn import ConvBn2d, ConvBnRelu2d | from .conv_bn import ConvBn2d, ConvBnRelu2d | ||||
from .elemwise import Elemwise | from .elemwise import Elemwise | ||||
from .linear import Linear | from .linear import Linear | ||||
@@ -12,6 +12,7 @@ import numpy as np | |||||
from ... import module as Float | from ... import module as Float | ||||
from ...core.tensor import dtype | from ...core.tensor import dtype | ||||
from ...functional.nn import conv_bias_activation | from ...functional.nn import conv_bias_activation | ||||
from ...functional.quantized import conv_transpose2d | |||||
from ...tensor import Parameter | from ...tensor import Parameter | ||||
from ..qat import conv as QAT | from ..qat import conv as QAT | ||||
from .module import QuantizedModule | from .module import QuantizedModule | ||||
@@ -108,3 +109,98 @@ class ConvRelu2d(Conv2d): | |||||
def forward(self, inp): | def forward(self, inp): | ||||
return self.calc_conv_quantized(inp, nonlinear_mode="relu") | return self.calc_conv_quantized(inp, nonlinear_mode="relu") | ||||
class ConvTranspose2d(Float.ConvTranspose2d, QuantizedModule):
    r"""Quantized version of :class:`~.qat.ConvTranspose2d`.

    Applies a 2D transposed convolution over a quantized input tensor, used
    for inference only.

    The parameter is same with :class:`~.module.ConvTranspose2d` but dtype.

    :param dtype: data type of the output, should be qint8.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        stride: Union[int, Tuple[int, int]] = 1,
        padding: Union[int, Tuple[int, int]] = 0,
        dilation: Union[int, Tuple[int, int]] = 1,
        groups: int = 1,
        bias: bool = True,
        conv_mode: str = "cross_correlation",
        compute_mode: str = "default",
        dtype=None,
        **kwargs
    ):
        # Forward extra keyword arguments (e.g. ``name``, which
        # ``from_qat_module`` passes) to the float base class instead of
        # silently discarding them.
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            conv_mode=conv_mode,
            compute_mode=compute_mode,
            **kwargs
        )
        self.output_dtype = dtype

    @classmethod
    def from_qat_module(cls, qat_module: QAT.ConvTranspose2d):
        r"""
        Return a :class:`~.QuantizedModule` instance converted from a
        :class:`~.QATModule` instance.

        The output dtype is taken from ``qat_module.get_activation_dtype()``.
        The weight is cast to ``qat_module.get_weight_dtype()`` and wrapped in
        a new :class:`Parameter`; the bias, when present, is copied into a new
        :class:`Parameter` without a cast.
        """
        output_dtype = qat_module.get_activation_dtype()
        qconv = cls(
            qat_module.in_channels,
            qat_module.out_channels,
            qat_module.kernel_size,
            qat_module.stride,
            qat_module.padding,
            qat_module.dilation,
            qat_module.groups,
            qat_module.bias is not None,
            qat_module.conv_mode,
            qat_module.compute_mode,
            dtype=output_dtype,
            name=qat_module.name,
        )
        weight = qat_module.weight.astype(qat_module.get_weight_dtype())
        qconv.weight = Parameter(weight.numpy(), name=qat_module.weight.name)
        qconv.bias = (
            Parameter(qat_module.bias.numpy(), name=qat_module.bias.name)
            if qat_module.bias is not None
            else None
        )
        return qconv

    def calc_conv_transpose2d_quantized(self, inp):
        r"""
        Run the functional ``conv_transpose2d`` with this module's settings.

        When a bias is present it is cast to ``qint32`` with scale
        ``input_scale * weight_scale`` before being handed over.
        """
        # NOTE(review): the functional conv_transpose2d introduced alongside
        # this class appears to reject a non-None bias -- confirm whether
        # modules built with bias=True are expected to work.
        bias = None
        if self.bias is not None:
            inp_scale = dtype.get_scale(inp.dtype)
            w_scale = dtype.get_scale(self.weight.dtype)
            bias = self.bias.astype(dtype.qint32(inp_scale * w_scale))

        return conv_transpose2d(
            inp=inp,
            weight=self.weight,
            bias=bias,
            dtype=self.output_dtype,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
            conv_mode=self.conv_mode,
            compute_mode=self.compute_mode,
        )

    def forward(self, inp):
        return self.calc_conv_transpose2d_quantized(inp)
@@ -13,5 +13,3 @@ from .fake_quant import _FakeQuantize | |||||
from .observer import MinMaxObserver | from .observer import MinMaxObserver | ||||
from .qconfig import QConfig | from .qconfig import QConfig | ||||
from .utils import QParams | from .utils import QParams | ||||
@@ -69,7 +69,6 @@ class PersistentCacheOnServer(_PersistentCache): | |||||
def make_user_prefix(cls): | def make_user_prefix(cls): | ||||
return "mgbcache:{}".format(getpass.getuser()) | return "mgbcache:{}".format(getpass.getuser()) | ||||
def _make_key(self, category, key): | def _make_key(self, category, key): | ||||
prefix_with_version = "{}:MGB{}".format(self._prefix, __version__) | prefix_with_version = "{}:MGB{}".format(self._prefix, __version__) | ||||
return b"@".join( | return b"@".join( | ||||
@@ -86,5 +85,3 @@ class PersistentCacheOnServer(_PersistentCache): | |||||
key = self._make_key(category, key) | key = self._make_key(category, key) | ||||
self._prev_get_refkeep = conn.get(key) | self._prev_get_refkeep = conn.get(key) | ||||
return self._prev_get_refkeep | return self._prev_get_refkeep | ||||
@@ -38,7 +38,6 @@ class build_ext(_build_ext): | |||||
modpath = str(pathlib.Path(*modpath).resolve()) | modpath = str(pathlib.Path(*modpath).resolve()) | ||||
copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) | copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) | ||||
package_name = 'MegEngine' | package_name = 'MegEngine' | ||||
v = {} | v = {} | ||||
@@ -79,7 +78,6 @@ megengine_data += [ | |||||
for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*') | for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*') | ||||
] | ] | ||||
with open('requires.txt') as f: | with open('requires.txt') as f: | ||||
requires = f.read().splitlines() | requires = f.read().splitlines() | ||||
with open('requires-style.txt') as f: | with open('requires-style.txt') as f: | ||||
@@ -108,8 +106,6 @@ setup_kwargs = dict( | |||||
cmdclass={'build_ext': build_ext}, | cmdclass={'build_ext': build_ext}, | ||||
scripts = ['./megengine/tools/mge'], | scripts = ['./megengine/tools/mge'], | ||||
) | ) | ||||
setup_kwargs.update(dict( | setup_kwargs.update(dict( | ||||
classifiers=[ | classifiers=[ | ||||
'Development Status :: 3 - Alpha', | 'Development Status :: 3 - Alpha', | ||||
@@ -876,8 +876,6 @@ def test_nms_is_same(): | |||||
assert op3 != op4 | assert op3 != op4 | ||||
def test_argmxx_on_inf(): | def test_argmxx_on_inf(): | ||||
def run_argmax(): | def run_argmax(): | ||||
x = F.zeros((100, 100)) | x = F.zeros((100, 100)) | ||||
@@ -13,6 +13,7 @@ from megengine.module import ( | |||||
Conv2d, | Conv2d, | ||||
ConvBn2d, | ConvBn2d, | ||||
ConvRelu2d, | ConvRelu2d, | ||||
ConvTranspose2d, | |||||
DequantStub, | DequantStub, | ||||
Module, | Module, | ||||
QuantStub, | QuantStub, | ||||
@@ -202,3 +203,40 @@ def test_quantize_batchmatmul_activation(): | |||||
infer_cg = cgtools.GraphInference(file)[0] | infer_cg = cgtools.GraphInference(file)[0] | ||||
dumped_outputs = list(infer_cg.run(inputs.numpy()).values())[0] | dumped_outputs = list(infer_cg.run(inputs.numpy()).values())[0] | ||||
np.testing.assert_allclose(quantize_outputs.numpy(), dumped_outputs, atol=1e-6) | np.testing.assert_allclose(quantize_outputs.numpy(), dumped_outputs, atol=1e-6) | ||||
def test_qat_conv_transpose2d():
    """Check that a QAT-converted ConvTranspose2d matches the float module.

    With fake quantization disabled, the float network and its QAT
    counterpart are compared in both train and eval mode, with and without a
    bias.
    """
    c_in, c_out, ksize = 32, 64, 3

    class TestNet(Module):
        def __init__(self, bias):
            super().__init__()
            self.quant = QuantStub()
            self.dequant = DequantStub()
            self.conv = ConvTranspose2d(c_in, c_out, ksize, bias=bias)

        def forward(self, inp):
            return self.dequant(self.conv(self.quant(inp)))

    inputs = tensor(np.random.randn(4, c_in, 32, 32).astype(np.float32))

    for with_bias in (True, False):
        float_net = TestNet(with_bias)
        float_net.train()
        qat_net = quantize_qat(float_net, inplace=False)
        disable_fake_quant(qat_net)

        # Train mode comparison.
        expected = float_net(inputs)
        actual = qat_net(inputs)
        np.testing.assert_allclose(expected.numpy(), actual.numpy())

        # Eval mode comparison; statement order is kept as in the original.
        float_net.eval()
        expected = float_net(inputs)
        qat_net.eval()
        actual = qat_net(inputs)
        np.testing.assert_allclose(expected.numpy(), actual.numpy())
@@ -92,8 +92,6 @@ def test_tqt(): | |||||
np.testing.assert_allclose(g_s.numpy(), g_s_np, rtol=5e-5, atol=5e-5) | np.testing.assert_allclose(g_s.numpy(), g_s_np, rtol=5e-5, atol=5e-5) | ||||
def _save_to(self, name="grad"): | def _save_to(self, name="grad"): | ||||
def callback(grad): | def callback(grad): | ||||
setattr(self, name, grad) | setattr(self, name, grad) | ||||
@@ -14,6 +14,7 @@ import megengine.functional as F | |||||
from megengine.core.tensor import dtype | from megengine.core.tensor import dtype | ||||
from megengine.device import get_device_count | from megengine.device import get_device_count | ||||
from megengine.functional.elemwise import _elemwise_multi_type, _elwise | from megengine.functional.elemwise import _elemwise_multi_type, _elwise | ||||
from megengine.module.quantized.conv import ConvTranspose2d | |||||
from megengine.quantization import QuantMode, create_qparams | from megengine.quantization import QuantMode, create_qparams | ||||
@@ -168,3 +169,94 @@ def test_conv_bias(): | |||||
run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu") | run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu") | ||||
run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu") | run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu") | ||||
def test_conv_transpose2d(): | |||||
rng = np.random.RandomState(seed=2021) | |||||
def test_func( | |||||
N, | |||||
IC, | |||||
IH, | |||||
IW, | |||||
OC, | |||||
KH, | |||||
KW, | |||||
SH, | |||||
SW, | |||||
PH, | |||||
PW, | |||||
DH, | |||||
DW, | |||||
groups=1, | |||||
has_bias=True, | |||||
conv_mode: str = "cross_correlation", | |||||
compute_mode: str = "default", | |||||
): | |||||
inp_scale = np.float32(rng.uniform(low=0.04, high=0.06)) | |||||
weight_scale = np.float32(rng.uniform(low=0.04, high=0.06)) | |||||
bias_scale = inp_scale * weight_scale | |||||
out_scale = np.float32(rng.uniform(low=0.04, high=0.06)) | |||||
inp_dtype = dtype.qint8(inp_scale) | |||||
weight_dtype = dtype.qint8(weight_scale) | |||||
bias_dtype = dtype.qint32(bias_scale) | |||||
out_dtype = dtype.qint8(out_scale) | |||||
inp_fp32 = rng.uniform(low=-1, high=1, size=(N, IC, IH, IW)).astype(np.float32) | |||||
weight_fp32 = rng.uniform(low=-1, high=1, size=(IC, OC, KH, KW)).astype( | |||||
np.float32 | |||||
) | |||||
bias_fp32 = rng.uniform(low=-1, high=1, size=(1, OC, 1, 1)).astype(np.float32) | |||||
inp_int8 = dtype.convert_to_qint8(inp_fp32, inp_dtype) | |||||
weight_int8 = dtype.convert_to_qint8(weight_fp32, weight_dtype) | |||||
bias_int32 = dtype.convert_to_qint32(bias_fp32, bias_dtype) | |||||
inp_int8 = mge.tensor(inp_int8, dtype=inp_dtype) | |||||
weight_int8 = mge.Parameter(weight_int8, dtype=weight_dtype) | |||||
bias_int32 = mge.Parameter(bias_int32, dtype=bias_dtype) | |||||
inp_fp32 = inp_int8.astype("float32") | |||||
weight_fp32 = weight_int8.astype("float32") | |||||
bias_fp32 = bias_int32.astype("float32") | |||||
expected = F.conv_transpose2d( | |||||
inp_fp32, | |||||
weight_fp32, | |||||
bias_fp32 if has_bias else None, | |||||
stride=(SH, SW), | |||||
padding=(PH, PW), | |||||
dilation=(DH, DW), | |||||
groups=groups, | |||||
conv_mode=conv_mode, | |||||
compute_mode=compute_mode, | |||||
) | |||||
expected = dtype.convert_to_qint8(expected.numpy(), out_dtype) | |||||
expected = dtype.convert_from_qint8(expected) | |||||
conv_transpose2d = ConvTranspose2d( | |||||
in_channels=IC, | |||||
out_channels=OC, | |||||
kernel_size=(KH, KW), | |||||
stride=(SH, SW), | |||||
padding=(PH, PW), | |||||
dilation=(DH, DW), | |||||
groups=groups, | |||||
bias=has_bias, | |||||
conv_mode=conv_mode, | |||||
compute_mode=compute_mode, | |||||
dtype=out_dtype, | |||||
) | |||||
conv_transpose2d.weight = mge.Parameter(weight_int8) | |||||
if has_bias: | |||||
conv_transpose2d.bias = mge.Parameter(bias_int32) | |||||
result = conv_transpose2d.forward(inp_int8).numpy() | |||||
result = dtype.convert_from_qint8(result) | |||||
np.testing.assert_allclose(result, expected, atol=out_scale) | |||||
test_func(1, 4, 1, 1, 4, 1, 1, 1, 1, 0, 0, 1, 1, 1, False) | |||||
test_func(2, 4, 3, 1, 8, 1, 1, 1, 1, 0, 0, 1, 1, 1, False) | |||||
test_func(4, 4, 16, 16, 8, 3, 3, 1, 1, 1, 1, 1, 1, 1, False) | |||||
test_func(32, 64, 36, 28, 16, 3, 2, 1, 3, 1, 0, 1, 1, 1, False) |
@@ -486,8 +486,6 @@ def test_topk(): | |||||
check_pygraph_dump(fwd, [x], [top, indices]) | check_pygraph_dump(fwd, [x], [top, indices]) | ||||
def test_random(): | def test_random(): | ||||
@trace(symbolic=True, capture_as_const=True) | @trace(symbolic=True, capture_as_const=True) | ||||
def fwd(): | def fwd(): | ||||
@@ -723,8 +721,6 @@ def test_elemwise_multitype(): | |||||
check_pygraph_dump(fwd, [x, y], [result]) | check_pygraph_dump(fwd, [x, y], [result]) | ||||
def test_cvtcolor(): | def test_cvtcolor(): | ||||
inp = np.random.randn(3, 3, 3, 3).astype(np.float32) | inp = np.random.randn(3, 3, 3, 3).astype(np.float32) | ||||
x = Tensor(inp) | x = Tensor(inp) | ||||
@@ -7,4 +7,3 @@ | |||||
# software distributed under the License is distributed on an | # software distributed under the License is distributed on an | ||||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
__version__ = "1.6.0.dev" | __version__ = "1.6.0.dev" | ||||
@@ -43,6 +43,11 @@ auto apply_on_var_node( | |||||
const VarNodeArray& inputs) { | const VarNodeArray& inputs) { | ||||
auto&& conv = static_cast<const ConvolutionBackwardData&>(def); | auto&& conv = static_cast<const ConvolutionBackwardData&>(def); | ||||
OperatorNodeConfig config{conv.make_name()}; | OperatorNodeConfig config{conv.make_name()}; | ||||
DType output_dtype = conv.dtype; | |||||
if (output_dtype.valid()) { | |||||
config.output_dtype(output_dtype); | |||||
} | |||||
if (inputs.size() == 2) { | if (inputs.size() == 2) { | ||||
return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], conv.param(), conv.policy(), config); | return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], conv.param(), conv.policy(), config); | ||||
} else { | } else { | ||||
@@ -192,7 +192,6 @@ function do_build() { | |||||
#handle dlopen path | #handle dlopen path | ||||
install_name_tool -change @rpath/libmegengine_export.dylib @loader_path/lib/libmegengine_export.dylib _imperative_rt.so | install_name_tool -change @rpath/libmegengine_export.dylib @loader_path/lib/libmegengine_export.dylib _imperative_rt.so | ||||
#copy megbrain_export lib | #copy megbrain_export lib | ||||
DEPEND_LIB=${BUILD_DIR}/staging/megengine/core/lib/ | DEPEND_LIB=${BUILD_DIR}/staging/megengine/core/lib/ | ||||
rm -rf ${DEPEND_LIB} | rm -rf ${DEPEND_LIB} | ||||
@@ -209,7 +208,6 @@ function do_build() { | |||||
echo "comapt whl name: ${compat_whl_name}" | echo "comapt whl name: ${compat_whl_name}" | ||||
cp ${BUILD_DIR}/staging/dist/Meg*.whl ${MACOS_WHL_HOME}/${compat_whl_name} | cp ${BUILD_DIR}/staging/dist/Meg*.whl ${MACOS_WHL_HOME}/${compat_whl_name} | ||||
cd ${SRC_DIR} | cd ${SRC_DIR} | ||||
echo "" | echo "" | ||||
echo "##############################################################################################" | echo "##############################################################################################" | ||||
@@ -220,12 +218,10 @@ function do_build() { | |||||
done | done | ||||
} | } | ||||
function third_party_prepare() { | function third_party_prepare() { | ||||
echo "init third_party..." | echo "init third_party..." | ||||
${SRC_DIR}/third_party/prepare.sh | ${SRC_DIR}/third_party/prepare.sh | ||||
if [[ -z ${ALREADY_INSTALL_MKL} ]] | if [[ -z ${ALREADY_INSTALL_MKL} ]] | ||||
then | then | ||||
echo "init third_party..." | echo "init third_party..." | ||||
@@ -55,13 +55,11 @@ function patch_elf_depend_lib_mgb_mge() { | |||||
patchelf --force-rpath --set-rpath '$ORIGIN/.' ${LIBS_DIR}/libmegengine_export.so | patchelf --force-rpath --set-rpath '$ORIGIN/.' ${LIBS_DIR}/libmegengine_export.so | ||||
handle_strip ${LIBS_DIR}/libmegengine_export.so | handle_strip ${LIBS_DIR}/libmegengine_export.so | ||||
# as some version of cudnn/trt libs have dlopen libs, so we can not use auditwheel | # as some version of cudnn/trt libs have dlopen libs, so we can not use auditwheel | ||||
# TODO: PR for auditwheel to support args for dlopen libs | # TODO: PR for auditwheel to support args for dlopen libs | ||||
handle_copy_cuda_libs ${LIBS_DIR} | handle_copy_cuda_libs ${LIBS_DIR} | ||||
} | } | ||||
SRC_DIR=$(readlink -f "`dirname $0`/../../../") | SRC_DIR=$(readlink -f "`dirname $0`/../../../") | ||||
source ${SRC_DIR}/scripts/whl/utils/utils.sh | source ${SRC_DIR}/scripts/whl/utils/utils.sh | ||||
@@ -142,7 +140,6 @@ do | |||||
mkdir -p staging | mkdir -p staging | ||||
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | ||||
cd ${BUILD_DIR}/staging/megengine/core | cd ${BUILD_DIR}/staging/megengine/core | ||||
mkdir -p lib/ucx | mkdir -p lib/ucx | ||||
patch_elf_depend_lib_mgb_mge | patch_elf_depend_lib_mgb_mge | ||||
@@ -158,7 +155,6 @@ do | |||||
echo "comapt whl name: ${compat_whl_name}" | echo "comapt whl name: ${compat_whl_name}" | ||||
mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name} | mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name} | ||||
cd /home/output | cd /home/output | ||||
chown -R ${UID}.${UID} . | chown -R ${UID}.${UID} . | ||||
# compat for root-less docker env to remove output at host side | # compat for root-less docker env to remove output at host side | ||||
@@ -70,7 +70,6 @@ then | |||||
BUILD_WHL_CPU_ONLY="OFF" | BUILD_WHL_CPU_ONLY="OFF" | ||||
fi | fi | ||||
# config NVIDIA libs | # config NVIDIA libs | ||||
TRT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/TensorRT-6.0.1.5/lib/nvinfer.dll" | TRT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/TensorRT-6.0.1.5/lib/nvinfer.dll" | ||||
CUDNN_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/cudnn-10.1-windows10-x64-v7.6.5.32/cuda/bin/cudnn64_7.dll" | CUDNN_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/cudnn-10.1-windows10-x64-v7.6.5.32/cuda/bin/cudnn64_7.dll" | ||||
@@ -102,14 +101,11 @@ function copy_more_dll() { | |||||
# empty.file to triger setup.py to create a null empty | # empty.file to triger setup.py to create a null empty | ||||
echo "empty" > ${CP_WHL_DST_IMP}/empty.file | echo "empty" > ${CP_WHL_DST_IMP}/empty.file | ||||
if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then | if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then | ||||
echo "copy nvidia lib to whl use...." | echo "copy nvidia lib to whl use...." | ||||
depend_real_copy ${CP_WHL_DST_IMP} | depend_real_copy ${CP_WHL_DST_IMP} | ||||
fi | fi | ||||
} | } | ||||
BUILD_DIR=${SRC_DIR}/build_dir/host/build/ | BUILD_DIR=${SRC_DIR}/build_dir/host/build/ | ||||
# here we just treat cu file should not in the increment build file list | # here we just treat cu file should not in the increment build file list | ||||
@@ -194,14 +190,12 @@ function do_build() { | |||||
llvm-strip -s ${rt_file} | llvm-strip -s ${rt_file} | ||||
mv ${rt_file} _imperative_rt.pyd | mv ${rt_file} _imperative_rt.pyd | ||||
copy_more_dll | copy_more_dll | ||||
cd ${BUILD_DIR}/staging | cd ${BUILD_DIR}/staging | ||||
echo "call setup.py now" | echo "call setup.py now" | ||||
${PYTHON_DIR}/python3 setup.py bdist_wheel | ${PYTHON_DIR}/python3 setup.py bdist_wheel | ||||
cp ${BUILD_DIR}/staging/dist/Meg*.whl ${WINDOWS_WHL_HOME}/ | cp ${BUILD_DIR}/staging/dist/Meg*.whl ${WINDOWS_WHL_HOME}/ | ||||
echo "" | echo "" | ||||
echo "##############################################################################################" | echo "##############################################################################################" | ||||
echo "windows whl package location: ${WINDOWS_WHL_HOME}" | echo "windows whl package location: ${WINDOWS_WHL_HOME}" | ||||
@@ -215,7 +209,6 @@ function third_party_prepare() { | |||||
echo "init third_party..." | echo "init third_party..." | ||||
${SRC_DIR}/third_party/prepare.sh | ${SRC_DIR}/third_party/prepare.sh | ||||
if [[ -z ${ALREADY_INSTALL_MKL} ]] | if [[ -z ${ALREADY_INSTALL_MKL} ]] | ||||
then | then | ||||
echo "init third_party..." | echo "init third_party..." | ||||
@@ -35,8 +35,6 @@ | |||||
#include "megcore_atlas.h" | #include "megcore_atlas.h" | ||||
#endif | #endif | ||||
using namespace mgb; | using namespace mgb; | ||||
/* =================== MegDNNHandle =================== */ | /* =================== MegDNNHandle =================== */ | ||||
@@ -102,7 +100,6 @@ MegDNNHandle::MegDNNHandle(const CompNodeEnv& env) { | |||||
} | } | ||||
#endif | #endif | ||||
if (env.property().type == CompNode::DeviceType::CPU) { | if (env.property().type == CompNode::DeviceType::CPU) { | ||||
megcoreCreateDeviceHandle(&m_dev_hdl, megcorePlatformCPU); | megcoreCreateDeviceHandle(&m_dev_hdl, megcorePlatformCPU); | ||||
megcoreCreateComputingHandleWithCPUDispatcher(&m_comp_hdl, m_dev_hdl, | megcoreCreateComputingHandleWithCPUDispatcher(&m_comp_hdl, m_dev_hdl, | ||||
@@ -234,7 +231,6 @@ void CompNodeEnv::init_cuda_async(int dev, CompNode comp_node, | |||||
} | } | ||||
#endif | #endif | ||||
#if MGB_ATLAS | #if MGB_ATLAS | ||||
void mgb::_on_atlas_error(const char* expr, int err, const char* file, | void mgb::_on_atlas_error(const char* expr, int err, const char* file, | ||||
@@ -258,8 +254,6 @@ void CompNodeEnv::init_atlas(CompNode comp_node, const AtlasEnv& env) { | |||||
} | } | ||||
#endif | #endif | ||||
#if MGB_ROCM | #if MGB_ROCM | ||||
void mgb::_on_hip_error(const char* expr, hipError_t err, const char* file, | void mgb::_on_hip_error(const char* expr, hipError_t err, const char* file, | ||||
@@ -381,7 +375,6 @@ void CompNodeEnv::init_cpu(const CpuEnv& env, CompNode comp_node) { | |||||
MegDNNHandle::get(*this).handle()->alignment_requirement(); | MegDNNHandle::get(*this).handle()->alignment_requirement(); | ||||
} | } | ||||
#if MGB_CAMBRICON | #if MGB_CAMBRICON | ||||
void CompNodeEnv::init_cnrt(int dev, CompNode comp_node, | void CompNodeEnv::init_cnrt(int dev, CompNode comp_node, | ||||
const ContinuationCtx<cnrtQueue_t>& cont) { | const ContinuationCtx<cnrtQueue_t>& cont) { | ||||
@@ -446,7 +439,6 @@ void CompNodeEnv::fini() { | |||||
MGB_ATLAS_CHECK(aclrtDestroyStream(m_atlas_env.stream)); | MGB_ATLAS_CHECK(aclrtDestroyStream(m_atlas_env.stream)); | ||||
} | } | ||||
#endif | #endif | ||||
} | } | ||||
#if MGB_ENABLE_COMP_NODE_ASYNC_INIT | #if MGB_ENABLE_COMP_NODE_ASYNC_INIT | ||||
@@ -73,14 +73,11 @@ std::string CudaError::get_cuda_extra_info() { | |||||
#endif | #endif | ||||
} | } | ||||
AtlasError::AtlasError(const std::string &msg): | AtlasError::AtlasError(const std::string &msg): | ||||
SystemError(msg) | SystemError(msg) | ||||
{ | { | ||||
} | } | ||||
ROCmError::ROCmError(const std::string &msg): | ROCmError::ROCmError(const std::string &msg): | ||||
SystemError(msg) | SystemError(msg) | ||||
{ | { | ||||
@@ -23,7 +23,6 @@ | |||||
#include "megbrain/graph/helper.h" | #include "megbrain/graph/helper.h" | ||||
#include "megbrain/opr/utility.h" | #include "megbrain/opr/utility.h" | ||||
#if MGB_ENABLE_TENSOR_RT | #if MGB_ENABLE_TENSOR_RT | ||||
#include "megbrain/tensorrt/opr_replace.h" | #include "megbrain/tensorrt/opr_replace.h" | ||||
#endif | #endif | ||||
@@ -554,7 +553,6 @@ ComputingGraphImpl::CompileState ComputingGraphImpl::compile_prepare( | |||||
} | } | ||||
#endif | #endif | ||||
#if MGB_JIT | #if MGB_JIT | ||||
if (std::abs(options().graph_opt_level) == 0 && | if (std::abs(options().graph_opt_level) == 0 && | ||||
(options().graph_opt.jit || options().graph_opt.jit_config.enabled())) { | (options().graph_opt.jit || options().graph_opt.jit_config.enabled())) { | ||||
@@ -445,7 +445,6 @@ class VarNodeMemManager { | |||||
SyncableCounter m_cpu_async_release_barrier; | SyncableCounter m_cpu_async_release_barrier; | ||||
#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM | #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM | ||||
//! release dynamic var on after compnode event finishes | //! release dynamic var on after compnode event finishes | ||||
class AsyncVarReleaser; | class AsyncVarReleaser; | ||||
@@ -508,7 +508,6 @@ class CompNode { | |||||
*/ | */ | ||||
static bool enable_affinity_for_cpu(bool flag); | static bool enable_affinity_for_cpu(bool flag); | ||||
protected: | protected: | ||||
//! ImplBase with env(); defined in CompNodeEnv | //! ImplBase with env(); defined in CompNodeEnv | ||||
class Impl; | class Impl; | ||||
@@ -19,8 +19,6 @@ | |||||
#include "megdnn/handle.h" | #include "megdnn/handle.h" | ||||
#if MGB_CUDA | #if MGB_CUDA | ||||
#include <cuda_runtime.h> | #include <cuda_runtime.h> | ||||
#include <cuda.h> | #include <cuda.h> | ||||
@@ -90,8 +88,6 @@ | |||||
#endif // MGB_ATLAS | #endif // MGB_ATLAS | ||||
#if MGB_ROCM | #if MGB_ROCM | ||||
#include "hcc_detail/hcc_defs_prologue.h" | #include "hcc_detail/hcc_defs_prologue.h" | ||||
#include "megcore_rocm.h" | #include "megcore_rocm.h" | ||||
@@ -196,7 +192,6 @@ namespace mgb { | |||||
const char* file, const char* func, int line); | const char* file, const char* func, int line); | ||||
#endif | #endif | ||||
#if MGB_CUDA | #if MGB_CUDA | ||||
[[noreturn]] void _on_cuda_error(const char* expr, cudaError_t err, | [[noreturn]] void _on_cuda_error(const char* expr, cudaError_t err, | ||||
const char* file, const char* func, int line); | const char* file, const char* func, int line); | ||||
@@ -205,7 +200,6 @@ namespace mgb { | |||||
int line); | int line); | ||||
#endif | #endif | ||||
#if MGB_ROCM | #if MGB_ROCM | ||||
[[noreturn]] void _on_hip_error(const char* expr, hipError_t err, | [[noreturn]] void _on_hip_error(const char* expr, hipError_t err, | ||||
const char* file, const char* func, int line); | const char* file, const char* func, int line); | ||||
@@ -232,7 +226,6 @@ public: | |||||
mgb_assert(0, "The CompNode set_affinity is not implement"); | mgb_assert(0, "The CompNode set_affinity is not implement"); | ||||
} | } | ||||
}; | }; | ||||
using AtlasDispatcher = CPUDispatcher; | using AtlasDispatcher = CPUDispatcher; | ||||
/*! | /*! | ||||
@@ -328,7 +321,6 @@ public: | |||||
} | } | ||||
#endif | #endif | ||||
} | } | ||||
/*! | /*! | ||||
@@ -370,7 +362,6 @@ public: | |||||
const ContinuationCtx<cudaStream_t>& cont); | const ContinuationCtx<cudaStream_t>& cont); | ||||
#endif | #endif | ||||
#if MGB_ATLAS | #if MGB_ATLAS | ||||
struct AtlasEnv { | struct AtlasEnv { | ||||
int device = -1; | int device = -1; | ||||
@@ -431,8 +422,6 @@ public: | |||||
void init_atlas(CompNode comp_node, const AtlasEnv& env); | void init_atlas(CompNode comp_node, const AtlasEnv& env); | ||||
#endif | #endif | ||||
#if MGB_ROCM | #if MGB_ROCM | ||||
struct ROCmEnv { | struct ROCmEnv { | ||||
int device = -1; | int device = -1; | ||||
@@ -547,7 +536,6 @@ private: | |||||
CompNode m_comp_node; | CompNode m_comp_node; | ||||
Property m_property; | Property m_property; | ||||
MemEventHandler m_mem_event_handler; | MemEventHandler m_mem_event_handler; | ||||
#if MGB_CUDA | #if MGB_CUDA | ||||
CudaEnv m_cuda_env; | CudaEnv m_cuda_env; | ||||
#endif | #endif | ||||
@@ -71,7 +71,6 @@ | |||||
}) \ | }) \ | ||||
do { \ | do { \ | ||||
} while (0) | } while (0) | ||||
namespace mgb { | namespace mgb { | ||||
//! the most general MegBrain exception type; also base class for all megbrain | //! the most general MegBrain exception type; also base class for all megbrain | ||||
@@ -149,7 +148,6 @@ public: | |||||
AtlasError(const std::string& msg); | AtlasError(const std::string& msg); | ||||
}; | }; | ||||
class ROCmError final : public SystemError { | class ROCmError final : public SystemError { | ||||
public: | public: | ||||
/*! | /*! | ||||
@@ -224,7 +222,6 @@ public: | |||||
using MegBrainError::MegBrainError; | using MegBrainError::MegBrainError; | ||||
}; | }; | ||||
} // namespace mgb | } // namespace mgb | ||||
namespace mgb { | namespace mgb { | ||||
@@ -233,5 +230,4 @@ bool has_uncaught_exception(); | |||||
} // namespace mgb | } // namespace mgb | ||||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -49,7 +49,11 @@ def SVD: MgbHashableOp<"SVD", [SVDParam]>; | |||||
def Convolution : MgbHashableOp<"Convolution", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>; | def Convolution : MgbHashableOp<"Convolution", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>; | ||||
def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>; | |||||
def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]> { | |||||
let extraArguments = (ins | |||||
MgbDTypeAttr:$dtype | |||||
); | |||||
} | |||||
def Convolution3D: MgbHashableOp<"Convolution3D", [Convolution3DParam, ExecutionPolicyParamBase<"policy">]>; | def Convolution3D: MgbHashableOp<"Convolution3D", [Convolution3DParam, ExecutionPolicyParamBase<"policy">]>; | ||||
@@ -40,7 +40,6 @@ TEST(TestCompNode, Parse) { | |||||
ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23)); | ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23)); | ||||
ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23)); | ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23)); | ||||
ASSERT_EQ(L::parse("rocmx"), make_lc(D::ROCM, -1, 0)); | ASSERT_EQ(L::parse("rocmx"), make_lc(D::ROCM, -1, 0)); | ||||
ASSERT_EQ(L::parse("rocm2"), make_lc(D::ROCM, 2, 0)); | ASSERT_EQ(L::parse("rocm2"), make_lc(D::ROCM, 2, 0)); | ||||
ASSERT_EQ(L::parse("rocm2:3"), make_lc(D::ROCM, 2, 3)); | ASSERT_EQ(L::parse("rocm2:3"), make_lc(D::ROCM, 2, 3)); | ||||
@@ -62,7 +61,6 @@ TEST(TestCompNode, Parse) { | |||||
ASSERT_EQ(L::parse("multithread:default:2"), | ASSERT_EQ(L::parse("multithread:default:2"), | ||||
make_lc(D::MULTITHREAD, L::DEVICE_MULTITHREAD_DEFAULT, 2)); | make_lc(D::MULTITHREAD, L::DEVICE_MULTITHREAD_DEFAULT, 2)); | ||||
ASSERT_THROW(L::parse("apu"), MegBrainError); | ASSERT_THROW(L::parse("apu"), MegBrainError); | ||||
ASSERT_THROW(L::parse("fpgbx"), MegBrainError); | ASSERT_THROW(L::parse("fpgbx"), MegBrainError); | ||||
ASSERT_THROW(L::parse("cab0"), MegBrainError); | ASSERT_THROW(L::parse("cab0"), MegBrainError); | ||||
@@ -165,8 +163,6 @@ TEST(TestCompNode, Load) { | |||||
auto atlas1 = CompNode::load("atlas1"); | auto atlas1 = CompNode::load("atlas1"); | ||||
ASSERT_NE(atlas0, atlas1); | ASSERT_NE(atlas0, atlas1); | ||||
#endif | #endif | ||||
} | } | ||||
TEST(TestCompNode, FreeAfterFinalize) { | TEST(TestCompNode, FreeAfterFinalize) { | ||||
@@ -355,7 +351,6 @@ TEST(TestCompNodeAtlas, MemNode) { | |||||
} | } | ||||
#endif | #endif | ||||
TEST(TestCompNodeCPU, PhysicalDispatch) { | TEST(TestCompNodeCPU, PhysicalDispatch) { | ||||
constexpr int ID = 0x2a6453e0; | constexpr int ID = 0x2a6453e0; | ||||
using L = CompNode::Locator; | using L = CompNode::Locator; | ||||
@@ -754,7 +749,6 @@ TEST(TestCompNodeCambricon, P2PCopy) { | |||||
#endif | #endif | ||||
#endif // MGB_CAMBRICON | #endif // MGB_CAMBRICON | ||||
#if MGB_ATLAS | #if MGB_ATLAS | ||||
TEST(TestCompNodeAtlas, D2DCopy) { | TEST(TestCompNodeAtlas, D2DCopy) { | ||||
@@ -780,7 +774,6 @@ TEST(TestCompNodeAtlas, D2DCopy) { | |||||
} | } | ||||
#endif | #endif | ||||
namespace { | namespace { | ||||
class CompNodeDepedentObjectInst final : public CompNodeDepedentObject { | class CompNodeDepedentObjectInst final : public CompNodeDepedentObject { | ||||
int *m_dst, *m_timer; | int *m_dst, *m_timer; | ||||
@@ -634,7 +634,6 @@ void test_gather_other(CompNode cn0, CompNode cn1) { | |||||
opr::Sleep::sleep(cn1, 0.7); | opr::Sleep::sleep(cn1, 0.7); | ||||
func->execute(); | func->execute(); | ||||
} | } | ||||
} // namespace | } // namespace | ||||
#if MGB_CUDA | #if MGB_CUDA | ||||
@@ -668,5 +667,4 @@ TEST(TestCudaMemAlloc, FreeMem) { | |||||
} | } | ||||
#endif // MGB_CUDA | #endif // MGB_CUDA | ||||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -340,7 +340,6 @@ TEST(TestTensor, ValueDump) { | |||||
auto val = debug::dump_tensor(*gen({23, 45}), "test"); | auto val = debug::dump_tensor(*gen({23, 45}), "test"); | ||||
debug::write_to_file(output_file("TestTensor.ValueDump.bin").c_str(), val); | debug::write_to_file(output_file("TestTensor.ValueDump.bin").c_str(), val); | ||||
} | } | ||||
template <class Src, class Dst> | template <class Src, class Dst> | ||||
void run_negative_index_test() { | void run_negative_index_test() { | ||||
constexpr size_t S0 = 200, S1 = 200; | constexpr size_t S0 = 200, S1 = 200; | ||||
@@ -1912,7 +1912,6 @@ TEST_PASS(FuseConvBiasNonlinPass, Basic) { | |||||
} | } | ||||
} | } | ||||
#if MGB_CUDA | #if MGB_CUDA | ||||
TEST(TestEnableTensorCore, SmallInputShape) { | TEST(TestEnableTensorCore, SmallInputShape) { | ||||
@@ -4735,7 +4734,6 @@ TEST(TestGoptInference, PaddingChannelsWithWarpPerspective) { | |||||
MGB_ASSERT_TENSOR_EQ(t1, t2); | MGB_ASSERT_TENSOR_EQ(t1, t2); | ||||
} | } | ||||
#endif | #endif | ||||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -67,7 +67,6 @@ | |||||
#define MGB_CUDA 1 | #define MGB_CUDA 1 | ||||
#endif | #endif | ||||
// whether to include file/line location for assert message | // whether to include file/line location for assert message | ||||
#ifndef MGB_ASSERT_LOC | #ifndef MGB_ASSERT_LOC | ||||
#define MGB_ASSERT_LOC 1 | #define MGB_ASSERT_LOC 1 | ||||
@@ -162,7 +161,6 @@ | |||||
#define MGB_JIT_HALIDE 0 | #define MGB_JIT_HALIDE 0 | ||||
#endif | #endif | ||||
#ifndef MEGDNN_WITH_CAMBRICON | #ifndef MEGDNN_WITH_CAMBRICON | ||||
#define MEGDNN_WITH_CAMBRICON 0 | #define MEGDNN_WITH_CAMBRICON 0 | ||||
#endif | #endif | ||||
@@ -182,7 +180,6 @@ | |||||
#define MGB_ENABLE_FASTRUN 1 | #define MGB_ENABLE_FASTRUN 1 | ||||
#endif | #endif | ||||
/* ================= following are more finegrind controls ================= */ | /* ================= following are more finegrind controls ================= */ | ||||
// whether to enable json dumper | // whether to enable json dumper | ||||
@@ -162,7 +162,6 @@ namespace opr { | |||||
using ReduceV2 = opr::Reduce; | using ReduceV2 = opr::Reduce; | ||||
MGB_SEREG_OPR(ReduceV2, 0); | MGB_SEREG_OPR(ReduceV2, 0); | ||||
} // namespace opr | } // namespace opr | ||||
using TypeCvtV2 = opr::TypeCvt; | using TypeCvtV2 = opr::TypeCvt; | ||||
MGB_SEREG_OPR(TypeCvtV2, 1); | MGB_SEREG_OPR(TypeCvtV2, 1); | ||||
@@ -97,7 +97,6 @@ MGB_SEREG_OPR(SVD, 1); | |||||
} // namespace opr | } // namespace opr | ||||
} // namespace mgb | } // namespace mgb | ||||
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -613,7 +613,6 @@ MGB_SEREG_OPR(LSQ, 4); | |||||
MGB_SEREG_OPR(LSQBackward, 5); | MGB_SEREG_OPR(LSQBackward, 5); | ||||
} // namespace opr | } // namespace opr | ||||
} // namespace mgb | } // namespace mgb | ||||
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -196,7 +196,6 @@ using DctChannelSelectV1 = opr::DctChannelSelect; | |||||
MGB_SEREG_OPR(DctChannelSelectV1, 0); | MGB_SEREG_OPR(DctChannelSelectV1, 0); | ||||
} // namespace opr | } // namespace opr | ||||
} // namespace mgb | } // namespace mgb | ||||
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -57,7 +57,6 @@ namespace serialization { | |||||
} // namespace serialization | } // namespace serialization | ||||
namespace opr { | namespace opr { | ||||
MGB_SEREG_OPR(Argmax, 1); | MGB_SEREG_OPR(Argmax, 1); | ||||
@@ -14,7 +14,6 @@ | |||||
namespace mgb { | namespace mgb { | ||||
namespace opr { | namespace opr { | ||||
using UniformRNGV1 = opr::UniformRNG; | using UniformRNGV1 = opr::UniformRNG; | ||||
@@ -120,7 +120,6 @@ namespace serialization { | |||||
#endif | #endif | ||||
} // namespace serialization | } // namespace serialization | ||||
namespace opr { | namespace opr { | ||||
MGB_SEREG_OPR(Broadcast, 2); | MGB_SEREG_OPR(Broadcast, 2); | ||||
MGB_SEREG_OPR(Dimshuffle, 1); | MGB_SEREG_OPR(Dimshuffle, 1); | ||||
@@ -2401,7 +2401,6 @@ TEST(TestOprDNN, ConvolutionMultiCompNode) { | |||||
worker0.join(); | worker0.join(); | ||||
worker1.join(); | worker1.join(); | ||||
} | } | ||||
#endif | #endif | ||||
} // anonymous namespace | } // anonymous namespace | ||||
@@ -37,7 +37,6 @@ GraphLoader::shared_tensor_name_map() { | |||||
} | } | ||||
return ret; | return ret; | ||||
} | } | ||||
std::unique_ptr<GraphLoader> make_fbs_loader(std::unique_ptr<InputFile> file); | std::unique_ptr<GraphLoader> make_fbs_loader(std::unique_ptr<InputFile> file); | ||||
std::unique_ptr<GraphDumper> make_fbs_dumper(std::unique_ptr<OutputFile> file); | std::unique_ptr<GraphDumper> make_fbs_dumper(std::unique_ptr<OutputFile> file); | ||||
bool is_fbs_file(InputFile& file); | bool is_fbs_file(InputFile& file); | ||||
@@ -502,5 +502,4 @@ TEST(TestExternCOpr, Dedup) { | |||||
ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst); | ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst); | ||||
} | } | ||||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -15,7 +15,6 @@ if (MGE_WITH_CUDA AND MGE_WITH_TRT) | |||||
list(APPEND SOURCES ${SOURCES_}) | list(APPEND SOURCES ${SOURCES_}) | ||||
endif() | endif() | ||||
add_executable(megbrain_test ${SOURCES}) | add_executable(megbrain_test ${SOURCES}) | ||||
target_link_libraries(megbrain_test gtest gmock) | target_link_libraries(megbrain_test gtest gmock) | ||||
target_link_libraries(megbrain_test megbrain megdnn ${MGE_CUDA_LIBS}) | target_link_libraries(megbrain_test megbrain megdnn ${MGE_CUDA_LIBS}) | ||||
@@ -63,7 +63,6 @@ pdef('PersistentOutputStorage').add_fields( | |||||
'false') | 'false') | ||||
) | ) | ||||
(pdef('CollectiveComm', 'collective communication between multiple computing ' | (pdef('CollectiveComm', 'collective communication between multiple computing ' | ||||
'nodes on localhost') | 'nodes on localhost') | ||||
.add_enum(Doc('Mode', 'mode of collective communication'), | .add_enum(Doc('Mode', 'mode of collective communication'), | ||||