diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f180944..01d7b0ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -82,6 +82,16 @@ option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
 option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)
 option(MGE_WITH_CUSTOM_OP "Build with Custom op" OFF)
 if(MSVC OR WIN32)
+    # FIXME: statically linking the Windows VC runtime with some Visual Studio versions
+    # causes runtime issues on certain call paths, for example _imperative_rt.pyd --> megengine_shared.dll,
+    # where the C API flush cannot find the fd argument; the root cause is still unknown.
+    # As a workaround, link the VC runtime dynamically. In some cases vcrt is still linked
+    # statically, i.e. when MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP/MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2
+    # is set, so please use lite_static_all_in_one (lite/CMakeLists.txt) in a Windows XP env if possible.
+    # If the VC runtime is not installed in your env, refer to:
+    # https://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-160
+    option(MGE_STATIC_LINK_WITH_VC_RUNTIME "Enable mge static link with Windows vc runtime" OFF)
+
     option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP "Enable deploy inference on Windows xp" OFF)
     # special MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 for Windows XP sp2(32bit)
     # internal behavior:
@@ -103,6 +113,9 @@ if(MSVC OR WIN32)
     # which always locate in Microsoft Visual Studio/*/*/VC/Tools/MSVC/*/bin/*/*/link.exe
     set(CMAKE_LINKER "link.exe")
     if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP OR MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
+        set(MGE_STATIC_LINK_WITH_VC_RUNTIME ON)
+        message(STATUS "Force set MGE_STATIC_LINK_WITH_VC_RUNTIME ON when building for Windows XP")
+
         if(NOT ${MGE_ARCH} STREQUAL "i386")
             message(FATAL_ERROR "only support 32bit when build for Windows xp")
         endif()
@@ -273,10 +286,22 @@ if(MSVC OR WIN32)
     # for cmake after 3.15.2
     cmake_policy(SET CMP0091 NEW)
     set(CMAKE_OBJECT_PATH_MAX 300)
-    if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
-        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
+    if(MGE_BUILD_WITH_ASAN)
+        set(MGE_STATIC_LINK_WITH_VC_RUNTIME ON)
+        message(STATUS "Force set MGE_STATIC_LINK_WITH_VC_RUNTIME ON when building with MGE_BUILD_WITH_ASAN on Windows")
+    endif()
+    if(MGE_STATIC_LINK_WITH_VC_RUNTIME)
+        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+            set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
+        else()
+            set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
+        endif()
     else()
-        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
+        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+            set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL")
+        else()
+            set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL")
+        endif()
     endif()

     add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
@@ -1183,25 +1208,6 @@ if (NOT MGE_WITH_DISTRIBUTED)
           DESTINATION ${MGE_INSTALL_CMAKEDIR})
 endif()
-if(MSVC OR WIN32)
-    add_compile_options(
-        $<$:/MT>
-        $<$:/MTd>
-        $<$:/MT>
-    )
-    foreach (CompilerFlag
-        CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
-        CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
-        CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
-        CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
-        if(${CompilerFlag} MATCHES "/MD")
-            string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
-            set(${CompilerFlag} "${${CompilerFlag}}" CACHE STRING "msvc compiler flags" FORCE)
-            message(VERBOSE "MSVC flags: ${CompilerFlag}:${${CompilerFlag}}")
-        endif()
-    endforeach()
-endif()
-
 if(MGE_WITH_JIT_MLIR)
     add_subdirectory(tools/mlir/mgb-opt)
add_subdirectory(tools/mlir/mgb-file-check) diff --git a/dnn/cuda-stub/CMakeLists.txt b/dnn/cuda-stub/CMakeLists.txt index 05256b56..e89dddbd 100644 --- a/dnn/cuda-stub/CMakeLists.txt +++ b/dnn/cuda-stub/CMakeLists.txt @@ -22,4 +22,5 @@ if (MSVC OR WIN32) else() target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined) endif() +target_include_directories(cuda-stub PRIVATE $) install (TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS}) diff --git a/dnn/cuda-stub/src/libcuda.cpp b/dnn/cuda-stub/src/libcuda.cpp index 0a20aba5..025b8402 100644 --- a/dnn/cuda-stub/src/libcuda.cpp +++ b/dnn/cuda-stub/src/libcuda.cpp @@ -1,9 +1,10 @@ +#include "megbrain_build_config.h" + #pragma GCC visibility push(default) #include #define LOGE(fmt, v...) fprintf(stderr, "err: " fmt "\n", ##v) - extern "C" { #include "cuda.h" } @@ -28,8 +29,29 @@ CUresult on_init_failed(int func_idx) { #if CUDA_VERSION == 10010 #include "./libcuda-wrap_10.1.h" + +//! as some symbols link from cuda lib, but used at other module, export here +#ifdef WIN32 +#pragma comment(linker, "/export:cudaSetDevice") +#pragma comment(linker, "/export:cuCtxGetCurrent") +#pragma comment(linker, "/export:cudaGetDeviceCount") +#pragma comment(linker, "/export:cudaGetDeviceProperties") +#pragma comment(linker, "/export:cudaRuntimeGetVersion") +#pragma comment(linker, "/export:cudaGetDevice") +#pragma comment(linker, "/export:cudaDeviceSynchronize") +#endif #elif CUDA_VERSION == 10020 #include "./libcuda-wrap_10.2.h" +//! as some symbols link from cuda lib, but used at other module, export here +#ifdef WIN32 +#pragma comment(linker, "/export:cudaSetDevice") +#pragma comment(linker, "/export:cuCtxGetCurrent") +#pragma comment(linker, "/export:cudaGetDeviceCount") +#pragma comment(linker, "/export:cudaGetDeviceProperties") +#pragma comment(linker, "/export:cudaRuntimeGetVersion") +#pragma comment(linker, "/export:cudaGetDevice") +#pragma comment(linker, "/export:cudaDeviceSynchronize") +#endif #elif CUDA_VERSION == 11010 #include "./libcuda-wrap_11.1.h" #elif CUDA_VERSION == 11020 @@ -79,4 +101,4 @@ static const char* extra_so_paths[] = { }; static const char* g_default_api_name = "cuda"; -#include "./dlopen_helper.h" \ No newline at end of file +#include "./dlopen_helper.h" diff --git a/dnn/include/megdnn/basic_types.h b/dnn/include/megdnn/basic_types.h index 53f22c9a..44831f6d 100644 --- a/dnn/include/megdnn/basic_types.h +++ b/dnn/include/megdnn/basic_types.h @@ -104,22 +104,22 @@ struct TensorShape { #if MEGDNN_CC_HOST TensorShape() = default; TensorShape(const TensorShape& rhs) = default; - TensorShape(const SmallVector& init_shape); - TensorShape(std::initializer_list init_shape); - std::string to_string() const; + MGE_WIN_DECLSPEC_FUC TensorShape(const SmallVector& init_shape); + MGE_WIN_DECLSPEC_FUC TensorShape(std::initializer_list init_shape); + MGE_WIN_DECLSPEC_FUC std::string to_string() const; #endif //! total number of elements - size_t total_nr_elems() const; + MGE_WIN_DECLSPEC_FUC size_t total_nr_elems() const; //! check whether two shapes are equal - bool eq_shape(const TensorShape& rhs) const; + MGE_WIN_DECLSPEC_FUC bool eq_shape(const TensorShape& rhs) const; //! check whether the shape can be treated as a scalar bool is_scalar() const { return ndim == 1 && shape[0] == 1; } //! check whether ndim != 0 and at least one shape is 0 - bool is_empty() const; + MGE_WIN_DECLSPEC_FUC bool is_empty() const; //! 
access single element, without boundary check size_t& operator[](size_t i) { return shape[i]; } @@ -168,8 +168,8 @@ struct TensorLayout : public TensorShape { class ImplBase; #if MEGDNN_CC_HOST - Format(); - Format(DType dtype); + MGE_WIN_DECLSPEC_FUC Format(); + MGE_WIN_DECLSPEC_FUC Format(DType dtype); const ImplBase* impl() const { return m_impl; } @@ -190,16 +190,17 @@ struct TensorLayout : public TensorShape { } //! get human-readable string description of this format - std::string to_string() const; + MGE_WIN_DECLSPEC_FUC std::string to_string() const; - std::string serialize() const; - static Format deserialize(const std::string& bin, const Handle* handle); + MGE_WIN_DECLSPEC_FUC std::string serialize() const; + MGE_WIN_DECLSPEC_FUC static Format deserialize( + const std::string& bin, const Handle* handle); //! whether this is the default tensor format - bool is_default() const; + MGE_WIN_DECLSPEC_FUC bool is_default() const; //! whether this is the lowbit aligned to bytes tensor format - bool is_lowbit_aligned() const; + MGE_WIN_DECLSPEC_FUC bool is_lowbit_aligned() const; bool operator==(Format rhs) const { return m_impl == rhs.m_impl; } bool operator!=(Format rhs) const { return m_impl != rhs.m_impl; } @@ -218,27 +219,28 @@ struct TensorLayout : public TensorShape { DType dtype; Format format; - TensorLayout(); + MGE_WIN_DECLSPEC_FUC TensorLayout(); #if MEGDNN_CC_HOST TensorLayout(const TensorLayout& layout) = default; //! create empty layout with given dtype - explicit TensorLayout(DType dtype_); + MGE_WIN_DECLSPEC_FUC explicit TensorLayout(DType dtype_); - TensorLayout(DType dtype_, Format format); + MGE_WIN_DECLSPEC_FUC TensorLayout(DType dtype_, Format format); //! create layout with given shape and contiguous stride. - TensorLayout(const TensorShape& shape, DType dtype); + MGE_WIN_DECLSPEC_FUC TensorLayout(const TensorShape& shape, DType dtype); - TensorLayout(const TensorShape& shape, DType dtype, Format format); + MGE_WIN_DECLSPEC_FUC TensorLayout( + const TensorShape& shape, DType dtype, Format format); //! creating layout with user-specified shape and stride. - TensorLayout( + MGE_WIN_DECLSPEC_FUC TensorLayout( const TensorShape& shape, const std::vector& stride, DType dtype); - TensorLayout( + MGE_WIN_DECLSPEC_FUC TensorLayout( const TensorShape& shape, const std::vector& stride, DType dtype, Format format); @@ -251,28 +253,30 @@ struct TensorLayout : public TensorShape { * * \return total number of elements */ - size_t init_contiguous_stride(); + MGE_WIN_DECLSPEC_FUC size_t init_contiguous_stride(); /*! * \brief init stride to be contiguous by first assigning shape * * Use current format. */ - size_t init_contiguous_stride(const TensorShape& shape); + MGE_WIN_DECLSPEC_FUC size_t init_contiguous_stride(const TensorShape& shape); - size_t init_contiguous_stride(const TensorShape& shape, Format format); + MGE_WIN_DECLSPEC_FUC size_t + init_contiguous_stride(const TensorShape& shape, Format format); /*! * \brief inplace version of remove_axis */ - void remove_axis_inplace(size_t idx); + MGE_WIN_DECLSPEC_FUC void remove_axis_inplace(size_t idx); /*! * \brief add an axis before given *axis* with given shape and stride * * Other shapes and strides would not be changed. */ - void add_axis_inplace(size_t axis, size_t shape, ptrdiff_t stride); + MGE_WIN_DECLSPEC_FUC void add_axis_inplace( + size_t axis, size_t shape, ptrdiff_t stride); /*! 
* \brief add an axis before given *axis*, with shape 1 and contiguous @@ -287,7 +291,7 @@ struct TensorLayout : public TensorShape { * * By the way this API will modify the format according to the data type */ - void modify_dtype_inplace(DType dtype); + MGE_WIN_DECLSPEC_FUC void modify_dtype_inplace(DType dtype); /* =================== generate new layout =================== */ @@ -297,21 +301,23 @@ struct TensorLayout : public TensorShape { * example: * (2, 0, 1) -> AxBxC to CxAxB */ - TensorLayout dimshuffle(const std::vector& dims) const - MEGDNN_WARN_UNUSED_RESULT; + MGE_WIN_DECLSPEC_FUC TensorLayout + dimshuffle(const std::vector& dims) const MEGDNN_WARN_UNUSED_RESULT; /** * \brief Remove an axis from the layout by moving later shape/stride * elements earlier. No extra check is performed. */ - TensorLayout remove_axis(size_t idx) const MEGDNN_WARN_UNUSED_RESULT; + MGE_WIN_DECLSPEC_FUC TensorLayout + remove_axis(size_t idx) const MEGDNN_WARN_UNUSED_RESULT; /** * \brief Returns a different view. * * \throw TensorReshapeError if no stride exists for target shape. */ - TensorLayout reshape(const TensorShape& shape) const MEGDNN_WARN_UNUSED_RESULT; + MGE_WIN_DECLSPEC_FUC TensorLayout + reshape(const TensorShape& shape) const MEGDNN_WARN_UNUSED_RESULT; /*! * \brief try to reshape to another view; return whether these two shapes @@ -319,14 +325,16 @@ struct TensorLayout : public TensorShape { * \return true iff there exists target stride so this layout can be * converted to target shape and the elements can match. */ - bool try_reshape(TensorLayout& output, const TensorShape& shape) const - MEGDNN_WARN_UNUSED_RESULT; + MGE_WIN_DECLSPEC_FUC bool try_reshape( + TensorLayout& output, + const TensorShape& shape) const MEGDNN_WARN_UNUSED_RESULT; /*! * \brief Broadcast on dims with shape == 1 to match target *shape*. * \throw TensorReshapeError if could not be satisfied */ - TensorLayout broadcast(const TensorShape& shape) const MEGDNN_WARN_UNUSED_RESULT; + MGE_WIN_DECLSPEC_FUC TensorLayout + broadcast(const TensorShape& shape) const MEGDNN_WARN_UNUSED_RESULT; /*! * \brief Collapse consecutive axes with contiguous layout together @@ -335,13 +343,14 @@ struct TensorLayout : public TensorShape { * scalar, the result would always be a one-dimensional empty or scalar, * with stride being 1. */ - TensorLayout collapse_contiguous() const MEGDNN_WARN_UNUSED_RESULT; + MGE_WIN_DECLSPEC_FUC TensorLayout + collapse_contiguous() const MEGDNN_WARN_UNUSED_RESULT; /* =================== properties =================== */ - std::string to_string() const; + MGE_WIN_DECLSPEC_FUC std::string to_string() const; - std::string serialize() const; + MGE_WIN_DECLSPEC_FUC std::string serialize() const; #endif // MEGDNN_CC_HOST /*! @@ -353,17 +362,17 @@ struct TensorLayout : public TensorShape { * Note that empty tensors (i.e. with 0 shapes) are not considered as * contiguous. */ - bool is_contiguous() const; + MGE_WIN_DECLSPEC_FUC bool is_contiguous() const; //! check whether it is physically contiguous disregarding format - bool is_physical_contiguous() const; + MGE_WIN_DECLSPEC_FUC bool is_physical_contiguous() const; /*! * \brief check whether the layout is monotonous * * A tensor is monotonous if abs(stride[i]) >= abs(stride[i+1])*shape[i+1] */ - bool is_abs_monotonous_allow_brdcst() const; + MGE_WIN_DECLSPEC_FUC bool is_abs_monotonous_allow_brdcst() const; /*! 
* \brief check whether the layout is contiguous, allowing broadcasting @@ -371,7 +380,7 @@ struct TensorLayout : public TensorShape { * This checks whether the underlying storage is contiguous, where * broadcasting is also considered to be so. */ - bool is_contiguous_allow_brdcst() const; + MGE_WIN_DECLSPEC_FUC bool is_contiguous_allow_brdcst() const; /*! * \brief if this function returns true, then no two elements can occupy the @@ -382,15 +391,15 @@ struct TensorLayout : public TensorShape { * still possible that actually no two elements share the same memory * location. */ - bool is_non_overlapping_strong() const; + MGE_WIN_DECLSPEC_FUC bool is_non_overlapping_strong() const; - bool eq_layout(const TensorLayout& rhs) const; + MGE_WIN_DECLSPEC_FUC bool eq_layout(const TensorLayout& rhs) const; //! get lowest and highest offset reachable from this layout - Span span() const; + MGE_WIN_DECLSPEC_FUC Span span() const; //! total number of access bytes - size_t access_bytes() const; + MGE_WIN_DECLSPEC_FUC size_t access_bytes() const; }; /** diff --git a/dnn/include/megdnn/dtype.h b/dnn/include/megdnn/dtype.h index fbcce01a..5bd360b8 100644 --- a/dnn/include/megdnn/dtype.h +++ b/dnn/include/megdnn/dtype.h @@ -386,10 +386,11 @@ using DTypeParam = DTypeParamImpl::ctype>; */ class DType { private: - MEGDNN_NORETURN void on_request_lowbit_size() const; + MGE_WIN_DECLSPEC_FUC MEGDNN_NORETURN void on_request_lowbit_size() const; // HACK: This is required in ParameterizedDType::downcast_from public: - MEGDNN_NORETURN void on_assert_is_failed(const char* rname) const; + MGE_WIN_DECLSPEC_FUC MEGDNN_NORETURN void on_assert_is_failed( + const char* rname) const; protected: struct Trait { @@ -493,7 +494,7 @@ public: bool operator!=(const DType& rhs) const { return m_trait != rhs.m_trait; } //! get dtype object from enum - static DType from_enum(DTypeEnum ev); + MGE_WIN_DECLSPEC_FUC static DType from_enum(DTypeEnum ev); //! get a handle of the dtype that could be used for equivalence check const void* handle() const { return m_trait; } @@ -531,9 +532,10 @@ class ParameterizedDType MEGDNN_FINAL : public DType { }; // static part of the trait - static DType::Trait sm_trait; + static MGE_WIN_DECLSPEC_DATA DType::Trait sm_trait; - static Trait* make_from_param(const DTypeParam& param); + MGE_WIN_DECLSPEC_FUC static Trait* make_from_param( + const DTypeParam& param); explicit ParameterizedDType(DType dtype) : DType(dtype) {} public: @@ -569,12 +571,12 @@ public: //! 
dtype implementation classes namespace dtype { -#define IMPL(_name) \ - class _name MEGDNN_FINAL : public DType { \ - static Trait sm_trait; \ - \ - public: \ - _name() : DType(&sm_trait) {} \ +#define IMPL(_name) \ + class _name MEGDNN_FINAL : public DType { \ + static MGE_WIN_DECLSPEC_DATA Trait sm_trait; \ + \ + public: \ + _name() : DType(&sm_trait) {} \ }; MEGDNN_FOREACH_DTYPE_NAME(IMPL) @@ -764,7 +766,7 @@ struct DTypeParamImpl { uint8_t zero_point; DTypeParamImpl() = default; - DTypeParamImpl(float scale, uint8_t zero_point); + MGE_WIN_DECLSPEC_FUC DTypeParamImpl(float scale, uint8_t zero_point); #ifdef MEGDNN_CC_HOST std::size_t hash() const; @@ -788,7 +790,7 @@ struct DTypeParamImpl { float scale; DTypeParamImpl() = default; - DTypeParamImpl(float scale); + MGE_WIN_DECLSPEC_FUC DTypeParamImpl(float scale); #ifdef MEGDNN_CC_HOST std::size_t hash() const; #endif @@ -810,7 +812,7 @@ struct DTypeParamImpl { float scale; DTypeParamImpl() = default; - DTypeParamImpl(float scale); + MGE_WIN_DECLSPEC_FUC DTypeParamImpl(float scale); #ifdef MEGDNN_CC_HOST std::size_t hash() const; #endif // MEGDNN_CC_HOST @@ -831,7 +833,7 @@ struct DTypeParamImpl { float scale; DTypeParamImpl() = default; - DTypeParamImpl(float scale); + MGE_WIN_DECLSPEC_FUC DTypeParamImpl(float scale); #ifdef MEGDNN_CC_HOST std::size_t hash() const; #endif // MEGDNN_CC_HOST @@ -854,7 +856,7 @@ struct DTypeParamImpl { uint8_t zero_point; DTypeParamImpl() = default; - DTypeParamImpl(float scale, uint8_t zero_point); + MGE_WIN_DECLSPEC_FUC DTypeParamImpl(float scale, uint8_t zero_point); #ifdef MEGDNN_CC_HOST std::size_t hash() const; #endif @@ -879,7 +881,7 @@ struct DTypeParamImpl { float scale; DTypeParamImpl() = default; - DTypeParamImpl(float scale); + MGE_WIN_DECLSPEC_FUC DTypeParamImpl(float scale); #ifdef MEGDNN_CC_HOST std::size_t hash() const; #endif diff --git a/dnn/include/megdnn/handle.h b/dnn/include/megdnn/handle.h index a0e07d6c..a83d156e 100644 --- a/dnn/include/megdnn/handle.h +++ b/dnn/include/megdnn/handle.h @@ -73,20 +73,20 @@ public: * * **Debug level 1 and 2 should not be used in productions.** */ - static std::unique_ptr make( + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make( megcoreComputingHandle_t computing_handle, int debug_level = 0); #if MEGDNN_WITH_CUDA - static std::unique_ptr make_cuda_handle( + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make_cuda_handle( megcoreComputingHandle_t computing_handle); template - std::unique_ptr create_cuda_operator(); + MGE_WIN_DECLSPEC_FUC std::unique_ptr create_cuda_operator(); #endif #if MEGDNN_WITH_ROCM - static std::unique_ptr make_rocm_handle( + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make_rocm_handle( megcoreComputingHandle_t computing_handle); template - std::unique_ptr create_rocm_operator(); + MGE_WIN_DECLSPEC_FUC std::unique_ptr create_rocm_operator(); #endif virtual ~Handle(); @@ -105,7 +105,7 @@ public: * * This function can be called at most once. */ - void set_destructor(const thin_function& d); + MGE_WIN_DECLSPEC_FUC void set_destructor(const thin_function& d); /*! * \brief set a callback to be invoked when an operator is destructed @@ -116,13 +116,13 @@ public: cb.swap(m_on_opr_destructed); } - void on_opr_destructed(OperatorBase* opr); + MGE_WIN_DECLSPEC_FUC void on_opr_destructed(OperatorBase* opr); /** * \brief Create operator of Opr type. 
*/ template - std::unique_ptr create_operator(); + MGE_WIN_DECLSPEC_FUC std::unique_ptr create_operator(); /* * ============================================================= @@ -134,13 +134,13 @@ public: * \brief The internal data pointer of TensorND should be aligned to * alignment_requirement() in bytes. */ - virtual size_t alignment_requirement() const; + MGE_WIN_DECLSPEC_FUC virtual size_t alignment_requirement() const; //! get alignment in bytes for rows of image 2D tensor format - virtual size_t image2d_pitch_alignment() const; + MGE_WIN_DECLSPEC_FUC virtual size_t image2d_pitch_alignment() const; //! get vendor type - virtual HandleVendorType vendor_type() const; + MGE_WIN_DECLSPEC_FUC virtual HandleVendorType vendor_type() const; HandleType type() const { return m_handle_type; } @@ -149,7 +149,8 @@ public: * 1. The handle of the src and the dst is the same kind * 2. The dst is continguous. */ - virtual bool check_cross_dev_copy_constraint(const TensorLayout& src); + MGE_WIN_DECLSPEC_FUC virtual bool check_cross_dev_copy_constraint( + const TensorLayout& src); private: static constexpr uint32_t ALIVE_MAGIC = 0x8595e9d2u; diff --git a/dnn/include/megdnn/oprs/general.h b/dnn/include/megdnn/oprs/general.h index c9af381f..95dddd91 100644 --- a/dnn/include/megdnn/oprs/general.h +++ b/dnn/include/megdnn/oprs/general.h @@ -51,7 +51,7 @@ public: name(NULL) {} //! get trait from a mode; this function is thread safe - static const ModeTrait& from_mode(Mode mode); + MGE_WIN_DECLSPEC_FUC static const ModeTrait& from_mode(Mode mode); }; //! get trait of current mode @@ -69,17 +69,20 @@ public: virtual void exec(_megdnn_in const TensorNDArray& src, _megdnn_tensor_out dst) = 0; //! deduce output shape (do not check whether arity matches) - static void deduce_shape(const TensorShapeArray& src, TensorShape& dst); + MGE_WIN_DECLSPEC_FUC static void deduce_shape( + const TensorShapeArray& src, TensorShape& dst); - static void deduce_format(const TensorFormatArray& src, TensorFormat& dst); + MGE_WIN_DECLSPEC_FUC static void deduce_format( + const TensorFormatArray& src, TensorFormat& dst); //! deduce output layout - void deduce_layout(const TensorLayoutArray& src, TensorLayout& dst); + MGE_WIN_DECLSPEC_FUC void deduce_layout( + const TensorLayoutArray& src, TensorLayout& dst); protected: //! throw exception if incorrect layout; broadcast input shape to //! output shape - void check_layout_and_broadcast( + MGE_WIN_DECLSPEC_FUC void check_layout_and_broadcast( const TensorLayoutPtrArray& src, const TensorLayout& dst); private: @@ -577,7 +580,7 @@ public: ParamPackConcatSplitBase(Handle* handle) : OperatorBase(handle) {} //! generate offsets to be used with ParamPackConcat and ParamPackSplit - static std::vector gen_offsets( + MGE_WIN_DECLSPEC_FUC static std::vector gen_offsets( const TensorShapeArray& shapes, size_t alignment, size_t dtype_size); }; diff --git a/dnn/include/megdnn/oprs/nn_int.h b/dnn/include/megdnn/oprs/nn_int.h index 9abd6b71..3406a236 100644 --- a/dnn/include/megdnn/oprs/nn_int.h +++ b/dnn/include/megdnn/oprs/nn_int.h @@ -43,7 +43,7 @@ public: const char* name = nullptr; //!< name of the mode //! 
get trait from a mode; this function is thread safe - static const ModeTrait& from_mode(Mode mode); + MGE_WIN_DECLSPEC_FUC static const ModeTrait& from_mode(Mode mode); }; virtual void exec(_megdnn_in const TensorNDArray& src, _megdnn_tensor_out dst) = 0; diff --git a/dnn/include/megdnn/thin/small_vector.h b/dnn/include/megdnn/thin/small_vector.h index fa4a36ec..b6ccf190 100644 --- a/dnn/include/megdnn/thin/small_vector.h +++ b/dnn/include/megdnn/thin/small_vector.h @@ -50,7 +50,8 @@ class SmallVectorBase { protected: void *m_begin_ptr, *m_end_ptr, *m_capacity_ptr; - MEGDNN_NORETURN static void on_invalid_at(size_t idx, size_t size); + MGE_WIN_DECLSPEC_FUC MEGDNN_NORETURN static void on_invalid_at( + size_t idx, size_t size); protected: SmallVectorBase(void* first_elm, size_t size) @@ -58,7 +59,8 @@ protected: m_end_ptr(first_elm), m_capacity_ptr(static_cast(first_elm) + size) {} - void grow_pod(void* first_elm_ptr, size_t min_sz_in_bytes, size_t type_size); + MGE_WIN_DECLSPEC_FUC void grow_pod( + void* first_elm_ptr, size_t min_sz_in_bytes, size_t type_size); public: size_t size_in_bytes() const { diff --git a/dnn/include/megdnn/version.h b/dnn/include/megdnn/version.h index e6365327..2881407e 100644 --- a/dnn/include/megdnn/version.h +++ b/dnn/include/megdnn/version.h @@ -14,6 +14,7 @@ #define MEGDNN_MINOR 3 #define MEGDNN_PATCH 0 +#include "megbrain_build_config.h" #include "megdnn/internal/visibility_prologue.h" namespace megdnn { @@ -22,7 +23,7 @@ struct Version { }; //! get megdnn version of the binary -Version get_version(); +MGE_WIN_DECLSPEC_FUC Version get_version(); } // namespace megdnn #include "megdnn/internal/visibility_epilogue.h" diff --git a/imperative/CMakeLists.txt b/imperative/CMakeLists.txt index 1f49d331..e3cccaac 100644 --- a/imperative/CMakeLists.txt +++ b/imperative/CMakeLists.txt @@ -25,15 +25,15 @@ add_custom_target(_version_ld SOURCES ${MGE_VERSION_SCRIPT}) add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) if (APPLE) - target_link_libraries(${MODULE_NAME} PRIVATE megengine_export) + target_link_libraries(${MODULE_NAME} PRIVATE megengine_shared) elseif (MSVC OR WIN32) - # Windows does not support implicitly importing data members from DLL. 
- target_link_libraries(${MODULE_NAME} PRIVATE megbrain megdnn ${MGE_CUDA_LIBS}) + target_link_libraries(${MODULE_NAME} PRIVATE megengine_shared) + target_compile_definitions(${MODULE_NAME} PRIVATE MGE_DLL_IMPORT_DATA) message(STATUS "CMAKE_MSVC_RUNTIME_LIBRARY: ${CMAKE_MSVC_RUNTIME_LIBRARY}") set_target_properties(${MODULE_NAME} PROPERTIES MSVC_RUNTIME_LIBRARY "${CMAKE_MSVC_RUNTIME_LIBRARY}") else() # use to fix runtime crash when build both mgb(MGE_WITH_PYTHON_MODULE) and imperative(MGE_BUILD_IMPERATIVE_RT) - target_link_libraries(${MODULE_NAME} PRIVATE megengine_export -Wl,--version-script=${MGE_VERSION_SCRIPT}) + target_link_libraries(${MODULE_NAME} PRIVATE megengine_shared -Wl,--version-script=${MGE_VERSION_SCRIPT}) endif() add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/range-v3 ${PROJECT_BINARY_DIR}/third_party/range-v3) diff --git a/imperative/python/megengine/__init__.py b/imperative/python/megengine/__init__.py index 3cb920fc..c525d4c0 100644 --- a/imperative/python/megengine/__init__.py +++ b/imperative/python/megengine/__init__.py @@ -54,6 +54,8 @@ if sys.platform == "win32": err.strerror += ' Error loading "{}" or one of its dependencies.'.format( dll ) + err.strerror += " \nplease install VC runtime from: " + err.strerror += " \nhttps://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-160" raise err elif res is not None: is_loaded = True @@ -67,6 +69,8 @@ if sys.platform == "win32": err.strerror += ' Error loading "{}" or one of its dependencies.'.format( dll ) + err.strerror += " \nplease install VC runtime from: " + err.strerror += " \nhttps://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-160" raise err kernel32.SetErrorMode(old_error_mode) diff --git a/lite/CMakeLists.txt b/lite/CMakeLists.txt index 41b8dcef..6e422252 100644 --- a/lite/CMakeLists.txt +++ b/lite/CMakeLists.txt @@ -42,6 +42,9 @@ include_directories($) if(LITE_BUILD_WITH_MGE) target_link_libraries(lite_static PRIVATE megbrain megdnn ${MGE_CUDA_LIBS}) add_compile_definitions(LITE_BUILD_WITH_MGE=1) + if(WIN32 OR MSVC) + target_compile_definitions(lite_static PRIVATE MGE_DLL_IMPORT_DATA) + endif() message(STATUS "build lite with MegEngine.") else() target_link_libraries(lite_static PUBLIC flatbuffers) @@ -71,12 +74,13 @@ endif() # define a shared lib for whl add_library(lite_shared_whl SHARED $) if(LITE_BUILD_WITH_MGE) - if (MSVC OR WIN32 OR IOS) - # TODO: this will lead whl size increase on Windows, caused by - # Windows does not support implicitly importing data members from DLL. 
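The removed comment above names the constraint behind this whole patch: MSVC resolves function imports through the import library even without any annotation, but references to data symbols (e.g. static members) must be declared __declspec(dllimport) on the consumer side, otherwise they remain unresolved at link time. That is why the DLL consumers here (_imperative_rt, lite_static, lite_shared_whl) now get a MGE_DLL_IMPORT_DATA compile definition instead of being forced to link megbrain/megdnn statically. A minimal, hypothetical illustration of the rule (the macro and struct names below are not MegEngine's):

// MSVC-specific sketch: data members need an explicit import annotation,
// plain function declarations do not.
#ifdef BUILDING_CORE_DLL                   // defined only while building the DLL
#define CORE_DATA __declspec(dllexport)
#else
#define CORE_DATA __declspec(dllimport)    // consumers importing the data symbol
#endif

struct Registry {
    static CORE_DATA int version;  // without CORE_DATA: LNK2001 unresolved external in consumers
    static int count();            // function: the import library supplies a thunk automatically
};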
+ if (IOS) target_link_libraries(lite_shared_whl PRIVATE megbrain megdnn ${MGE_CUDA_LIBS}) else() - target_link_libraries(lite_shared_whl PRIVATE megengine_export) + target_link_libraries(lite_shared_whl PRIVATE megengine_shared) + endif() + if(WIN32 OR MSVC) + target_compile_definitions(lite_shared_whl PRIVATE MGE_DLL_IMPORT_DATA) endif() endif() if(ANDROID) diff --git a/lite/pylite/megenginelite/base.py b/lite/pylite/megenginelite/base.py index 2aa65f3b..f803bea6 100644 --- a/lite/pylite/megenginelite/base.py +++ b/lite/pylite/megenginelite/base.py @@ -56,6 +56,8 @@ if sys.platform == "win32": err.strerror += ' Error loading "{}" or one of its dependencies.'.format( dll ) + err.strerror += " \nplease install VC runtime from: " + err.strerror += " \nhttps://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-160" raise err elif res is not None: is_loaded = True @@ -69,6 +71,8 @@ if sys.platform == "win32": err.strerror += ' Error loading "{}" or one of its dependencies.'.format( dll ) + err.strerror += " \nplease install VC runtime from: " + err.strerror += " \nhttps://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-160" raise err kernel32.SetErrorMode(old_error_mode) diff --git a/scripts/whl/macos/macos_build_whl.sh b/scripts/whl/macos/macos_build_whl.sh index 1993b481..c411c67d 100755 --- a/scripts/whl/macos/macos_build_whl.sh +++ b/scripts/whl/macos/macos_build_whl.sh @@ -89,7 +89,7 @@ function config_python_env() { fi } -MEGENGINE_LIB="${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/src/libmegengine_export.dylib" +MEGENGINE_LIB="${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/src/libmegengine_shared.dylib" function depend_real_copy() { REAL_DST=$1 echo "real copy lib to $1" @@ -192,7 +192,7 @@ function do_build() { fi #handle dlopen path - install_name_tool -change @rpath/libmegengine_export.dylib @loader_path/lib/libmegengine_export.dylib _imperative_rt.so + install_name_tool -change @rpath/libmegengine_shared.dylib @loader_path/lib/libmegengine_shared.dylib _imperative_rt.so #copy megbrain_export lib DEPEND_LIB=${BUILD_DIR}/staging/megengine/core/lib/ @@ -209,7 +209,7 @@ function do_build() { cp ${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/lite/liblite_shared_whl.dylib ${LITE_LIB} llvm-strip -s ${LITE_LIB} #handle dlopen path - install_name_tool -change @rpath/libmegengine_export.dylib @loader_path/../../megengine/core/lib/libmegengine_export.dylib ${LITE_LIB} + install_name_tool -change @rpath/libmegengine_shared.dylib @loader_path/../../megengine/core/lib/libmegengine_shared.dylib ${LITE_LIB} cd ${BUILD_DIR}/staging ${PYTHON_DIR}/bin/python3 setup.py bdist_wheel diff --git a/scripts/whl/manylinux2014/do_build_common.sh b/scripts/whl/manylinux2014/do_build_common.sh index 5f6b1087..2df0dbff 100755 --- a/scripts/whl/manylinux2014/do_build_common.sh +++ b/scripts/whl/manylinux2014/do_build_common.sh @@ -50,10 +50,10 @@ function patch_elf_depend_lib_mgb_mge() { patchelf --force-rpath --set-rpath '$ORIGIN/lib' ${BUILD_DIR}/staging/megengine/core/_imperative_rt.so handle_strip ${BUILD_DIR}/staging/megengine/core/_imperative_rt.so - cp ${BUILD_DIR}/src/libmegengine_export.so ${LIBS_DIR} - patchelf --remove-rpath ${LIBS_DIR}/libmegengine_export.so - patchelf --force-rpath --set-rpath '$ORIGIN/.' 
${LIBS_DIR}/libmegengine_export.so - handle_strip ${LIBS_DIR}/libmegengine_export.so + cp ${BUILD_DIR}/src/libmegengine_shared.so ${LIBS_DIR} + patchelf --remove-rpath ${LIBS_DIR}/libmegengine_shared.so + patchelf --force-rpath --set-rpath '$ORIGIN/.' ${LIBS_DIR}/libmegengine_shared.so + handle_strip ${LIBS_DIR}/libmegengine_shared.so # as some version of cudnn/trt libs have dlopen libs, so we can not use auditwheel # TODO: PR for auditwheel to support args for dlopen libs diff --git a/scripts/whl/windows/windows_build_whl.sh b/scripts/whl/windows/windows_build_whl.sh index e61ef3c8..0dc61908 100755 --- a/scripts/whl/windows/windows_build_whl.sh +++ b/scripts/whl/windows/windows_build_whl.sh @@ -18,8 +18,6 @@ function append_path_env_and_check() { export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise echo "export LLVM install path" export LLVM_PATH=/c/Program\ Files/LLVM_12_0_1 - # for llvm-strip - export PATH=${LLVM_PATH}/bin/:$PATH } append_path_env_and_check @@ -78,16 +76,23 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6 CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" +MGE_EXPORT_LIB="${SRC_DIR}/build_dir/host/build/src/megengine_shared.dll" + function depend_real_copy() { REAL_DST=$1 echo "real copy lib to $1" - cp "${TRT_LIB}" ${REAL_DST} - cp "${CUDNN_LIB}" ${REAL_DST} - cp "${CUSOLVER_LIB}" ${REAL_DST} - cp "${CUBLAS_LIB}" ${REAL_DST} - cp "${CURAND_LIB}" ${REAL_DST} - cp "${CUBLASLT_LIB}" ${REAL_DST} - cp "${CUDART_LIB}" ${REAL_DST} + cp "${MGE_EXPORT_LIB}" ${REAL_DST} + + if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then + echo "copy nvidia lib...." + cp "${TRT_LIB}" ${REAL_DST} + cp "${CUDNN_LIB}" ${REAL_DST} + cp "${CUSOLVER_LIB}" ${REAL_DST} + cp "${CUBLAS_LIB}" ${REAL_DST} + cp "${CURAND_LIB}" ${REAL_DST} + cp "${CUBLASLT_LIB}" ${REAL_DST} + cp "${CUDART_LIB}" ${REAL_DST} + fi } function copy_more_dll() { @@ -97,23 +102,15 @@ function copy_more_dll() { rm -rf ${CP_WHL_DST_IMP} mkdir ${CP_WHL_DST_IMP} - # workround for cpu-only version import failed, use a - # empty.file to triger setup.py to create a null empty - echo "empty" > ${CP_WHL_DST_IMP}/empty.file - - if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then - echo "copy nvidia lib to whl use...." 
- depend_real_copy ${CP_WHL_DST_IMP} - fi + depend_real_copy ${CP_WHL_DST_IMP} } function lite_copy_more_dll() { - if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then - if [ ${IN_CI} = "true" ]; then - echo "copy lib for lite for ci test" - IMP_TEST_DST=${SRC_DIR}/build_dir/host/build/lite/test/ - depend_real_copy ${IMP_TEST_DST} - fi + if [ ${IN_CI} = "true" ]; then + echo "copy lib for lite for ci test" + IMP_TEST_DST=${SRC_DIR}/build_dir/host/build/lite/test/ + depend_real_copy ${IMP_TEST_DST} + rm "${IMP_TEST_DST}/megengine_shared.dll" fi } @@ -199,7 +196,6 @@ function do_build() { echo "ERR: can not find valid rt file" exit -1 fi - llvm-strip -s ${rt_file} mv ${rt_file} _imperative_rt.pyd copy_more_dll @@ -212,7 +208,6 @@ function do_build() { mkdir -p ${LITE_CORE_LIB_DIR} cd ${LITE_CORE_LIB_DIR} cp ${BUILD_DIR}/lite/lite_shared_whl.dll liblite_shared_whl.pyd - llvm-strip -s liblite_shared_whl.pyd lite_copy_more_dll cd ${BUILD_DIR}/staging diff --git a/sdk/load-and-run/CMakeLists.txt b/sdk/load-and-run/CMakeLists.txt index 7b9bfc93..5823ca52 100755 --- a/sdk/load-and-run/CMakeLists.txt +++ b/sdk/load-and-run/CMakeLists.txt @@ -1,21 +1,24 @@ include_directories(src) -file (GLOB_RECURSE SOURCES src/*.cpp main.cpp) -add_executable (load_and_run ${SOURCES}) +file(GLOB_RECURSE SOURCES src/*.cpp main.cpp) -if (WIN32) - # Windows does not support implicitly importing data members from DLL. - target_link_libraries(load_and_run megbrain megdnn ${MGE_CUDA_LIBS}) -else() - target_link_libraries (load_and_run megengine) +add_executable(load_and_run ${SOURCES}) +target_link_libraries(load_and_run megbrain megdnn ${MGE_CUDA_LIBS}) + +# load_and_run_depends_shared always for CI check, please do not delete +if(BUILD_SHARED_LIBS) + add_executable(load_and_run_depends_shared ${SOURCES}) + target_link_libraries(load_and_run_depends_shared megengine) + if(WIN32 OR MSVC) + target_compile_definitions(load_and_run_depends_shared PRIVATE MGE_DLL_IMPORT_DATA) + endif() +endif() + +install(TARGETS load_and_run EXPORT ${MGE_EXPORT_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) +if(BUILD_SHARED_LIBS) + install(TARGETS load_and_run_depends_shared EXPORT ${MGE_EXPORT_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() -install (TARGETS load_and_run EXPORT ${MGE_EXPORT_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) if(MGE_WITH_TEST) add_executable(json_loader_test test/json_loader_test.cpp src/json_loader.h src/json_loader.cpp) - # Windows does not support implicitly importing data members from DLL. - if (WIN32) - target_link_libraries (json_loader_test megbrain megdnn ${MGE_CUDA_LIBS}) - else() - target_link_libraries (json_loader_test megengine) - endif() + target_link_libraries(json_loader_test megbrain megdnn ${MGE_CUDA_LIBS}) endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index be7caf74..3a834015 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -204,35 +204,45 @@ endif() set (_VER_FILE ${PROJECT_SOURCE_DIR}/src/version.ld) -# Windows does not support implicitly importing data members from DLL. 
-# on Windows: -# depends on megdnn/megbrain target, refs to sdk/load-and-run/CMakeLists.txt -# depends on megengine lite_share or lite_static -if(NOT WIN32) - message(VERBOSE "create a export SHARED lib for python use") - add_library(megengine_export SHARED) - target_link_libraries(megengine_export PUBLIC megbrain megdnn) - target_link_libraries(megengine_export PRIVATE ${MGE_CUDA_LIBS}) - if (MGE_WITH_DISTRIBUTED) - message(VERBOSE "megengine_export configured to link megray") - target_link_libraries(megengine_export PUBLIC megray) - endif() - - # Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF - add_library(megengine) - target_link_libraries(megengine PRIVATE ${MGE_CUDA_LIBS}) - target_link_libraries(megengine PUBLIC megbrain megdnn) - if (UNIX AND NOT APPLE) - target_link_options(megengine PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) - set_target_properties(megengine PROPERTIES LINK_DEPENDS ${_VER_FILE}) - endif() - # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready - # for this. - install(TARGETS megengine - EXPORT ${MGE_EXPORT_TARGETS} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() +# Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF +add_library(megengine) +# force define a SHARED target for whl, caused by when build for APPLE +# we will force set BUILD_SHARED_LIBS=OFF for xcode needed +add_library(megengine_shared SHARED) +target_link_libraries(megengine PRIVATE ${MGE_CUDA_LIBS}) +target_link_libraries(megengine PUBLIC megbrain megdnn) +target_link_libraries(megengine_shared PUBLIC megbrain megdnn) +target_link_libraries(megengine_shared PRIVATE ${MGE_CUDA_LIBS}) +if (UNIX AND NOT APPLE) + target_link_options(megengine PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) + set_target_properties(megengine PROPERTIES LINK_DEPENDS ${_VER_FILE}) + target_link_options(megengine_shared PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) + set_target_properties(megengine_shared PROPERTIES LINK_DEPENDS ${_VER_FILE}) +endif() +if(WIN32 OR MSVC) + target_compile_definitions(megbrain PRIVATE MGE_DLL_EXPORT) + target_compile_definitions(megdnn PRIVATE MGE_DLL_EXPORT) + target_compile_definitions(megengine PRIVATE MGE_DLL_EXPORT) + target_compile_definitions(megengine_shared PRIVATE MGE_DLL_EXPORT) + target_compile_definitions(megbrain PRIVATE MGE_DLL_EXPORT_DATA) + target_compile_definitions(megdnn PRIVATE MGE_DLL_EXPORT_DATA) + target_compile_definitions(megengine PRIVATE MGE_DLL_EXPORT_DATA) + target_compile_definitions(megengine_shared PRIVATE MGE_DLL_EXPORT_DATA) + # please do not use WINDOWS_EXPORT_ALL_SYMBOLS, as symbols max than 65535 when build with CUDA + #set_target_properties(megengine PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) + #set_target_properties(megengine_shared PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) +endif() +if (MGE_WITH_DISTRIBUTED) + message(VERBOSE "megengine configured to link megray") + target_link_libraries(megengine PUBLIC megray) + target_link_libraries(megengine_shared PUBLIC megray) +endif() +# Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready +# for this. 
+install(TARGETS megengine
+    EXPORT ${MGE_EXPORT_TARGETS}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
 if (NOT MGE_WITH_DISTRIBUTED)
     install(TARGETS megbrain
diff --git a/src/core/impl/comp_node/cuda/comp_node.cpp b/src/core/impl/comp_node/cuda/comp_node.cpp
index c583e4fa..e37351be 100644
--- a/src/core/impl/comp_node/cuda/comp_node.cpp
+++ b/src/core/impl/comp_node/cuda/comp_node.cpp
@@ -612,6 +612,19 @@ bool CudaCompNodeImpl::check_global_finalized() {
                 "recovery by OS!!");
         return true;
     }
+    //! FIXME: when megengine is dynamically linked with the VCRT, the atexit
+    //! function table has an ordering issue that leads to the cuda runtime being
+    //! unloaded too early; this always happens when python3 unloads the dll
+    //! (i.e. when python3 exits). As a workaround, let the OS recover the
+    //! resources for now; this may be removed after upgrading the cuda runtime.
+    int dev = -1;
+    if (cudaErrorCudartUnloading == cudaGetDevice(&dev)) {
+        mgb_log_debug(
+                "windows cudaErrorCudartUnloading happened!!, resource "
+                "recovery by OS!!");
+        return true;
+    }
+
 #endif
     return false;
 }
diff --git a/src/core/impl/graph/static_infer_impl.h b/src/core/impl/graph/static_infer_impl.h
index 227eba31..572017a0 100644
--- a/src/core/impl/graph/static_infer_impl.h
+++ b/src/core/impl/graph/static_infer_impl.h
@@ -58,18 +58,18 @@ public:
     /*!
      * \brief get a tag handler for shape inference
      */
-    TagHandler* get_tag_handler_for_shape(Tag tag);
+    MGE_WIN_DECLSPEC_FUC TagHandler* get_tag_handler_for_shape(Tag tag);

     /*!
      * \brief get a tag handler for value inference
      */
-    TagHandler* get_tag_handler_for_value(Tag tag);
+    MGE_WIN_DECLSPEC_FUC TagHandler* get_tag_handler_for_value(Tag tag);

     /*!
      * \brief clear registered handler for a tag; this is only used in error
      * handling in opr creation
      */
-    void clear_tag_handler(Tag tag);
+    MGE_WIN_DECLSPEC_FUC void clear_tag_handler(Tag tag);

     /*!
      * \brief set the operator that is allowd to call register_*_infer
@@ -87,13 +87,13 @@ public:
      * tag
      * \return set of missing inputs; the pointer is always available
      */
-    const TagHandlerSet& get_missing_inp(TagHandler* dest);
+    MGE_WIN_DECLSPEC_FUC const TagHandlerSet& get_missing_inp(TagHandler* dest);

     /*!
      * \brief update mutable src tag's shape explictly which only used by eager eval
      */
-    void update_mutable_src_shape(Tag tag);
+    MGE_WIN_DECLSPEC_FUC void update_mutable_src_shape(Tag tag);

     /*!
* \brief get original deps given in the InferDesc which is registered @@ -103,7 +103,7 @@ public: * deps since the StaticInferManagerImpl folds the infererence chain of * the const var shape */ - DepVal get_deps(const DepElement& elem); + MGE_WIN_DECLSPEC_FUC DepVal get_deps(const DepElement& elem); private: friend class CompSeqManager; diff --git a/src/core/impl/tensor.cpp b/src/core/impl/tensor.cpp index bbe3b32f..c1010bf9 100644 --- a/src/core/impl/tensor.cpp +++ b/src/core/impl/tensor.cpp @@ -333,7 +333,7 @@ namespace mgb { // host to host template <> template <> -void TensorStorage::copy_from( +MGE_WIN_DECLSPEC_FUC void TensorStorage::copy_from( const TensorStorage& src, size_t size) const { mgb_assert(size <= this->size() && size <= src.size()); memcpy(ptr(), src.ptr(), size); @@ -342,7 +342,7 @@ void TensorStorage::copy_from( // device to host template <> template <> -void TensorStorage::copy_from( +MGE_WIN_DECLSPEC_FUC void TensorStorage::copy_from( const TensorStorage& src, size_t size) const { bool need_sync = false; mgb_assert(size <= this->size() && size <= src.size()); @@ -370,7 +370,7 @@ void TensorStorage::copy_from( // host to device template <> template <> -void TensorStorage::copy_from( +MGE_WIN_DECLSPEC_FUC void TensorStorage::copy_from( const TensorStorage& src, size_t size) const { mgb_assert(size <= this->size() && size <= src.size()); m_comp_node.copy_to_device(ptr(), src.ptr(), size); @@ -379,7 +379,7 @@ void TensorStorage::copy_from( // device to device template <> template <> -void TensorStorage::copy_from( +MGE_WIN_DECLSPEC_FUC void TensorStorage::copy_from( const TensorStorage& src, size_t size) const { mgb_assert(size <= this->size() && size <= src.size()); if (src.comp_node().device_type() == CompNode::DeviceType::CPU && diff --git a/src/core/include/megbrain/common.h b/src/core/include/megbrain/common.h index 9d6b6ac0..bf81a27c 100644 --- a/src/core/include/megbrain/common.h +++ b/src/core/include/megbrain/common.h @@ -110,7 +110,7 @@ void __on_exception_throw__(const std::exception& exc) __attribute__((noreturn)) } while (0) // assert -void __assert_fail__( +MGE_WIN_DECLSPEC_FUC void __assert_fail__( const char* file, int line, const char* func, const char* expr, const char* msg_fmt = 0, ...) __attribute__((format(printf, 5, 6), noreturn)); #if MGB_ASSERT_LOC @@ -165,23 +165,23 @@ typedef void (*LogHandler)( * * \return previous log level */ -LogLevel set_log_level(LogLevel level); +MGE_WIN_DECLSPEC_FUC LogLevel set_log_level(LogLevel level); /*! * \brief get logging level * * \return current log level */ -LogLevel get_log_level(); +MGE_WIN_DECLSPEC_FUC LogLevel get_log_level(); /*! * \brief set callback for receiving log requests * \return previous log handler */ -LogHandler set_log_handler(LogHandler handler); +MGE_WIN_DECLSPEC_FUC LogHandler set_log_handler(LogHandler handler); #if MGB_ENABLE_LOGGING -void __log__( +MGE_WIN_DECLSPEC_FUC void __log__( LogLevel level, const char* file, const char* func, int line, const char* fmt, ...) __attribute__((format(printf, 5, 6))); @@ -233,9 +233,10 @@ void __log__( /*! * \brief printf-like std::string constructor */ -std::string ssprintf(const char* fmt, ...) __attribute__((format(printf, 1, 2))); +MGE_WIN_DECLSPEC_FUC std::string ssprintf(const char* fmt, ...) 
+ __attribute__((format(printf, 1, 2))); -std::string svsprintf(const char* fmt, va_list ap); +MGE_WIN_DECLSPEC_FUC std::string svsprintf(const char* fmt, va_list ap); #if 0 // used for win32 with vs prior to 2015 diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h index 9bb7eea0..7ca88006 100644 --- a/src/core/include/megbrain/comp_node.h +++ b/src/core/include/megbrain/comp_node.h @@ -129,18 +129,19 @@ public: * currently supported ID format: (gpu|cpu)[:m] where n is the * device number, possibly with m as the stream id. */ - static Locator parse(const std::string& id); + MGE_WIN_DECLSPEC_FUC static Locator parse(const std::string& id); /*! * \brief set mapping between device numbers of a device type */ - static void set_device_map(DeviceType type, int from, int to); + MGE_WIN_DECLSPEC_FUC static void set_device_map( + DeviceType type, int from, int to); /*! * \brief set the actual device type to be used for * DeviceType::UNSPEC */ - static void set_unspec_device_type(DeviceType type); + MGE_WIN_DECLSPEC_FUC static void set_unspec_device_type(DeviceType type); /*! * \brief get corresponding physical Locator @@ -148,13 +149,13 @@ public: * DeviceType::UNSPEC would be resolved, and device map would be * applied on device number */ - Locator to_physical() const; + MGE_WIN_DECLSPEC_FUC Locator to_physical() const; /*! * \brief get string description of this locator that can be parsed * again */ - std::string to_string() const; + MGE_WIN_DECLSPEC_FUC std::string to_string() const; bool operator==(const Locator& rhs) const { return type == rhs.type && device == rhs.device && stream == rhs.stream; @@ -186,7 +187,7 @@ public: /*! * \brief manually destroy all comp node resources */ - static void finalize(); + MGE_WIN_DECLSPEC_FUC static void finalize(); /*! * \brief load a computing node from logical locator ID; @@ -201,7 +202,7 @@ public: return load(locator.to_physical(), locator); } - static CompNode load( + MGE_WIN_DECLSPEC_FUC static CompNode load( const Locator& locator_physical, const Locator& locator_logical); /* =================== memory management ======================== */ @@ -216,10 +217,10 @@ public: * * Exception should be raised if allocation fails. */ - void* alloc_device(size_t size) const; + MGE_WIN_DECLSPEC_FUC void* alloc_device(size_t size) const; //! deallocate device buffer; see alloc_device() for more details - void free_device(void* ptr) const; + MGE_WIN_DECLSPEC_FUC void free_device(void* ptr) const; /*! * \brief allocate memory on host that is associated with the device, @@ -227,9 +228,9 @@ public: * * Both allocation and deallocation on host are synchronous. */ - void* alloc_host(size_t size) const; + MGE_WIN_DECLSPEC_FUC void* alloc_host(size_t size) const; - void free_host(void* ptr) const; + MGE_WIN_DECLSPEC_FUC void free_host(void* ptr) const; //! copy from underlying device to host void copy_to_host(void* host_ptr, const void* device_ptr, size_t size) const { @@ -269,19 +270,20 @@ public: * \brief release consecutive free chunks on all devices to defragment; * see DevMemAlloc::try_coalesce_free */ - static void try_coalesce_all_free_memory(); + MGE_WIN_DECLSPEC_FUC static void try_coalesce_all_free_memory(); /* * \brief specifies how to pre-allocate from raw dev allocator * */ - static void set_prealloc_config( + MGE_WIN_DECLSPEC_FUC static void set_prealloc_config( size_t alignment, size_t min_req, size_t max_overhead, double growth_factor, DeviceType device_type); /*! 
* \brief get compute capability of the specified device */ - static size_t get_compute_capability(int dev, DeviceType device_type); + MGE_WIN_DECLSPEC_FUC static size_t get_compute_capability( + int dev, DeviceType device_type); /* =================== synchronization ======================== */ @@ -304,7 +306,7 @@ public: /*! * \brief synchronize all computing nodes */ - static void sync_all(); + MGE_WIN_DECLSPEC_FUC static void sync_all(); /* =================== misc ======================== */ @@ -341,7 +343,7 @@ public: #endif //! change to another stream on the same memory node - CompNode change_stream(int dest_stream) const; + MGE_WIN_DECLSPEC_FUC CompNode change_stream(int dest_stream) const; //! get string representation std::string to_string() const { @@ -371,10 +373,10 @@ public: Locator locator_logical() const { return m_impl->locator_logical(); } //! see CompNodeEnv::activate - void activate() const; + MGE_WIN_DECLSPEC_FUC void activate() const; //! get device type of this comp node - DeviceType device_type() const; + MGE_WIN_DECLSPEC_FUC DeviceType device_type() const; /*! * \brief check for error on the asynchronous computing stream @@ -385,7 +387,7 @@ public: * directly throw exception; return nullptr if no error. */ MGB_WARN_UNUSED_RESULT - std::unique_ptr check_async_error() const; + MGE_WIN_DECLSPEC_FUC std::unique_ptr check_async_error() const; /*! * \brief create a CompNodeSeqRecorder associated with this computing @@ -461,7 +463,7 @@ public: bool contain_flag(Flag flag) { return contain_flag(device_type(), flag); } - static bool contain_flag(DeviceType device_type, Flag flag); + MGE_WIN_DECLSPEC_FUC static bool contain_flag(DeviceType device_type, Flag flag); using UnorderedSet = ThinHashSet; @@ -469,16 +471,17 @@ public: using UnorderedMap = ThinHashMap; //! apply function to each initialized comp node - static void foreach (thin_function callback); + MGE_WIN_DECLSPEC_FUC static void foreach (thin_function callback); //! get total number of specific devices on this system - static size_t get_device_count(DeviceType type, bool warn = true); + MGE_WIN_DECLSPEC_FUC static size_t get_device_count( + DeviceType type, bool warn = true); /* =================== specialized ======================== */ //! get default CPU comp node // implemented in comp_node/cpu/comp_node.cpp - static CompNode default_cpu(); + MGE_WIN_DECLSPEC_FUC static CompNode default_cpu(); /*! * \brief set whether to enable affinity setting for CPU comp nodes @@ -491,7 +494,7 @@ public: * * \return original setting */ - static bool enable_affinity_for_cpu(bool flag); + MGE_WIN_DECLSPEC_FUC static bool enable_affinity_for_cpu(bool flag); protected: //! ImplBase with env(); defined in CompNodeEnv @@ -680,15 +683,15 @@ class CompNode::EventPool { size_t m_flags; public: - explicit EventPool(CompNode cn, size_t flags = 0); - ~EventPool(); + MGE_WIN_DECLSPEC_FUC explicit EventPool(CompNode cn, size_t flags = 0); + MGE_WIN_DECLSPEC_FUC ~EventPool(); - CompNode::Event* alloc(); + MGE_WIN_DECLSPEC_FUC CompNode::Event* alloc(); - void free(CompNode::Event* ev); + MGE_WIN_DECLSPEC_FUC void free(CompNode::Event* ev); //! 
assert that all allocated events have been freed - void assert_all_freed(); + MGE_WIN_DECLSPEC_FUC void assert_all_freed(); }; void CompNode::device_wait_event(Event& event) const { @@ -732,14 +735,14 @@ class DepedentObjList { } protected: - virtual std::shared_ptr callback() = 0; + MGE_WIN_DECLSPEC_FUC virtual std::shared_ptr callback() = 0; ~DepedentObjList() = default; - static void add(DepedentObjList* ptr); - static void remove(DepedentObjList* ptr); + MGE_WIN_DECLSPEC_FUC static void add(DepedentObjList* ptr); + MGE_WIN_DECLSPEC_FUC static void remove(DepedentObjList* ptr); public: - static void invoke_callback_and_clean(); + MGE_WIN_DECLSPEC_FUC static void invoke_callback_and_clean(); }; } // namespace comp_node_detail @@ -764,7 +767,7 @@ public: class CompNodeDepedentObject : private comp_node_detail::DepedentObjList { //! 1: in on_comp_node_finalize(); 2: after on_comp_node_finalize() int m_state = 0; - std::shared_ptr callback() override final; + MGE_WIN_DECLSPEC_FUC std::shared_ptr callback() override final; protected: CompNodeDepedentObject() { add(this); } diff --git a/src/core/include/megbrain/comp_node_env.h b/src/core/include/megbrain/comp_node_env.h index 759c1e34..f8f2f013 100644 --- a/src/core/include/megbrain/comp_node_env.h +++ b/src/core/include/megbrain/comp_node_env.h @@ -191,10 +191,10 @@ namespace mgb { #endif #if MGB_CUDA -[[noreturn]] void _on_cuda_error( +[[noreturn]] MGE_WIN_DECLSPEC_FUC void _on_cuda_error( const char* expr, cudaError_t err, const char* file, const char* func, int line); -[[noreturn]] void _on_cuda_cu_error( +[[noreturn]] MGE_WIN_DECLSPEC_FUC void _on_cuda_cu_error( const char* expr, CUresult err, const char* file, const char* func, int line); #endif @@ -509,13 +509,14 @@ public: bool* do_task_inplace = nullptr; #endif - void enable_dispatch(); + MGE_WIN_DECLSPEC_FUC void enable_dispatch(); - void disable_dispatch(bool* flag); + MGE_WIN_DECLSPEC_FUC void disable_dispatch(bool* flag); - void dispatch(Task&& task) const; + MGE_WIN_DECLSPEC_FUC void dispatch(Task&& task) const; - void dispatch(MultiThreadingTask&& task, size_t parallelism) const; + MGE_WIN_DECLSPEC_FUC void dispatch( + MultiThreadingTask&& task, size_t parallelism) const; void set_affinity(AffinityCallBack&& cb) const { dispatcher->set_affinity(std::move(cb)); @@ -560,7 +561,8 @@ private: std::unique_ptr m_user_data_container; mutable RecursiveSpinlock m_user_data_container_mtx; - [[noreturn]] void on_bad_device_type(DeviceType expected) const; + [[noreturn]] MGE_WIN_DECLSPEC_FUC void on_bad_device_type( + DeviceType expected) const; #if MGB_ENABLE_COMP_NODE_ASYNC_INIT //! whether async init is in future; set by init*_async methods @@ -575,7 +577,7 @@ private: } } - void wait_async_init(); + MGE_WIN_DECLSPEC_FUC void wait_async_init(); #else void ensure_async_init_finished() const {} #endif @@ -597,10 +599,10 @@ class MegDNNHandle final : public UserDataContainer::UserData, #endif public: - MegDNNHandle(const CompNodeEnv& env); + MGE_WIN_DECLSPEC_FUC MegDNNHandle(const CompNodeEnv& env); ~MegDNNHandle() noexcept; - static MegDNNHandle& get(const CompNodeEnv& env); + MGE_WIN_DECLSPEC_FUC static MegDNNHandle& get(const CompNodeEnv& env); megdnn::Handle* operator->() const { return handle(); } diff --git a/src/core/include/megbrain/dtype.h b/src/core/include/megbrain/dtype.h index 4286f2c5..b4f24e30 100644 --- a/src/core/include/megbrain/dtype.h +++ b/src/core/include/megbrain/dtype.h @@ -97,7 +97,7 @@ public: /*! 
* \brief set to given value by raw storage */ - DTypeScalar& set_raw(DType dtype, const void* storage); + MGE_WIN_DECLSPEC_FUC DTypeScalar& set_raw(DType dtype, const void* storage); /*! * \brief set to given value, with dtype corresponding to ctype @@ -114,7 +114,8 @@ public: * \brief set to given value, but use current dtype and cast value to it */ template - typename ctype_enable_if::type set_retain_dtype(ctype val); + MGE_WIN_DECLSPEC_FUC typename ctype_enable_if::type set_retain_dtype( + ctype val); /*! * \brief get underlying value, which must be exactly given type @@ -172,30 +173,32 @@ static_assert( sizeof(DTypeScalar) == sizeof(DTypeScalar::max_ctype) + sizeof(DType), "bad DTypeScalar size"); -DType dtype_promotion(DType t0, DType t1); +MGE_WIN_DECLSPEC_FUC DType dtype_promotion(DType t0, DType t1); /*! * \brief copy from byte representation to compact representation for lowbit * types */ -void lowbit_memcpy_byte2compact(DType dtype, void* dest, const void* src, size_t n); +MGE_WIN_DECLSPEC_FUC void lowbit_memcpy_byte2compact( + DType dtype, void* dest, const void* src, size_t n); /*! * \brief copy from compact representation to byte representation for lowbit * types */ -void lowbit_memcpy_compact2byte(DType dtype, void* dest, const void* src, size_t n); +MGE_WIN_DECLSPEC_FUC void lowbit_memcpy_compact2byte( + DType dtype, void* dest, const void* src, size_t n); /*! * \brief copy from byte representation to an aligend tensor for lowbit types */ -void lowbit_memcpy_byte2aligned( +MGE_WIN_DECLSPEC_FUC void lowbit_memcpy_byte2aligned( void* dest, const void* src, const ::megdnn::TensorLayout& ly); /*! * \brief copy from an aligend tensor to byte representation for lowbit types */ -void lowbit_memcpy_aligned2byte( +MGE_WIN_DECLSPEC_FUC void lowbit_memcpy_aligned2byte( void* dest, const void* src, const ::megdnn::TensorLayout& ly); } // namespace mgb diff --git a/src/core/include/megbrain/exception.h b/src/core/include/megbrain/exception.h index e09aa9ca..0a210f16 100644 --- a/src/core/include/megbrain/exception.h +++ b/src/core/include/megbrain/exception.h @@ -110,7 +110,7 @@ public: private: std::shared_ptr m_extra_info; - void init(); + MGE_WIN_DECLSPEC_FUC void init(); }; //! base class for system error: error caused by uncontrollable environment diff --git a/src/core/include/megbrain/graph/cg.h b/src/core/include/megbrain/graph/cg.h index 31a88568..3dad0648 100644 --- a/src/core/include/megbrain/graph/cg.h +++ b/src/core/include/megbrain/graph/cg.h @@ -48,7 +48,7 @@ public: * \param[out] dest output tensor storage; its comp node has been * initialized to target comp node */ - virtual void alloc_static( + MGE_WIN_DECLSPEC_FUC virtual void alloc_static( ComputingGraph* graph, DeviceTensorStorage& dest, size_t size); /*! @@ -59,7 +59,8 @@ public: * Note: if allocation fails, MemAllocError should be raised so * VarDevMemDefragmenter can catch the error and do defragmentation. */ - virtual void alloc_dynamic(VarNode* var, DeviceTensorStorage& dest, size_t size); + MGE_WIN_DECLSPEC_FUC virtual void alloc_dynamic( + VarNode* var, DeviceTensorStorage& dest, size_t size); /*! * \brief Ensure a contiguous storage for memory defragmenter @@ -68,7 +69,7 @@ public: * allocation requests can be placed in a contiguous storage. This function * would be called before calling alloc_dynamic() on the individual vars. */ - virtual void defrag_prealloc_contig( + MGE_WIN_DECLSPEC_FUC virtual void defrag_prealloc_contig( ComputingGraph* graph, CompNode comp_node, size_t size); /*! 
@@ -77,7 +78,8 @@ public: * If version changes before graph exec, static memory would be reallocated. * This function would be only called once in each graph execution. */ - virtual size_t static_alloc_version(ComputingGraph* graph) const; + MGE_WIN_DECLSPEC_FUC virtual size_t static_alloc_version( + ComputingGraph* graph) const; }; /** @@ -168,7 +170,7 @@ struct GraphCommonOptimizeOptions { class ComputingGraph : public std::enable_shared_from_this, public CompNodeDepedentObject { public: - ComputingGraph(); + MGE_WIN_DECLSPEC_FUC ComputingGraph(); virtual ~ComputingGraph() = default; /*! @@ -181,10 +183,11 @@ public: virtual size_t next_node_id() = 0; - static std::shared_ptr make(); + MGE_WIN_DECLSPEC_FUC static std::shared_ptr make(); //! assert that refcnt for ptr is one and destories the ptr - static void assert_destroy(std::shared_ptr& ptr); + MGE_WIN_DECLSPEC_FUC static void assert_destroy( + std::shared_ptr& ptr); /*! * \brief callback to be invoked when some output is ready diff --git a/src/core/include/megbrain/graph/event.h b/src/core/include/megbrain/graph/event.h index 5c279116..e60a29f0 100644 --- a/src/core/include/megbrain/graph/event.h +++ b/src/core/include/megbrain/graph/event.h @@ -33,7 +33,7 @@ struct OprInserted { //! associated exception if insertion fails; nullptr if no error MegBrainError* exc; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -44,7 +44,7 @@ struct OprExecStart { OperatorNodeBase* opr; GraphExecutable::ExecEnv* env; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -55,7 +55,7 @@ struct AfterWait { CompNode comp_node; OperatorNodeBase* opr; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -66,7 +66,7 @@ struct OprExecKernelStart { OperatorNodeBase* opr; GraphExecutable::ExecEnv* env; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -76,7 +76,7 @@ struct OprExecKernelEnd { OperatorNodeBase* opr; GraphExecutable::ExecEnv* env; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -86,7 +86,7 @@ struct OprExecFinished { OperatorNodeBase* opr; GraphExecutable::ExecEnv* env; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -98,7 +98,7 @@ struct BeforeKernel { OperatorNodeBase* opr; CompNode comp_node; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -110,7 +110,7 @@ struct AfterKernel { OperatorNodeBase* opr; CompNode comp_node; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -128,7 +128,7 @@ struct StaticMemAlloc { CompNode comp_node; size_t alloc_size; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -139,7 +139,7 @@ struct CompSeqOrderDetermined { ComputingGraph* graph; AsyncExecutable* exec; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -163,7 +163,7 @@ struct CompSeqExecBeforeStart { //! configuration) size_t seq_version; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -197,7 +197,7 @@ struct CompSeqExecFinished { ComputingGraph* graph; AsyncExecutable* exec; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! @@ -211,7 +211,7 @@ struct CompSeqExecError { ComputingGraph* grah; AsyncExecutable* exec; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; /*! 
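The graph event payload structs switch from MGB_TYPEINFO_OBJ_DECL to MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT (defined later in this patch, in utils/metahelper.h) so each struct's static sm_typeinfo object has exactly one exported definition. Event receivers are registered by Typeinfo pointer (see the do_register_receiver(Typeinfo*, ...) signature in utils/event.h further down), so a receiver living in another DLL only matches if it sees the same Typeinfo address as the signaling side. The registration call below is assumed from the usual SyncEventConnecter interface and is not part of this hunk.

    // Rough illustration; graph->event()/register_receiver usage is assumed.
    auto handler = graph->event().register_receiver<mgb::cg::event::OprExecStart>(
            [](const mgb::cg::event::OprExecStart& ev) {
                // dispatch is keyed on OprExecStart::typeinfo(); the pointer only
                // matches across DLLs because sm_typeinfo now has one exported
                // definition
            });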
@@ -221,7 +221,7 @@ struct SubgraphAssociated { ComputingGraph* par_graph; ComputingGraph* sub_graph; - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; #if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER @@ -229,7 +229,7 @@ struct SubgraphAssociated { * \brief signaled before graph memory defragementation */ struct BeforeMemDefrag { - MGB_TYPEINFO_OBJ_DECL; + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT; }; #endif diff --git a/src/core/include/megbrain/graph/extern_copr_api.h b/src/core/include/megbrain/graph/extern_copr_api.h index c380e63c..680b31e3 100644 --- a/src/core/include/megbrain/graph/extern_copr_api.h +++ b/src/core/include/megbrain/graph/extern_copr_api.h @@ -20,7 +20,7 @@ namespace mgb { /*! * \brief config extern c opr dynamic param */ -void config_extern_c_opr_dynamic_param( +MGE_WIN_DECLSPEC_FUC void config_extern_c_opr_dynamic_param( std::unique_ptr& func, std::shared_ptr param); diff --git a/src/core/include/megbrain/graph/grad_impl.h b/src/core/include/megbrain/graph/grad_impl.h index 5a084d3b..ab28d7b8 100644 --- a/src/core/include/megbrain/graph/grad_impl.h +++ b/src/core/include/megbrain/graph/grad_impl.h @@ -31,7 +31,7 @@ public: //! check that m_all.size() matches opr->input().size(), and return //! m_all - VarNodeArray& all(OperatorNodeBase* opr); + MGE_WIN_DECLSPEC_FUC VarNodeArray& all(OperatorNodeBase* opr); }; /*! @@ -69,12 +69,12 @@ using VarVirtualReceiverGrad = thin_function& varmap); /*! @@ -169,7 +172,7 @@ SymbolVarArray replace_vars( * \return a list of vars correpsonding to \p dest whose dependencies have been * replaced according to \p oprmap */ -SymbolVarArray replace_oprs( +MGE_WIN_DECLSPEC_FUC SymbolVarArray replace_oprs( const SymbolVarArray& dest, const ThinHashMap& oprmap); @@ -180,10 +183,10 @@ SymbolVarArray replace_oprs( * \return a list of vars correpsonding to \p dest whose owner_graph have been * replaced with \p new_graph */ -SymbolVarArray replace_vars_comp_graph( - const SymbolVarArray& dest, ComputingGraph* new_graph); +MGE_WIN_DECLSPEC_FUC SymbolVarArray +replace_vars_comp_graph(const SymbolVarArray& dest, ComputingGraph* new_graph); -SymbolVarArray find_h2d(const SymbolVarArray& dest); +MGE_WIN_DECLSPEC_FUC SymbolVarArray find_h2d(const SymbolVarArray& dest); /*! * \brief go through OperatorNodeBase::NodeProp::Attribute::src_opr until it @@ -191,7 +194,7 @@ SymbolVarArray find_h2d(const SymbolVarArray& dest); * * This function also performs path compression */ -OperatorNodeBase* get_opr_root_source_opr(OperatorNodeBase* opr); +MGE_WIN_DECLSPEC_FUC OperatorNodeBase* get_opr_root_source_opr(OperatorNodeBase* opr); //! describes how two mem plans intersect enum class MemPlanIntersectionType { @@ -199,13 +202,14 @@ enum class MemPlanIntersectionType { IDENTICAL, //!< completely same OVERLAP //!< intersects but not identical }; -MemPlanIntersectionType get_mem_plan_intersection_type(VarNode* a, VarNode* b); +MGE_WIN_DECLSPEC_FUC MemPlanIntersectionType +get_mem_plan_intersection_type(VarNode* a, VarNode* b); /*! * \brief request output var to writable forward input var if no mem plan of * other input vars intersects with this input var */ -void request_fwd_in2out_writable_if_no_mem_ovelap( +MGE_WIN_DECLSPEC_FUC void request_fwd_in2out_writable_if_no_mem_ovelap( OperatorNodeBase* opr, size_t inp, size_t out); /*! 
@@ -217,7 +221,7 @@ void request_fwd_in2out_writable_if_no_mem_ovelap( * * Note: implemented in cg_impl.cpp, since it is used during graph init */ -void update_output_var_shapes(OperatorNodeBase* opr); +MGE_WIN_DECLSPEC_FUC void update_output_var_shapes(OperatorNodeBase* opr); /*! * \brief add an output to be used as the workspace for an operator @@ -227,17 +231,19 @@ void update_output_var_shapes(OperatorNodeBase* opr); * This helper is usually called from an opr constructor and used for adding the * last output. */ -void add_workspace_output(OperatorNodeBase* opr); +MGE_WIN_DECLSPEC_FUC void add_workspace_output(OperatorNodeBase* opr); /*! * \brief copy a raw tensor shape into a host tensor */ -void copy_shape_to_tensor_value(DeviceTensorND& dest, const TensorShape& shp); +MGE_WIN_DECLSPEC_FUC void copy_shape_to_tensor_value( + DeviceTensorND& dest, const TensorShape& shp); /*! * \brief copy value of a host tensor into a raw tensor shape */ -void copy_tensor_value_to_shape(TensorShape& dest, const DeviceTensorND& val); +MGE_WIN_DECLSPEC_FUC void copy_tensor_value_to_shape( + TensorShape& dest, const DeviceTensorND& val); /*! * \brief get a symbolvar whose value is tensor shape, used for other @@ -246,7 +252,7 @@ void copy_tensor_value_to_shape(TensorShape& dest, const DeviceTensorND& val); * \param opr_name operator that invokes this function; used in error * function if *config* is invalid */ -SymbolVar var_from_tensor_shape( +MGE_WIN_DECLSPEC_FUC SymbolVar var_from_tensor_shape( ComputingGraph& graph, const OperatorNodeConfig& config, const char* opr_name, const TensorShape& shape); @@ -275,7 +281,7 @@ public: : m_cb{std::move(cb)}, m_extra_dep(std::move(extra_dep)) {} //! add an operator whose deps should be discovered - void add(OperatorNodeBase* dest); + MGE_WIN_DECLSPEC_FUC void add(OperatorNodeBase* dest); void add(SymbolVar var) { add(var.node()->owner_opr()); } @@ -334,7 +340,7 @@ public: * * This function should be called only once on a graph */ - static void register_to( + MGE_WIN_DECLSPEC_FUC static void register_to( ComputingGraph* dest, const ComputingGraph* src, const TransFunc& trans); /*! @@ -342,12 +348,13 @@ public: * \return previously registered transformer on given graph or nullptr * if none registered */ - static const InterGraphVarTransformer* get(const ComputingGraph& graph); + MGE_WIN_DECLSPEC_FUC static const InterGraphVarTransformer* get( + const ComputingGraph& graph); /*! * \brief transform a var into this graph */ - VarNode* trans(VarNode* src) const; + MGE_WIN_DECLSPEC_FUC VarNode* trans(VarNode* src) const; private: ComputingGraph* m_graph_dest; diff --git a/src/core/include/megbrain/graph/operator_node.h b/src/core/include/megbrain/graph/operator_node.h index 5006018b..67b9c81b 100644 --- a/src/core/include/megbrain/graph/operator_node.h +++ b/src/core/include/megbrain/graph/operator_node.h @@ -31,13 +31,13 @@ class ExecutionMask; * \brief configuration for operator nodes */ class OperatorNodeConfig final : public Hashable { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; public: using CompNodeArray = SmallVector; OperatorNodeConfig() = default; - ~OperatorNodeConfig(); + MGE_WIN_DECLSPEC_FUC ~OperatorNodeConfig(); OperatorNodeConfig(std::string name) : m_name{std::move(name)} {} @@ -101,18 +101,18 @@ public: /*! * \brief set preferred single comp node */ - OperatorNodeConfig& comp_node(const CompNode& node); + MGE_WIN_DECLSPEC_FUC OperatorNodeConfig& comp_node(const CompNode& node); /*! 
* \brief directly set all the CompNodes */ - OperatorNodeConfig& comp_node_arr(const CompNodeArray& arr); + MGE_WIN_DECLSPEC_FUC OperatorNodeConfig& comp_node_arr(const CompNodeArray& arr); /*! * \brief get single comp node if the user has set it, or an invalid * comp node if the config is empty */ - CompNode get_single_comp_node() const; + MGE_WIN_DECLSPEC_FUC CompNode get_single_comp_node() const; /*! * \brief follow the computing node of dest @@ -121,7 +121,7 @@ public: return comp_node(dest.node()->comp_node()); } - OperatorNodeConfig& output_dtype(DType dtype); + MGE_WIN_DECLSPEC_FUC OperatorNodeConfig& output_dtype(DType dtype); DType output_dtype() const { return m_output_dtype; } @@ -132,9 +132,9 @@ public: const CompNodeArray& comp_node() const { return m_comp_node; } - size_t hash() const override; + MGE_WIN_DECLSPEC_FUC size_t hash() const override; - bool is_same_st(const Hashable& rhs) const override; + MGE_WIN_DECLSPEC_FUC bool is_same_st(const Hashable& rhs) const override; private: static constexpr size_t sm_initial_instance_id = 1333331; @@ -163,7 +163,7 @@ public: * * The default implementation does nothing */ - virtual void record_execute_deps(ExecDependencyArray& record); + MGE_WIN_DECLSPEC_FUC virtual void record_execute_deps(ExecDependencyArray& record); protected: ~GraphExecutable() = default; @@ -408,7 +408,7 @@ public: * \brief reset dep type; the vars could contain duplicated var nodes, * in which case the corresponding dep type would be ORed together */ - void reset_dep_type( + MGE_WIN_DECLSPEC_FUC void reset_dep_type( const VarNodeArray& vars, const SmallVector& dep_types); /*! @@ -488,11 +488,11 @@ public: const VarNodeArrayView& input_var_naming; }; - virtual ~OperatorNodeBase() noexcept; + MGE_WIN_DECLSPEC_FUC virtual ~OperatorNodeBase() noexcept; #if MGB_ENABLE_JSON /* ===================== json io ===================== */ - std::shared_ptr to_json() const override; + MGE_WIN_DECLSPEC_FUC std::shared_ptr to_json() const override; //! extra value to be added to json std::shared_ptr to_json_extra_json = json::Object::make(); @@ -511,7 +511,7 @@ public: const VarNodeArray& output() const { return m_output; } // non-volatile outputs - const VarNodeArray usable_output() const; + MGE_WIN_DECLSPEC_FUC const VarNodeArray usable_output() const; VarNode* input(size_t idx) const { return m_input.at(idx); } @@ -519,7 +519,7 @@ public: //! hash that combines all inputs, m_config.comp_node() and all //! add_equivalence_component calls - size_t hash() const override final; + MGE_WIN_DECLSPEC_FUC size_t hash() const override final; /*! * \brief get node prop, which is available and constant after node @@ -527,7 +527,7 @@ public: * * Note that this function calls do_make_node_prop() on first call */ - const NodeProp& node_prop() const; + MGE_WIN_DECLSPEC_FUC const NodeProp& node_prop() const; /*! * \brief called by ComputingGraph to mark that this node has been @@ -549,7 +549,7 @@ public: * 3. call do_execute * 4. set_ready on output */ - void execute(ExecEnv& env) override final; + MGE_WIN_DECLSPEC_FUC void execute(ExecEnv& env) override final; /*! * \brief specifies waiting strategy on one comp node for input vars @@ -617,7 +617,7 @@ public: * \brief get callbacks to be invoked on events related to this * operator; default implementation returns empty event */ - virtual OprEventCallback get_opr_event_callback(); + MGE_WIN_DECLSPEC_FUC virtual OprEventCallback get_opr_event_callback(); /*! 
* \brief called when stream of comp node of output vars is changed for @@ -635,7 +635,7 @@ public: * * This function is called once during operator insertion. */ - virtual void init_output_dtype(); + MGE_WIN_DECLSPEC_FUC virtual void init_output_dtype(); /*! * \brief initialize output format by calling VarNode::format @@ -645,7 +645,7 @@ public: * * This function is called once during operator insertion */ - virtual void init_output_format(); + MGE_WIN_DECLSPEC_FUC virtual void init_output_format(); /*! * \brief inititialize output comp_node by calling VarNode::comp_node @@ -687,7 +687,7 @@ public: * \param dynamic if true, initialize mem plans for vars that could not * be statically inferred; otherwise for statically inferable vars */ - virtual void init_output_mem_plan(bool dynamic); + MGE_WIN_DECLSPEC_FUC virtual void init_output_mem_plan(bool dynamic); /* * ============================================================= @@ -703,7 +703,7 @@ public: }; //! add input var to this operator - void add_input( + MGE_WIN_DECLSPEC_FUC void add_input( std::initializer_list list, AddInputSortType sort_type = AddInputSortType::NONE); @@ -711,7 +711,7 @@ public: * \brief allocate a new output VarNode; the name would be appended to * this->name to form the final name */ - VarNode* add_output(const Maybe& name); + MGE_WIN_DECLSPEC_FUC VarNode* add_output(const Maybe& name); /*! * \brief add extra component for equivalence check @@ -734,7 +734,7 @@ public: * \brief allocate a new node prop and initialize dep entry as all * inputs */ - virtual NodeProp* do_make_node_prop() const; + MGE_WIN_DECLSPEC_FUC virtual NodeProp* do_make_node_prop() const; /*! * \brief Update operator priority. @@ -744,13 +744,13 @@ public: * priority. * \return whether the priority would be changed. */ - virtual bool update_priority() const; + MGE_WIN_DECLSPEC_FUC virtual bool update_priority() const; protected: /*! * \param input_var_naming used for generating default node name */ - OperatorNodeBase( + MGE_WIN_DECLSPEC_FUC OperatorNodeBase( ComputingGraph* owner, const OperatorNodeConfig& config, const std::string& default_name, const VarNodeArrayView& input_var_naming); @@ -781,9 +781,10 @@ private: mutable Maybe m_node_prop; Maybe m_input_waiting_spec; - void do_add_equivalence_component(HashableContainer&& hashable); + MGE_WIN_DECLSPEC_FUC void do_add_equivalence_component( + HashableContainer&& hashable); - bool is_same_st(const Hashable& rhs) const override final; + MGE_WIN_DECLSPEC_FUC bool is_same_st(const Hashable& rhs) const override final; }; /*! @@ -856,7 +857,7 @@ protected: * mixin_on_output_comp_node_stream_changed(), which is called from * opr.on_output_comp_node_stream_changed() invoked by this function. */ - static void mixin_init_output_comp_node(OperatorNodeBase& opr); + MGE_WIN_DECLSPEC_FUC static void mixin_init_output_comp_node(OperatorNodeBase& opr); /*! * \brief only infer output comp node, without modifying anything @@ -865,7 +866,7 @@ protected: * least one input exists and they are all placed on the same comp node. * It also checks the comp node set in config. 
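OperatorNodeBase's protected hooks (do_make_node_prop, add_input/add_output, the constructor) are exported as well, because operator subclasses are also compiled into other modules and their overrides still need to reach the base-class symbols. A minimal sketch; MyOpr and its use of the Super alias (provided by the MGB_DEFINE_OPR_CLASS family of macros) are assumptions.

    // Hypothetical operator compiled outside the core DLL.
    mgb::cg::OperatorNodeBase::NodeProp* MyOpr::do_make_node_prop() const {
        // resolves against the exported base implementation declared above
        return Super::do_make_node_prop();
    }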
*/ - static CompNode mixin_infer_output_comp_node( + MGE_WIN_DECLSPEC_FUC static CompNode mixin_infer_output_comp_node( const OperatorNodeBase& opr, bool cross_mem); CompNode mixin_comp_node() const { return m_comp_node; } @@ -874,22 +875,25 @@ protected: * \brief initialize NodeProp with SINGLE_COMP_NODE, and setup * dependency on input */ - NodeProp* mixin_do_make_node_prop(const OperatorNodeBase& opr) const; + MGE_WIN_DECLSPEC_FUC NodeProp* mixin_do_make_node_prop( + const OperatorNodeBase& opr) const; - void mixin_do_execute(OperatorNodeBase& opr, OperatorNodeBase::ExecEnv& env); + MGE_WIN_DECLSPEC_FUC void mixin_do_execute( + OperatorNodeBase& opr, OperatorNodeBase::ExecEnv& env); - void mixin_on_output_comp_node_stream_changed(OperatorNodeBase& opr); + MGE_WIN_DECLSPEC_FUC void mixin_on_output_comp_node_stream_changed( + OperatorNodeBase& opr); /*! * \brief set comp node during initializing */ - void mixin_comp_node(OperatorNodeBase& opr, CompNode node); + MGE_WIN_DECLSPEC_FUC void mixin_comp_node(OperatorNodeBase& opr, CompNode node); /*! * \brief override by subclass to perform raw computing; this function * is already dispatched on corresponding stream in ExecEnv */ - virtual void scn_do_execute() = 0; + MGE_WIN_DECLSPEC_FUC virtual void scn_do_execute() = 0; ~SingleCNOperatorNode() = default; }; @@ -903,7 +907,8 @@ class OutshapePureByInshapeOpr : public OperatorNodeMixinBase { size_t m_inp_run_id = -1; TensorShapeArray m_out_shp; - bool infer_desc(size_t out_idx, TensorShape& dest, const StaticInferInpVal& inp); + MGE_WIN_DECLSPEC_FUC bool infer_desc( + size_t out_idx, TensorShape& dest, const StaticInferInpVal& inp); protected: /*! @@ -912,9 +917,11 @@ protected: * of output vars that should be managed by this helper (they would be * the first vars of all output vars). */ - void mixin_set_nr_managed_outputs(OperatorNodeBase& opr, size_t nr); + MGE_WIN_DECLSPEC_FUC void mixin_set_nr_managed_outputs( + OperatorNodeBase& opr, size_t nr); - void mixin_init_output_static_infer_desc(OperatorNodeBase& opr); + MGE_WIN_DECLSPEC_FUC void mixin_init_output_static_infer_desc( + OperatorNodeBase& opr); /*! * \brief get output shapes from input shapes @@ -926,7 +933,7 @@ protected: virtual void get_output_var_shape( const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const = 0; - ~OutshapePureByInshapeOpr(); + MGE_WIN_DECLSPEC_FUC ~OutshapePureByInshapeOpr(); }; /*! @@ -1010,6 +1017,9 @@ using OprNodeArray = SmallVector; MGB_DEFINE_CLS_WITH_SUPER(_name final, _base, ##__VA_ARGS__) \ MGB_DYN_TYPE_OBJ_FINAL_DECL; +#define MGB_DEFINE_OPR_CLASS_WITH_EXPORT(_name, _base, ...) \ + MGB_DEFINE_CLS_WITH_SUPER(_name final, _base, ##__VA_ARGS__) \ + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; } // namespace cg } // namespace mgb diff --git a/src/core/include/megbrain/graph/static_infer.h b/src/core/include/megbrain/graph/static_infer.h index bf134cfe..81b0730c 100644 --- a/src/core/include/megbrain/graph/static_infer.h +++ b/src/core/include/megbrain/graph/static_infer.h @@ -117,13 +117,13 @@ struct ShapeInferDesc { * \brief make a ShapeInferDesc that copies shape of another var into * dest var */ - static ShapeInferDesc make_identity(VarNode* src); + MGE_WIN_DECLSPEC_FUC static ShapeInferDesc make_identity(VarNode* src); /*! * \brief make a constant ShapeInferDesc that always produces given * value */ - static ShapeInferDesc make_const(const TensorShape& shp); + MGE_WIN_DECLSPEC_FUC static ShapeInferDesc make_const(const TensorShape& shp); }; /*! 
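The new MGB_DEFINE_OPR_CLASS_WITH_EXPORT macro added above mirrors MGB_DEFINE_OPR_CLASS but expands to the exporting typeinfo declaration, so an operator class can opt into cross-DLL visibility without repeating the boilerplate. A hypothetical call site, following the trailing `// {` comment convention these macros commonly use; the operator name and base class are placeholders.

    MGB_DEFINE_OPR_CLASS_WITH_EXPORT(MyExportedOpr, cg::SingleCNOperatorNodeBase) // {
    public:
        MyExportedOpr(VarNode* inp, const OperatorNodeConfig& config);
        static SymbolVar make(SymbolVar inp, const OperatorNodeConfig& config = {});
    };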
@@ -154,7 +154,7 @@ struct ValueInferDesc { * \brief make a ValueInferDesc that copies shape of another var into * dest var */ - static ValueInferDesc make_identity(VarNode* src); + MGE_WIN_DECLSPEC_FUC static ValueInferDesc make_identity(VarNode* src); }; struct InferType { diff --git a/src/core/include/megbrain/graph/symbol_var.h b/src/core/include/megbrain/graph/symbol_var.h index eda07c4d..dd8d88e6 100644 --- a/src/core/include/megbrain/graph/symbol_var.h +++ b/src/core/include/megbrain/graph/symbol_var.h @@ -53,15 +53,15 @@ public: * \brief set a new name; note that the underlying VarNode would be * modified, not this SymbolVar itself */ - SymbolVar rename(const std::string& name) const; + MGE_WIN_DECLSPEC_FUC SymbolVar rename(const std::string& name) const; - SymbolVar reshape(const TensorShape& tshape) const; - SymbolVar reshape(SymbolVar tshape) const; - SymbolVar broadcast(const TensorShape& tshape) const; - SymbolVar broadcast(SymbolVar tshape) const; - SymbolVar symshape() const; - SymbolVar flatten() const; - SymbolVar add_axis(size_t idx) const; + MGE_WIN_DECLSPEC_FUC SymbolVar reshape(const TensorShape& tshape) const; + MGE_WIN_DECLSPEC_FUC SymbolVar reshape(SymbolVar tshape) const; + MGE_WIN_DECLSPEC_FUC SymbolVar broadcast(const TensorShape& tshape) const; + MGE_WIN_DECLSPEC_FUC SymbolVar broadcast(SymbolVar tshape) const; + MGE_WIN_DECLSPEC_FUC SymbolVar symshape() const; + MGE_WIN_DECLSPEC_FUC SymbolVar flatten() const; + MGE_WIN_DECLSPEC_FUC SymbolVar add_axis(size_t idx) const; const TensorShape& shape() const { return m_node->shape(); } @@ -105,7 +105,8 @@ public: * \brief make a const scalar value on given computing graph and * computing node */ - static SymbolVar make_scalar(DTypeScalar value, ComputingGraph& cg, CompNode cn); + MGE_WIN_DECLSPEC_FUC static SymbolVar make_scalar( + DTypeScalar value, ComputingGraph& cg, CompNode cn); /*! * \brief make a const scalar value using computing graph and comp node @@ -131,7 +132,7 @@ public: * This essentially synchronizes the dispatch queue and then call * dev_tensor() */ - const DeviceTensorND& eager_eval_get_value() const; + MGE_WIN_DECLSPEC_FUC const DeviceTensorND& eager_eval_get_value() const; bool allow_shape_change() const { return m_node->allow_shape_change(); } }; diff --git a/src/core/include/megbrain/graph/var_node.h b/src/core/include/megbrain/graph/var_node.h index 63cd5f01..611f482a 100644 --- a/src/core/include/megbrain/graph/var_node.h +++ b/src/core/include/megbrain/graph/var_node.h @@ -145,7 +145,7 @@ public: MemAllocPlan& layout(const TensorLayout& dest, bool allow_shape_change = false); #if MGB_ENABLE_JSON - std::shared_ptr to_json() const override; + MGE_WIN_DECLSPEC_FUC std::shared_ptr to_json() const override; #endif /*! @@ -153,13 +153,13 @@ public: * * Release tensor storage if refcnt drops to zero */ - MemAllocPlan& release_chunk(); + MGE_WIN_DECLSPEC_FUC MemAllocPlan& release_chunk(); /*! * \brief reset chunk to a privately owned chunk, and setup offset and * layout from owner var, and clear tensor storage */ - MemAllocPlan& reset_from_owner_var(); + MGE_WIN_DECLSPEC_FUC MemAllocPlan& reset_from_owner_var(); /*! * \brief reset to a special marker that indicates this var is not @@ -187,10 +187,11 @@ public: } //! assign layout, offset and chunk from another mem alloc plan - MemAllocPlan& assign(const MemAllocPlan& src); + MGE_WIN_DECLSPEC_FUC MemAllocPlan& assign(const MemAllocPlan& src); //! 
assign for readonly forward - MemAllocPlan& assign_for_forward(const MemAllocPlan& src, const SubTensorSpec& sub); + MGE_WIN_DECLSPEC_FUC MemAllocPlan& assign_for_forward( + const MemAllocPlan& src, const SubTensorSpec& sub); /*! * \brief next readonly-forward reader of this MemAllocPlan @@ -212,7 +213,7 @@ private: public: MemAllocPlan* next() const { return m_next; } - void reset(); + MGE_WIN_DECLSPEC_FUC void reset(); inline void insert_after(const MemAllocPlan& prev, MemAllocPlan* self); inline void remove_self(); }; @@ -261,7 +262,8 @@ public: * with given layout may be forwarded to opr directly, otherwise it * will be implicitly rearranged to a contiguous one. */ - VarNode& add_layout_constraint(LayoutConstraintCallback callback); + MGE_WIN_DECLSPEC_FUC VarNode& add_layout_constraint( + LayoutConstraintCallback callback); /*! * \brief requires the layout to be contiguous @@ -272,7 +274,7 @@ public: * existing callbacks would be cleared and new callbacks would be * ignored after add_layout_constraint_contiguous() is invoked. */ - VarNode& add_layout_constraint_contiguous(); + MGE_WIN_DECLSPEC_FUC VarNode& add_layout_constraint_contiguous(); /*! * \brief requires the layout to be monotone while allowing broadcast @@ -281,7 +283,7 @@ public: * implemented by marking a flag; however user-defined callbacks are * still invoked since they might impose stronger constraints. */ - VarNode& add_layout_constraint_monotone(); + MGE_WIN_DECLSPEC_FUC VarNode& add_layout_constraint_monotone(); /*! * \brief request that memory should be readonly forwarded from other @@ -292,7 +294,7 @@ public: * * \return whether this request could be satisfied */ - MGB_WARN_UNUSED_RESULT bool set_fwd_in2out_readonly( + MGB_WARN_UNUSED_RESULT MGE_WIN_DECLSPEC_FUC bool set_fwd_in2out_readonly( VarNode* input, const SubTensorSpec& sub); /*! @@ -302,7 +304,7 @@ public: * Note that this function must be called from * OperatorNodeBase::mem_plan_fwd_in2out_writable. */ - VarNode& set_fwd_in2out_writable(VarNode* input); + MGE_WIN_DECLSPEC_FUC VarNode& set_fwd_in2out_writable(VarNode* input); /*! * \brief require this var to share memory from another var; only used @@ -311,14 +313,14 @@ public: * Note that this function must be called during operator node * initialization */ - VarNode& set_fwd_in2out_writable_force(VarNode* input); + MGE_WIN_DECLSPEC_FUC VarNode& set_fwd_in2out_writable_force(VarNode* input); /* ===================== getter and setters ===================== */ OperatorNodeBase* owner_opr() const { return m_owner; } //! get name; if name is not valid, get name of owner opr - const std::string& name() const; + MGE_WIN_DECLSPEC_FUC const std::string& name() const; //! get name as C-string const char* cname() const { return name().c_str(); } @@ -327,7 +329,7 @@ public: bool has_name_set() const { return m_has_name_set; } //! set name explicitly - VarNode& name(std::string name); + MGE_WIN_DECLSPEC_FUC VarNode& name(std::string name); //! get data type of data in this var DType dtype() const { return m_dev_tensor.dtype(); } @@ -336,10 +338,10 @@ public: TensorFormat format() const { return m_dev_tensor.format(); } //! set dtype; this function can only be called once - VarNode& dtype(DType dtype); + MGE_WIN_DECLSPEC_FUC VarNode& dtype(DType dtype); //! set format; this function can only be called once - VarNode& format(TensorFormat format); + MGE_WIN_DECLSPEC_FUC VarNode& format(TensorFormat format); MemAllocPlan& mem_plan() { return m_mem_plan; } @@ -351,7 +353,7 @@ public: } //! 
get the underlying device tensor to fill data - const DeviceTensorND& dev_tensor() const; + MGE_WIN_DECLSPEC_FUC const DeviceTensorND& dev_tensor() const; /*! * \brief get the underlying device tensor that can be modified(like @@ -360,7 +362,7 @@ public: * This should only be called from the owner opr of this var, and this * var must have flag NO_SYS_MEM_ALLOC. */ - DeviceTensorND& mutable_dev_tensor(); + MGE_WIN_DECLSPEC_FUC DeviceTensorND& mutable_dev_tensor(); /*! * \brief previous dev ptr before deallocating dev_tensor; used for @@ -377,7 +379,7 @@ public: * \brief set comp node; only the memory node could be changed if called * multiple times */ - VarNode& comp_node(const CompNode& cn); + MGE_WIN_DECLSPEC_FUC VarNode& comp_node(const CompNode& cn); const TensorShape& shape() const { return m_shape; } @@ -389,7 +391,7 @@ public: * \brief reset VarNode shape * \return whether shape differs from old shape */ - VarNode& shape(const TensorShape& shape); + MGE_WIN_DECLSPEC_FUC VarNode& shape(const TensorShape& shape); bool allow_shape_change() const { return m_allow_shape_change; } @@ -399,7 +401,7 @@ public: } #if MGB_ENABLE_JSON - std::shared_ptr to_json() const override; + MGE_WIN_DECLSPEC_FUC std::shared_ptr to_json() const override; #endif /*! @@ -413,7 +415,7 @@ public: enum class Flag : uint32_t; - VarNode& add_flag(Flag flag); + MGE_WIN_DECLSPEC_FUC VarNode& add_flag(Flag flag); inline bool contain_flag(Flag flag) const; @@ -429,7 +431,8 @@ public: * * \warning Alloc size_req memory if size_req != 0. */ - VarNode& shape_alloc(const TensorShape& shape, size_t size_req = 0); + MGE_WIN_DECLSPEC_FUC VarNode& shape_alloc( + const TensorShape& shape, size_t size_req = 0); /*! * \brief directly reset device tensor from another var @@ -459,7 +462,8 @@ public: * \param value the tensor to be used; it must be contiguous or empty * and be placed on the same comp node of this var. */ - VarNode& reset_dev_tensor_from_tensor(const DeviceTensorND& value); + MGE_WIN_DECLSPEC_FUC VarNode& reset_dev_tensor_from_tensor( + const DeviceTensorND& value); /*! * \brief add a var to add RT_FORCE_DYNAMIC_MEM_ALLOC flag if such flag @@ -472,7 +476,8 @@ public: * This method should be called from * OperatorNodeBase::init_rt_force_dynamic_mem_alloc_imply_chain impls. */ - VarNode& add_rt_force_dynamic_mem_alloc_imply_chain(VarNode* dest); + MGE_WIN_DECLSPEC_FUC VarNode& add_rt_force_dynamic_mem_alloc_imply_chain( + VarNode* dest); /* ===================== graph compiler special ===================== */ @@ -486,7 +491,8 @@ public: * \param fixed_alloc if not null, it should be a tensor providing * memory allocation for this var. */ - MemAllocPlan& init_mem_plan(const DeviceTensorND* fixed_alloc = nullptr); + MGE_WIN_DECLSPEC_FUC MemAllocPlan& init_mem_plan( + const DeviceTensorND* fixed_alloc = nullptr); /*! 
* \brief get the shape and value infer trait @@ -541,12 +547,14 @@ private: std::vector m_rt_force_dynamic_mem_alloc_imply_chain; - void modify_flag(Flag delta, Flag new_flag); + MGE_WIN_DECLSPEC_FUC void modify_flag(Flag delta, Flag new_flag); - void assign_dev_tensor_from_tensor(const DeviceTensorND& value); + MGE_WIN_DECLSPEC_FUC void assign_dev_tensor_from_tensor( + const DeviceTensorND& value); #if MGB_ENABLE_JSON - std::shared_ptr dump_static_infer_info_to_json() const; + MGE_WIN_DECLSPEC_FUC std::shared_ptr dump_static_infer_info_to_json() + const; #endif friend class static_infer::StaticInferManagerImpl; diff --git a/src/core/include/megbrain/system.h b/src/core/include/megbrain/system.h index e2bd5ed4..aaf8f948 100644 --- a/src/core/include/megbrain/system.h +++ b/src/core/include/megbrain/system.h @@ -25,27 +25,27 @@ namespace mgb { namespace sys { //! set name of caller thread -void set_thread_name(const std::string& name); +MGE_WIN_DECLSPEC_FUC void set_thread_name(const std::string& name); #if !__DEPLOY_ON_XP_SP2__ /*! * \brief get name of of given thread * \param tid thread id, or None to for the caller thread */ -std::string get_thread_name(Maybe tid = None); +MGE_WIN_DECLSPEC_FUC std::string get_thread_name(Maybe tid = None); #endif //! get number of CPU cores on this system -int get_cpu_count(); +MGE_WIN_DECLSPEC_FUC int get_cpu_count(); //! set cpu affinity for caller thread -void set_cpu_affinity(const std::vector& cpuset); +MGE_WIN_DECLSPEC_FUC void set_cpu_affinity(const std::vector& cpuset); //! whether stderr supports ansi color code -bool stderr_ansi_color(); +MGE_WIN_DECLSPEC_FUC bool stderr_ansi_color(); //! get total ram and free ram in bytes -std::pair get_ram_status_bytes(); +MGE_WIN_DECLSPEC_FUC std::pair get_ram_status_bytes(); /*! * \brief invoke a function with time limit @@ -207,7 +207,7 @@ public: virtual void kill_worker() = 0; //! global unique instance - static TimedFuncInvoker& ins(); + MGE_WIN_DECLSPEC_FUC static TimedFuncInvoker& ins(); }; } // namespace sys diff --git a/src/core/include/megbrain/tensor.h b/src/core/include/megbrain/tensor.h index 9b7a90b8..1d388152 100644 --- a/src/core/include/megbrain/tensor.h +++ b/src/core/include/megbrain/tensor.h @@ -50,7 +50,7 @@ public: } //! make a SubTensorSpec from given layout and offset - static SubTensorSpec make_from_offset_elem( + MGE_WIN_DECLSPEC_FUC static SubTensorSpec make_from_offset_elem( const TensorLayout& layout, ptrdiff_t offset_elem); //! get underlying layout @@ -72,7 +72,7 @@ public: * \brief merge with another SubTensorSpec: accum offset, and replace * layout by rhs */ - void merge_with(const SubTensorSpec& rhs); + MGE_WIN_DECLSPEC_FUC void merge_with(const SubTensorSpec& rhs); }; /*! @@ -99,7 +99,7 @@ public: * \param axis the axis to apply this slice; -1 can be used for * flattened layout */ - SubTensorSpec apply(TensorLayout layout, int axis) const; + MGE_WIN_DECLSPEC_FUC SubTensorSpec apply(TensorLayout layout, int axis) const; }; template @@ -133,7 +133,7 @@ public: TensorStorage(const TensorStorage& rhs) { *this = rhs; } - TensorStorage& operator=(const TensorStorage& rhs); + MGE_WIN_DECLSPEC_FUC TensorStorage& operator=(const TensorStorage& rhs); /*! * \brief whether given tensor span is valid in this storage @@ -153,14 +153,14 @@ public: * 2. This method would only grow storage, but it would not release * memory */ - TensorStorage& ensure_size(size_t sz); + MGE_WIN_DECLSPEC_FUC TensorStorage& ensure_size(size_t sz); /*! 
* \brief return a subtensor that shares the memory; the returned * subtensor is not allowed to realloc * \param offset offset given in bytes */ - TensorStorage sub(ptrdiff_t offset) const; + MGE_WIN_DECLSPEC_FUC TensorStorage sub(ptrdiff_t offset) const; //! apply lazy resize and get ptr dt_byte* ptr() const { @@ -204,7 +204,8 @@ public: * changed, the underlying data would be released and this tensor would * become empty */ - TensorStorage& comp_node(CompNode node, bool allow_mem_node_change = false); + MGE_WIN_DECLSPEC_FUC TensorStorage& comp_node( + CompNode node, bool allow_mem_node_change = false); /*! * \brief copy from another TensorStorage, possibly of other storage @@ -216,12 +217,13 @@ public: * this or src */ template - void copy_from(const TensorStorage& src, size_t size) const; + MGE_WIN_DECLSPEC_FUC void copy_from( + const TensorStorage& src, size_t size) const; /*! * \brief reset the tensor storage to given memory area */ - void reset(CompNode node, size_t size, RawStorage data); + MGE_WIN_DECLSPEC_FUC void reset(CompNode node, size_t size, RawStorage data); /*! * \brief make a TensorStorage that shares memory with another @@ -233,7 +235,8 @@ public: template < class RTrait, typename = typename std::enable_if< !std::is_same::value>::type> - static TensorStorage make_proxy(const TensorStorage& src); + MGE_WIN_DECLSPEC_FUC static TensorStorage make_proxy( + const TensorStorage& src); /*! * \brief make a DeviceTensorStorage on default_cpu @@ -302,9 +305,9 @@ private: on_invalid_comp_node(); } - dt_byte* apply_lazy_and_get_ptr(); + MGE_WIN_DECLSPEC_FUC dt_byte* apply_lazy_and_get_ptr(); - [[noreturn]] static void on_invalid_comp_node(); + [[noreturn]] MGE_WIN_DECLSPEC_FUC static void on_invalid_comp_node(); }; template @@ -326,30 +329,31 @@ class TensorND { public: using ChainReturnType = TensorND; - TensorND(); + MGE_WIN_DECLSPEC_FUC TensorND(); - explicit TensorND(CompNode node); + MGE_WIN_DECLSPEC_FUC explicit TensorND(CompNode node); - explicit TensorND(DType dtype); + MGE_WIN_DECLSPEC_FUC explicit TensorND(DType dtype); - TensorND(CompNode node, DType dtype); + MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, DType dtype); //! allocate contiguous tensor - TensorND( + MGE_WIN_DECLSPEC_FUC TensorND( CompNode node, const TensorShape& shape, DType dtype = dtype::Float32{}, TensorFormat format = {}); //! allocate contiguous tensor from given comp node and layout; layout //! is required to be contiguous, and its dtype and format would be used - TensorND(CompNode node, const TensorLayout& layout); + MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, const TensorLayout& layout); /* ================= shape and basic functionality ================= */ //! get subtensor according to given slices - ChainReturnType operator[](std::initializer_list slice) const; + MGE_WIN_DECLSPEC_FUC ChainReturnType + operator[](std::initializer_list slice) const; //! get subtensor according to spec - ChainReturnType sub(const SubTensorSpec& spec) const; + MGE_WIN_DECLSPEC_FUC ChainReturnType sub(const SubTensorSpec& spec) const; //! whether underlying storage is empty bool empty() const { return m_storage.empty(); } @@ -409,19 +413,21 @@ public: * * dtype and format would not be changed */ - ChainReturnType& resize(const TensorShape& shape); + MGE_WIN_DECLSPEC_FUC ChainReturnType& resize(const TensorShape& shape); /*! 
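TensorStorage and TensorND are class templates, so the MGE_WIN_DECLSPEC_FUC annotations on their members only take effect for instantiations actually compiled into the DLL, presumably via explicit instantiation of the host and device variants in tensor.cpp. The instantiation below is a sketch under that assumption; the trait names are illustrative and not taken from this patch.

    // Assumed to live in a .cpp inside the DLL:
    namespace mgb {
    template class TensorStorage<HostTensorStorageTrait>;
    template class TensorStorage<DeviceTensorStorageTrait>;
    template class TensorND<TensorStorage<HostTensorStorageTrait>>;
    template class TensorND<TensorStorage<DeviceTensorStorageTrait>>;
    }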
* \brief totally reset the tensor to given storage and layout */ - ChainReturnType& reset(TensorStorage storage, const TensorLayout& layout); + MGE_WIN_DECLSPEC_FUC ChainReturnType& reset( + TensorStorage storage, const TensorLayout& layout); /* ================= getter and setters ================= */ /*! * \brief change comp node; see TensorStorage::comp_node() */ - ChainReturnType& comp_node(CompNode comp_node, bool allow_mem_node_change = false); + MGE_WIN_DECLSPEC_FUC ChainReturnType& comp_node( + CompNode comp_node, bool allow_mem_node_change = false); CompNode comp_node() const { return m_storage.comp_node(); } @@ -431,7 +437,7 @@ public: * \brief change the storage and invalidate all data, resulting in an * empty tensor */ - ChainReturnType& storage(const TensorStorage& storage); + MGE_WIN_DECLSPEC_FUC ChainReturnType& storage(const TensorStorage& storage); //! get data type DType dtype() const { return m_layout.dtype; } @@ -444,14 +450,14 @@ public: * * layout would be cleared (reset to ndim=0) if dtype actually changes */ - ChainReturnType& dtype(DType dtype); + MGE_WIN_DECLSPEC_FUC ChainReturnType& dtype(DType dtype); /*! * \brief change underlying tensor format * * layout would be cleared (reset to ndim=0) if format actually changes */ - ChainReturnType& format(TensorFormat format); + MGE_WIN_DECLSPEC_FUC ChainReturnType& format(TensorFormat format); /*! * \brief copy from another tensor and initialize contiguous layout @@ -470,7 +476,7 @@ public: * to be contiguous. */ template - ChainReturnType& copy_from(const TensorND& src); + MGE_WIN_DECLSPEC_FUC ChainReturnType& copy_from(const TensorND& src); /*! * \brief copy from another tensor of the same shape, retaining current @@ -481,7 +487,8 @@ public: * contiguous. */ template - const ChainReturnType& copy_from_fixlayout(const TensorND& src) const; + MGE_WIN_DECLSPEC_FUC const ChainReturnType& copy_from_fixlayout( + const TensorND& src) const; //! non-const version of copy_from_fixlayout template @@ -547,7 +554,7 @@ public: /*! * \brief call memset in the data of a device tensor */ -void dev_tensor_memset(const DeviceTensorND& tensor, int val); +MGE_WIN_DECLSPEC_FUC void dev_tensor_memset(const DeviceTensorND& tensor, int val); /*! * \brief fill zeros in the content of a dev tensor diff --git a/src/core/include/megbrain/utils/debug.h b/src/core/include/megbrain/utils/debug.h index f2cb4c57..d4a0834a 100644 --- a/src/core/include/megbrain/utils/debug.h +++ b/src/core/include/megbrain/utils/debug.h @@ -28,7 +28,7 @@ public: using SystemError::SystemError; //! function to throw this exception; could be overwritten - static void (*throw_)(); + static MGE_WIN_DECLSPEC_DATA void (*throw_)(); }; struct BacktraceResult { @@ -53,7 +53,7 @@ BacktraceResult backtrace(int nr_exclude = 1); * 1: log warning message * 2: throw ForkAfterCudaError() exception */ -void set_fork_cuda_warning_flag(int flag); +MGE_WIN_DECLSPEC_FUC void set_fork_cuda_warning_flag(int flag); /*! * \brief supress fork warning in this scope @@ -79,7 +79,8 @@ public: * The binary can be parsed by `megbrain.plugin.load_tensor_binary` python * function */ -std::string dump_tensor(const HostTensorND& value, const std::string& name); +MGE_WIN_DECLSPEC_FUC std::string dump_tensor( + const HostTensorND& value, const std::string& name); static inline std::string dump_tensor( const DeviceTensorND& value, const std::string& name) { @@ -87,7 +88,7 @@ static inline std::string dump_tensor( } //! 
write the value of a string to file -void write_to_file( +MGE_WIN_DECLSPEC_FUC void write_to_file( const char* filename, const std::string& content, const char* mode = "wb"); /*! @@ -96,7 +97,7 @@ void write_to_file( * \return None if tensors are considered equal; or a human-readable * message indicating their difference */ -Maybe compare_tensor_value( +MGE_WIN_DECLSPEC_FUC Maybe compare_tensor_value( const HostTensorND& expect, const char* expect_expr, const HostTensorND& get, const char* get_expr, float maxerr); diff --git a/src/core/include/megbrain/utils/event.h b/src/core/include/megbrain/utils/event.h index 0d95131f..aa2e5d99 100644 --- a/src/core/include/megbrain/utils/event.h +++ b/src/core/include/megbrain/utils/event.h @@ -65,7 +65,7 @@ public: class ReceiverHandlerImpl; struct ReceiverHandlerImplDeleter { public: - void operator()(ReceiverHandlerImpl*); + MGE_WIN_DECLSPEC_FUC void operator()(ReceiverHandlerImpl*); }; using ReceiverHandler = std::unique_ptr; @@ -109,8 +109,8 @@ public: private: std::vector m_permanent_handler; - ReceiverHandler do_register_receiver( - Typeinfo* type, std::unique_ptr receiver); + MGE_WIN_DECLSPEC_FUC ReceiverHandler + do_register_receiver(Typeinfo* type, std::unique_ptr receiver); }; } // namespace mgb diff --git a/src/core/include/megbrain/utils/hash.h b/src/core/include/megbrain/utils/hash.h index 85b4941b..6abe9c24 100644 --- a/src/core/include/megbrain/utils/hash.h +++ b/src/core/include/megbrain/utils/hash.h @@ -14,6 +14,7 @@ #include #include #include "megbrain/utils/thin/function.h" +#include "megbrain_build_config.h" namespace mgb { @@ -57,14 +58,14 @@ class XXHash { long long m_state[11]; public: - XXHash(); - void reset(); + MGE_WIN_DECLSPEC_FUC XXHash(); + MGE_WIN_DECLSPEC_FUC void reset(); //! update internal state, and return *this - XXHash& update(const void* data, size_t len); + MGE_WIN_DECLSPEC_FUC XXHash& update(const void* data, size_t len); //! get hash value, guaranteed to be non-zero - uint64_t digest() const; + MGE_WIN_DECLSPEC_FUC uint64_t digest() const; }; /*! 
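hash.h (and, further down, mempool.h, timer.h and version.h) also gains an explicit `#include "megbrain_build_config.h"`, since that header provides the export macros and these files previously compiled without it. With XXHash exported, callers in other modules can hash through the DLL; usage is unchanged from the declarations above. The helper function below is only an illustration.

    #include "megbrain/utils/hash.h"

    uint64_t hash_bytes(const void* buf, size_t len) {
        mgb::XXHash hasher;
        hasher.update(buf, len);
        return hasher.digest();   // documented above to be non-zero
    }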
diff --git a/src/core/include/megbrain/utils/hashable.h b/src/core/include/megbrain/utils/hashable.h index 284d74ab..6c9bd0ef 100644 --- a/src/core/include/megbrain/utils/hashable.h +++ b/src/core/include/megbrain/utils/hashable.h @@ -144,7 +144,7 @@ public: */ template class ScalarHash final : public HashableVD { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; union U { T t; @@ -181,7 +181,7 @@ MGB_DYN_TYPE_OBJ_FINAL_IMPL(ScalarHash); */ template class PODHash final : public HashableVD { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; static_assert(is_location_invariant::value, "key must be location invariant"); @@ -219,7 +219,7 @@ MGB_DYN_TYPE_OBJ_FINAL_IMPL(PODHash); * \brief wraps around a raw pointer to Hashable object */ class HashableObjPtrWrapper final : public HashableVD { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; const Hashable* m_ptr; diff --git a/src/core/include/megbrain/utils/infile_persistent_cache.h b/src/core/include/megbrain/utils/infile_persistent_cache.h index 823e3915..008f2549 100644 --- a/src/core/include/megbrain/utils/infile_persistent_cache.h +++ b/src/core/include/megbrain/utils/infile_persistent_cache.h @@ -60,19 +60,22 @@ class InFilePersistentCache final : public PersistentCache { void read_cache(Input& inp); public: - InFilePersistentCache() = default; - InFilePersistentCache(const char* path, bool always_open = false); - InFilePersistentCache(const uint8_t* bin, size_t size); + MGE_WIN_DECLSPEC_FUC InFilePersistentCache() = default; + MGE_WIN_DECLSPEC_FUC InFilePersistentCache( + const char* path, bool always_open = false); + MGE_WIN_DECLSPEC_FUC InFilePersistentCache(const uint8_t* bin, size_t size); /** * \warning You should invoke \c dump_cache mannually to save the cache * file. 
*/ - void dump_cache(const char* path); - void dump_cache(OutputFile* out_file); + MGE_WIN_DECLSPEC_FUC void dump_cache(const char* path); + MGE_WIN_DECLSPEC_FUC void dump_cache(OutputFile* out_file); - Maybe get(const std::string& category, const Blob& key) override; - void put(const std::string& category, const Blob& key, const Blob& value) override; + MGE_WIN_DECLSPEC_FUC Maybe get( + const std::string& category, const Blob& key) override; + MGE_WIN_DECLSPEC_FUC void put( + const std::string& category, const Blob& key, const Blob& value) override; bool support_dump_cache() override { return true; } }; } // namespace mgb diff --git a/src/core/include/megbrain/utils/json.h b/src/core/include/megbrain/utils/json.h index fcbf7e49..4ef2f914 100644 --- a/src/core/include/megbrain/utils/json.h +++ b/src/core/include/megbrain/utils/json.h @@ -28,19 +28,21 @@ class Value : public std::enable_shared_from_this, public DynTypeObj { public: virtual void writeto(std::string& fout, int indent = 0) const = 0; - void writeto_fpath(const std::string& fout_path, int indent = 0) const { + MGE_WIN_DECLSPEC_FUC void writeto_fpath( + const std::string& fout_path, int indent = 0) const { writeto_fpath(fout_path.c_str(), indent); } - void writeto_fpath(const char* fout_path, int indent = 0) const; + MGE_WIN_DECLSPEC_FUC void writeto_fpath( + const char* fout_path, int indent = 0) const; - virtual std::string to_string(int indent = 0) const final; + MGE_WIN_DECLSPEC_FUC virtual std::string to_string(int indent = 0) const final; virtual ~Value() = default; }; class Number final : public Value { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; double m_val; @@ -59,7 +61,7 @@ public: }; class NumberInt final : public Value { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; int64_t m_val; @@ -70,7 +72,7 @@ public: return std::make_shared(v); } - void writeto(std::string& fout, int indent = 0) const override; + MGE_WIN_DECLSPEC_FUC void writeto(std::string& fout, int indent = 0) const override; auto&& get_impl() { return m_val; } @@ -78,7 +80,7 @@ public: }; class Bool final : public Value { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; bool m_val; @@ -87,7 +89,7 @@ public: static std::shared_ptr make(bool v); - void writeto(std::string& fout, int indent = 0) const override; + MGE_WIN_DECLSPEC_FUC void writeto(std::string& fout, int indent = 0) const override; auto&& get_impl() { return m_val; } @@ -95,7 +97,7 @@ public: }; class String final : public Value { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; std::string m_val; @@ -110,7 +112,7 @@ public: bool operator==(const String& rhs) const { return m_val == rhs.m_val; } - void writeto(std::string& fout, int indent = 0) const override; + MGE_WIN_DECLSPEC_FUC void writeto(std::string& fout, int indent = 0) const override; auto&& get_impl() { return m_val; } @@ -118,7 +120,7 @@ public: }; class Object final : public Value { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; std::unordered_map, StdHashAdaptor> m_val; @@ -140,7 +142,7 @@ public: std::shared_ptr& operator[](const char* s) { return m_val[std::string(s)]; } - void writeto(std::string& fout, int indent = 0) const override; + MGE_WIN_DECLSPEC_FUC void writeto(std::string& fout, int indent = 0) const override; auto&& get_impl() { return m_val; } @@ -148,7 +150,7 @@ public: }; class Array final : public Value { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; 
std::vector> m_val; @@ -162,7 +164,7 @@ public: std::shared_ptr& operator[](size_t idx) { return m_val.at(idx); } - void writeto(std::string& fout, int indent = 0) const override; + MGE_WIN_DECLSPEC_FUC void writeto(std::string& fout, int indent = 0) const override; auto&& get_impl() { return m_val; } @@ -170,7 +172,7 @@ public: }; class Null final : public Value { - MGB_DYN_TYPE_OBJ_FINAL_DECL; + MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT; public: static std::shared_ptr make() { @@ -178,7 +180,7 @@ public: return v; } - void writeto(std::string& fout, int /*indent*/) const override; + MGE_WIN_DECLSPEC_FUC void writeto(std::string& fout, int /*indent*/) const override; }; class Serializable { diff --git a/src/core/include/megbrain/utils/mempool.h b/src/core/include/megbrain/utils/mempool.h index 85b8e992..d6babd99 100644 --- a/src/core/include/megbrain/utils/mempool.h +++ b/src/core/include/megbrain/utils/mempool.h @@ -15,6 +15,7 @@ #include #include #include +#include "megbrain_build_config.h" namespace mgb { @@ -25,24 +26,24 @@ class MemPoolStorage { std::vector m_free; public: - MemPoolStorage() noexcept; - MemPoolStorage(MemPoolStorage&& rhs) noexcept; - ~MemPoolStorage() noexcept; - MemPoolStorage& operator=(MemPoolStorage&& rhs) noexcept; + MGE_WIN_DECLSPEC_FUC MemPoolStorage() noexcept; + MGE_WIN_DECLSPEC_FUC MemPoolStorage(MemPoolStorage&& rhs) noexcept; + MGE_WIN_DECLSPEC_FUC ~MemPoolStorage() noexcept; + MGE_WIN_DECLSPEC_FUC MemPoolStorage& operator=(MemPoolStorage&& rhs) noexcept; - void swap(MemPoolStorage& other); + MGE_WIN_DECLSPEC_FUC void swap(MemPoolStorage& other); /*! * \brief allocate sotrage for an object of specified size * \param elem_size size of the object; it must remain unchanged * during lifespan of this MemPoolStorage */ - void* alloc(size_t elem_size); - void free(void* ptr); - void reorder_free(); + MGE_WIN_DECLSPEC_FUC void* alloc(size_t elem_size); + MGE_WIN_DECLSPEC_FUC void free(void* ptr); + MGE_WIN_DECLSPEC_FUC void reorder_free(); //! clear all allocated storage - void clear(); + MGE_WIN_DECLSPEC_FUC void clear(); void disable_freelist() { m_disable_freelist = true; } }; diff --git a/src/core/include/megbrain/utils/metahelper.h b/src/core/include/megbrain/utils/metahelper.h index c3225357..ae566f02 100644 --- a/src/core/include/megbrain/utils/metahelper.h +++ b/src/core/include/megbrain/utils/metahelper.h @@ -115,6 +115,13 @@ public: \ private: \ static ::mgb::Typeinfo sm_typeinfo +#define MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT \ +public: \ + static inline ::mgb::Typeinfo* typeinfo() { return &sm_typeinfo; } \ + \ +private: \ + static MGE_WIN_DECLSPEC_DATA ::mgb::Typeinfo sm_typeinfo + #if MGB_VERBOSE_TYPEINFO_NAME //! get class name from class object #define _MGB_TYPEINFO_CLASS_NAME(_cls) #_cls @@ -133,6 +140,11 @@ public: \ ::mgb::Typeinfo* dyn_typeinfo() const override final; \ MGB_TYPEINFO_OBJ_DECL +#define MGB_DYN_TYPE_OBJ_FINAL_DECL_WITH_EXPORT \ +public: \ + MGE_WIN_DECLSPEC_FUC ::mgb::Typeinfo* dyn_typeinfo() const override final; \ + MGB_TYPEINFO_OBJ_DECL_WITH_EXPORT + //! put in the impl file of a final class inherited from DynTypeObj #define MGB_DYN_TYPE_OBJ_FINAL_IMPL(_cls) \ _MGB_DYN_TYPE_OBJ_FINAL_IMPL_TPL \ @@ -364,7 +376,7 @@ public: virtual ~UserData() = default; }; - ~UserDataContainer() noexcept; + MGE_WIN_DECLSPEC_FUC ~UserDataContainer() noexcept; /*! 
* \brief register new user data @@ -430,10 +442,10 @@ public: } private: - void do_add(Typeinfo* type, std::shared_ptr ptr); - std::pair do_get(Typeinfo* type) const; - void* do_get_one(Typeinfo* type) const; - int do_pop(Typeinfo* type); + MGE_WIN_DECLSPEC_FUC void do_add(Typeinfo* type, std::shared_ptr ptr); + MGE_WIN_DECLSPEC_FUC std::pair do_get(Typeinfo* type) const; + MGE_WIN_DECLSPEC_FUC void* do_get_one(Typeinfo* type) const; + MGE_WIN_DECLSPEC_FUC int do_pop(Typeinfo* type); //! use a set to help erase std::unordered_set> m_refkeeper; diff --git a/src/core/include/megbrain/utils/metahelper_basic.h b/src/core/include/megbrain/utils/metahelper_basic.h index f250646a..49aab4f7 100644 --- a/src/core/include/megbrain/utils/metahelper_basic.h +++ b/src/core/include/megbrain/utils/metahelper_basic.h @@ -22,7 +22,7 @@ namespace mgb { namespace metahelper_detail { -[[noreturn]] void on_maybe_invalid_val_access(); +[[noreturn]] MGE_WIN_DECLSPEC_FUC void on_maybe_invalid_val_access(); template constexpr T make_from_tuple_impl(Tuple&& t, std::index_sequence) { @@ -140,7 +140,7 @@ constexpr bool is_complete_v = //! a None type to represent invalid Maybe class None {}; -extern class None None; +MGE_WIN_DECLSPEC_DATA extern class None None; //! an optional storage for arbitrary object template diff --git a/src/core/include/megbrain/utils/persistent_cache.h b/src/core/include/megbrain/utils/persistent_cache.h index 01dc093b..2a9bab61 100644 --- a/src/core/include/megbrain/utils/persistent_cache.h +++ b/src/core/include/megbrain/utils/persistent_cache.h @@ -24,7 +24,7 @@ namespace mgb { * The implementation must be thread safe. */ class PersistentCache { - static std::shared_ptr sm_impl; + static MGE_WIN_DECLSPEC_DATA std::shared_ptr sm_impl; public: virtual ~PersistentCache() = default; @@ -42,7 +42,7 @@ public: virtual bool support_dump_cache() { return false; } //! set an implementation; return the original implementation - static std::shared_ptr set_impl( + MGE_WIN_DECLSPEC_FUC static std::shared_ptr set_impl( std::shared_ptr impl); //! get the instance; the default implementation just caches in diff --git a/src/core/include/megbrain/utils/thread_impl_1.h b/src/core/include/megbrain/utils/thread_impl_1.h index 7de0766d..3c967be3 100644 --- a/src/core/include/megbrain/utils/thread_impl_1.h +++ b/src/core/include/megbrain/utils/thread_impl_1.h @@ -68,28 +68,28 @@ class SCQueueSynchronizer { std::thread m_worker_thread; public: - SCQueueSynchronizer(size_t max_spin); + MGE_WIN_DECLSPEC_FUC SCQueueSynchronizer(size_t max_spin); - ~SCQueueSynchronizer() noexcept; + MGE_WIN_DECLSPEC_FUC ~SCQueueSynchronizer() noexcept; bool worker_started() const { return m_worker_started; } #ifdef WIN32 - static bool is_into_atexit; + static MGE_WIN_DECLSPEC_DATA bool is_into_atexit; void set_finish_called(bool status) { m_wait_finish_called = status; } #endif //! get global default max spin from env - static size_t get_default_max_spin(); + MGE_WIN_DECLSPEC_FUC static size_t get_default_max_spin(); - void start_worker(std::thread thread); + MGE_WIN_DECLSPEC_FUC void start_worker(std::thread thread); //! add a new task in producer thread; require worker to have //! started - void producer_add(); + MGE_WIN_DECLSPEC_FUC void producer_add(); //! 
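Data symbols use a separate macro: sm_typeinfo above, the global None object, PersistentCache's sm_impl and SCQueueSynchronizer's is_into_atexit are tagged MGE_WIN_DECLSPEC_DATA rather than the function macro. The split matters on Windows because calls to exported functions can be satisfied through import-library thunks even without dllimport, while references to exported variables only link when the declaration is marked dllimport. The pairing below is a sketch with placeholder names.

    // header, seen by both the DLL and its consumers:
    class CacheLikeExample {
        static MGE_WIN_DECLSPEC_DATA std::shared_ptr<CacheLikeExample> sm_impl;
    };
    // exactly one translation unit inside the DLL provides the definition:
    std::shared_ptr<CacheLikeExample> CacheLikeExample::sm_impl;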
wait for currently added tasks to finish - void producer_wait(); + MGE_WIN_DECLSPEC_FUC void producer_wait(); bool check_finished() const { return m_finished_task.load(std::memory_order_acquire) == @@ -102,13 +102,13 @@ public: * \param min minimal number of tasks to be fetched * \return number of tasks fetched; return 0 if worker should exit */ - size_t consumer_fetch(size_t max, size_t min = 1); + MGE_WIN_DECLSPEC_FUC size_t consumer_fetch(size_t max, size_t min = 1); /*! * \brief ack that tasks have been processed in consumer * \param nr numnber of tasks to be committed */ - void consumer_commit(size_t nr); + MGE_WIN_DECLSPEC_FUC void consumer_commit(size_t nr); }; /*! diff --git a/src/core/include/megbrain/utils/timer.h b/src/core/include/megbrain/utils/timer.h index 715688af..f64ff8d6 100644 --- a/src/core/include/megbrain/utils/timer.h +++ b/src/core/include/megbrain/utils/timer.h @@ -12,6 +12,7 @@ #pragma once #include +#include "megbrain_build_config.h" namespace mgb { @@ -34,7 +35,7 @@ class Timer { TimeSpec m_start; public: - static TimeSpec get_time(); + MGE_WIN_DECLSPEC_FUC static TimeSpec get_time(); Timer() { reset(); } diff --git a/src/core/include/megbrain/version.h b/src/core/include/megbrain/version.h index 5c413d02..099427df 100644 --- a/src/core/include/megbrain/version.h +++ b/src/core/include/megbrain/version.h @@ -11,6 +11,8 @@ #pragma once +#include "megbrain_build_config.h" + #define MGB_MAJOR 8 #define MGB_MINOR 9999 #define MGB_PATCH 0 @@ -24,7 +26,7 @@ struct Version { int major, minor, patch, is_dev; }; -Version get_version(); +MGE_WIN_DECLSPEC_FUC Version get_version(); } // namespace mgb // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/custom/include/megbrain/custom/manager.h b/src/custom/include/megbrain/custom/manager.h index f2c1a2e0..db89533f 100644 --- a/src/custom/include/megbrain/custom/manager.h +++ b/src/custom/include/megbrain/custom/manager.h @@ -24,23 +24,27 @@ class CustomOpManager { public: PREVENT_COPY_AND_ASSIGN(CustomOpManager); - static CustomOpManager* inst(void); - ~CustomOpManager(); + MGE_WIN_DECLSPEC_FUC static CustomOpManager* inst(void); + MGE_WIN_DECLSPEC_FUC ~CustomOpManager(); - std::shared_ptr insert(const std::string& name, uint32_t version); - bool erase(const std::string& name); - bool erase(const RunTimeId& id); + MGE_WIN_DECLSPEC_FUC std::shared_ptr insert( + const std::string& name, uint32_t version); + MGE_WIN_DECLSPEC_FUC bool erase(const std::string& name); + MGE_WIN_DECLSPEC_FUC bool erase(const RunTimeId& id); - std::shared_ptr find_or_reg(const std::string& name, uint32_t version); + MGE_WIN_DECLSPEC_FUC std::shared_ptr find_or_reg( + const std::string& name, uint32_t version); - RunTimeId to_id(const std::string& name) const; - std::string to_name(const RunTimeId& id) const; + MGE_WIN_DECLSPEC_FUC RunTimeId to_id(const std::string& name) const; + MGE_WIN_DECLSPEC_FUC std::string to_name(const RunTimeId& id) const; - std::shared_ptr find(const std::string& name) const; - std::shared_ptr find(const RunTimeId& id) const; + MGE_WIN_DECLSPEC_FUC std::shared_ptr find( + const std::string& name) const; + MGE_WIN_DECLSPEC_FUC std::shared_ptr find( + const RunTimeId& id) const; - std::vector op_name_list(void); - std::vector op_id_list(void); + MGE_WIN_DECLSPEC_FUC std::vector op_name_list(void); + MGE_WIN_DECLSPEC_FUC std::vector op_id_list(void); }; class CustomLib { @@ -67,10 +71,10 @@ class LibManager { public: PREVENT_COPY_AND_ASSIGN(LibManager); - static LibManager* inst(void); - const 
std::vector& install( + MGE_WIN_DECLSPEC_FUC static LibManager* inst(void); + MGE_WIN_DECLSPEC_FUC const std::vector& install( const std::string& name, const std::string& path); - bool uninstall(const std::string& name); + MGE_WIN_DECLSPEC_FUC bool uninstall(const std::string& name); friend class CustomOpManager; }; diff --git a/src/custom/include/megbrain/custom/op.h b/src/custom/include/megbrain/custom/op.h index fe739149..2646ce56 100644 --- a/src/custom/include/megbrain/custom/op.h +++ b/src/custom/include/megbrain/custom/op.h @@ -34,24 +34,25 @@ using RunTimeId = uint64_t; class ArgInfo { CUSTOM_PIMPL_CLS_DECL(ArgInfo); - ArgInfo(const std::string& name, const std::string& desc, + MGE_WIN_DECLSPEC_FUC ArgInfo( + const std::string& name, const std::string& desc, const std::unordered_set& dtypes, const int& ndim, const std::string& mem_stgy); - const std::string& name(void) const; - const std::string& desc(void) const; - const std::unordered_set& dtypes(void) const; - int ndim(void) const; - const std::string& mem_strategy(void) const; + MGE_WIN_DECLSPEC_FUC const std::string& name(void) const; + MGE_WIN_DECLSPEC_FUC const std::string& desc(void) const; + MGE_WIN_DECLSPEC_FUC const std::unordered_set& dtypes(void) const; + MGE_WIN_DECLSPEC_FUC int ndim(void) const; + MGE_WIN_DECLSPEC_FUC const std::string& mem_strategy(void) const; - std::string str() const; + MGE_WIN_DECLSPEC_FUC std::string str() const; }; class CustomOp { std::unique_ptr m_impl; public: - CustomOp(const std::string& op_type, uint32_t version); + MGE_WIN_DECLSPEC_FUC CustomOp(const std::string& op_type, uint32_t version); PREVENT_COPY_AND_ASSIGN(CustomOp); using DeviceInferFuncPtr = @@ -70,65 +71,70 @@ public: void (*)(const std::vector&, const Param&, std::vector&); // write for forward - CustomOp& set_device_infer(DeviceInferFuncPtr func); - CustomOp& set_shape_infer(ShapeInferFuncPtr func); - CustomOp& set_dtype_infer(DTypeInferFuncPtr func); - CustomOp& set_format_infer(FormatInferFuncPtr func); - CustomOp& set_preprocess(PreprocessFuncPtr func); - CustomOp& set_preprocess(const std::string& device, PreprocessFuncPtr func); - CustomOp& set_postprocess(PostprocessFuncPtr func); - CustomOp& set_postprocess(const std::string& device, PostprocessFuncPtr func); - CustomOp& set_compute(ComputeFuncPtr func); - CustomOp& set_compute(const std::string& device, ComputeFuncPtr func); - - CustomOp& set_description(const std::string& op_desc); - CustomOp& add_input( + MGE_WIN_DECLSPEC_FUC CustomOp& set_device_infer(DeviceInferFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_shape_infer(ShapeInferFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_dtype_infer(DTypeInferFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_format_infer(FormatInferFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_preprocess(PreprocessFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_preprocess( + const std::string& device, PreprocessFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_postprocess(PostprocessFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_postprocess( + const std::string& device, PostprocessFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_compute(ComputeFuncPtr func); + MGE_WIN_DECLSPEC_FUC CustomOp& set_compute( + const std::string& device, ComputeFuncPtr func); + + MGE_WIN_DECLSPEC_FUC CustomOp& set_description(const std::string& op_desc); + MGE_WIN_DECLSPEC_FUC CustomOp& add_input( const std::string& name, const std::string& desc, const std::initializer_list& legal_dtypes = {"float32"}, int dims = -1, const 
std::string& mem_stgy = "default"); - CustomOp& add_output( + MGE_WIN_DECLSPEC_FUC CustomOp& add_output( const std::string& name, const std::string& desc, const std::initializer_list& legal_dtypes = {"float32"}, int dims = -1, const std::string& mem_stgy = "default"); - CustomOp& add_input( + MGE_WIN_DECLSPEC_FUC CustomOp& add_input( const std::string& name, const std::initializer_list& legal_dtypes = {"float32"}, int dims = -1, const std::string& mem_stgy = "default"); - CustomOp& add_output( + MGE_WIN_DECLSPEC_FUC CustomOp& add_output( const std::string& name, const std::initializer_list& legal_dtypes = {"float32"}, int dims = -1, const std::string& mem_stgy = "default"); - CustomOp& add_inputs(const size_t& input_num); - CustomOp& add_outputs(const size_t& output_num); - CustomOp& add_param(const std::string& name, const ParamVal& default_val); - CustomOp& add_param( + MGE_WIN_DECLSPEC_FUC CustomOp& add_inputs(const size_t& input_num); + MGE_WIN_DECLSPEC_FUC CustomOp& add_outputs(const size_t& output_num); + MGE_WIN_DECLSPEC_FUC CustomOp& add_param( + const std::string& name, const ParamVal& default_val); + MGE_WIN_DECLSPEC_FUC CustomOp& add_param( const std::string& name, const std::string& desc, const ParamVal& default_val); // read - std::string op_type(void) const; - std::string op_desc(void) const; - RunTimeId runtime_id(void) const; - size_t input_num(void) const; - size_t output_num(void) const; - std::string str(void) const; - - const ParamInfo& param_info(void) const; - ArgInfo input_info(size_t idx) const; - ArgInfo output_info(size_t idx) const; - const std::vector& inputs_info(void) const; - const std::vector& outputs_info(void) const; + MGE_WIN_DECLSPEC_FUC std::string op_type(void) const; + MGE_WIN_DECLSPEC_FUC std::string op_desc(void) const; + MGE_WIN_DECLSPEC_FUC RunTimeId runtime_id(void) const; + MGE_WIN_DECLSPEC_FUC size_t input_num(void) const; + MGE_WIN_DECLSPEC_FUC size_t output_num(void) const; + MGE_WIN_DECLSPEC_FUC std::string str(void) const; + + MGE_WIN_DECLSPEC_FUC const ParamInfo& param_info(void) const; + MGE_WIN_DECLSPEC_FUC ArgInfo input_info(size_t idx) const; + MGE_WIN_DECLSPEC_FUC ArgInfo output_info(size_t idx) const; + MGE_WIN_DECLSPEC_FUC const std::vector& inputs_info(void) const; + MGE_WIN_DECLSPEC_FUC const std::vector& outputs_info(void) const; // use - std::vector infer_output_device( + MGE_WIN_DECLSPEC_FUC std::vector infer_output_device( const std::vector&, const Param&) const; - std::vector infer_output_shape( + MGE_WIN_DECLSPEC_FUC std::vector infer_output_shape( const std::vector&, const Param&) const; - std::vector infer_output_dtype( + MGE_WIN_DECLSPEC_FUC std::vector infer_output_dtype( const std::vector&, const Param&) const; - std::vector infer_output_format( + MGE_WIN_DECLSPEC_FUC std::vector infer_output_format( const std::vector&, const Param&) const; - void compute(const std::vector&, const Param&, std::vector&) const; + MGE_WIN_DECLSPEC_FUC void compute( + const std::vector&, const Param&, std::vector&) const; }; } // namespace custom diff --git a/src/custom/include/megbrain/custom/param.h b/src/custom/include/megbrain/custom/param.h index c8c710d5..d895d913 100644 --- a/src/custom/include/megbrain/custom/param.h +++ b/src/custom/include/megbrain/custom/param.h @@ -49,15 +49,15 @@ class ParamInfo { class Param { CUSTOM_PIMPL_CLS_DECL(Param); - Param(const ParamInfo&); - ParamVal& operator[](const std::string&); - const ParamVal& operator[](const std::string&) const; - const std::unordered_map& raw() const; - bool exist(const 
std::string& name) const; - std::string to_bytes(void) const; - void from_bytes(const std::string&); + MGE_WIN_DECLSPEC_FUC Param(const ParamInfo&); + MGE_WIN_DECLSPEC_FUC ParamVal& operator[](const std::string&); + MGE_WIN_DECLSPEC_FUC const ParamVal& operator[](const std::string&) const; + MGE_WIN_DECLSPEC_FUC const std::unordered_map& raw() const; + MGE_WIN_DECLSPEC_FUC bool exist(const std::string& name) const; + MGE_WIN_DECLSPEC_FUC std::string to_bytes(void) const; + MGE_WIN_DECLSPEC_FUC void from_bytes(const std::string&); }; -bool operator==(const Param&, const Param&); +MGE_WIN_DECLSPEC_FUC bool operator==(const Param&, const Param&); } // namespace custom diff --git a/src/custom/include/megbrain/custom/param_val.h b/src/custom/include/megbrain/custom/param_val.h index dfb84abc..31b2a4b6 100644 --- a/src/custom/include/megbrain/custom/param_val.h +++ b/src/custom/include/megbrain/custom/param_val.h @@ -175,15 +175,15 @@ class ParamVal { public: template - ParamVal(const T& val); + MGE_WIN_DECLSPEC_FUC ParamVal(const T& val); template - ParamVal(const std::initializer_list& val); + MGE_WIN_DECLSPEC_FUC ParamVal(const std::initializer_list& val); - ParamVal(); - ParamVal(const char* str); - ParamVal(const std::initializer_list& strs); - ParamVal(const std::vector& strs); - ParamVal(const ParamVal& rhs); + MGE_WIN_DECLSPEC_FUC ParamVal(); + MGE_WIN_DECLSPEC_FUC ParamVal(const char* str); + MGE_WIN_DECLSPEC_FUC ParamVal(const std::initializer_list& strs); + MGE_WIN_DECLSPEC_FUC ParamVal(const std::vector& strs); + MGE_WIN_DECLSPEC_FUC ParamVal(const ParamVal& rhs); template ParamVal& operator=(const T& rhs); @@ -196,18 +196,19 @@ public: ParamVal& operator=(const ParamVal& rhs); template - const T& as(void) const; + MGE_WIN_DECLSPEC_FUC const T& as(void) const; template - T& as(void); + MGE_WIN_DECLSPEC_FUC T& as(void); - const void* raw_ptr(void) const; - void* raw_ptr(void); - ParamDynType type(void) const; - std::string str(void) const; - size_t size(void) const; + MGE_WIN_DECLSPEC_FUC const void* raw_ptr(void) const; + MGE_WIN_DECLSPEC_FUC void* raw_ptr(void); + MGE_WIN_DECLSPEC_FUC ParamDynType type(void) const; + MGE_WIN_DECLSPEC_FUC std::string str(void) const; + MGE_WIN_DECLSPEC_FUC size_t size(void) const; - static std::string to_bytes(const ParamVal& value); - static ParamVal from_bytes(const std::string& bytes, size_t& offset); + MGE_WIN_DECLSPEC_FUC static std::string to_bytes(const ParamVal& value); + MGE_WIN_DECLSPEC_FUC static ParamVal from_bytes( + const std::string& bytes, size_t& offset); friend ParamVal operator+(const ParamVal& lhs, const ParamVal& rhs); friend ParamVal operator-(const ParamVal& lhs, const ParamVal& rhs); diff --git a/src/custom/include/megbrain/custom/tensor.h b/src/custom/include/megbrain/custom/tensor.h index f579782a..a1dd9ba5 100644 --- a/src/custom/include/megbrain/custom/tensor.h +++ b/src/custom/include/megbrain/custom/tensor.h @@ -31,8 +31,8 @@ namespace custom { custom_type, class Device { - const void* impl() const; - Device(const void* impl); + MGE_WIN_DECLSPEC_FUC const void* impl() const; + MGE_WIN_DECLSPEC_FUC Device(const void* impl); CUSTOM_PIMPL_CLS_DECL(Device); public: @@ -40,16 +40,16 @@ public: CUSTOM_FOR_EACH_DEVICE_TYPE(CUSTOM_DEVICE_TYPE_ENUM_DECL) }; - Device(const std::string& device); - Device(const char* device); - Device(DeviceEnum device); + MGE_WIN_DECLSPEC_FUC Device(const std::string& device); + MGE_WIN_DECLSPEC_FUC Device(const char* device); + MGE_WIN_DECLSPEC_FUC Device(DeviceEnum device); - std::string 
str(void) const; - DeviceEnum enumv(void) const; + MGE_WIN_DECLSPEC_FUC std::string str(void) const; + MGE_WIN_DECLSPEC_FUC DeviceEnum enumv(void) const; - static bool is_legal(const std::string& device); - static bool is_legal(DeviceEnum device); - static std::vector legal_devices(void); + MGE_WIN_DECLSPEC_FUC static bool is_legal(const std::string& device); + MGE_WIN_DECLSPEC_FUC static bool is_legal(DeviceEnum device); + MGE_WIN_DECLSPEC_FUC static std::vector legal_devices(void); friend class Tensor; friend bool operator==(const Device& lhs, const Device& rhs); @@ -61,19 +61,19 @@ using DeviceEnum = Device::DeviceEnum; bool operator==(const Device& lhs, const Device& rhs); class Shape { - const void* impl() const; - Shape(const void* impl); + MGE_WIN_DECLSPEC_FUC const void* impl() const; + MGE_WIN_DECLSPEC_FUC Shape(const void* impl); CUSTOM_PIMPL_CLS_DECL(Shape); public: - Shape(const std::vector& rhs); - Shape(const std::initializer_list& rhs); + MGE_WIN_DECLSPEC_FUC Shape(const std::vector& rhs); + MGE_WIN_DECLSPEC_FUC Shape(const std::initializer_list& rhs); size_t& operator[](size_t idx); size_t operator[](size_t idx) const; - void ndim(size_t dim); - size_t ndim(void) const; + MGE_WIN_DECLSPEC_FUC void ndim(size_t dim); + MGE_WIN_DECLSPEC_FUC size_t ndim(void) const; friend class Tensor; friend bool operator==(const Shape& lhs, const Shape& rhs); @@ -105,8 +105,8 @@ using bfloat16_t = uint16_t; #define CUSTOM_DTYPE_ENUM_DECL(custom_type, builtin_type, ctype) custom_type, class DType { - const void* impl() const; - DType(const void* impl); + MGE_WIN_DECLSPEC_FUC const void* impl() const; + MGE_WIN_DECLSPEC_FUC DType(const void* impl); CUSTOM_PIMPL_CLS_DECL(DType); public: @@ -114,23 +114,24 @@ public: CUSTOM_FOR_EACH_TENSOR_DATA_TYPE(CUSTOM_DTYPE_ENUM_DECL) }; - DType(const std::string& dtype); - DType(const char* dtype); - DType(const std::string& dtype, float scale, uint8_t zero_point = 0); - DType(const char* dtype, float scale, uint8_t zero_point = 0); - DType(DTypeEnum dtype); - DType(DTypeEnum dtype, float scale, uint8_t zero_point = 0); - - std::string str(void) const; - DTypeEnum enumv() const; - float scale(void) const; - uint8_t zero_point(void) const; + MGE_WIN_DECLSPEC_FUC DType(const std::string& dtype); + MGE_WIN_DECLSPEC_FUC DType(const char* dtype); + MGE_WIN_DECLSPEC_FUC DType( + const std::string& dtype, float scale, uint8_t zero_point = 0); + MGE_WIN_DECLSPEC_FUC DType(const char* dtype, float scale, uint8_t zero_point = 0); + MGE_WIN_DECLSPEC_FUC DType(DTypeEnum dtype); + MGE_WIN_DECLSPEC_FUC DType(DTypeEnum dtype, float scale, uint8_t zero_point = 0); + + MGE_WIN_DECLSPEC_FUC std::string str(void) const; + MGE_WIN_DECLSPEC_FUC DTypeEnum enumv() const; + MGE_WIN_DECLSPEC_FUC float scale(void) const; + MGE_WIN_DECLSPEC_FUC uint8_t zero_point(void) const; template - bool is_compatible(void) const; + MGE_WIN_DECLSPEC_FUC bool is_compatible(void) const; - static bool is_legal(const std::string& dtype); - static bool is_legal(const DTypeEnum& dtype); - static std::vector legal_dtypes(void); + MGE_WIN_DECLSPEC_FUC static bool is_legal(const std::string& dtype); + MGE_WIN_DECLSPEC_FUC static bool is_legal(const DTypeEnum& dtype); + MGE_WIN_DECLSPEC_FUC static std::vector legal_dtypes(void); friend class Tensor; friend bool operator==(const DType& lhs, const DType& rhs); @@ -180,16 +181,16 @@ bool operator==(const std::string& lhs, const DType& rhs); bool operator==(const char* lhs, const DType& rhs); class Format { - const void* impl() const; - Format(const void* 
impl); + MGE_WIN_DECLSPEC_FUC const void* impl() const; + MGE_WIN_DECLSPEC_FUC Format(const void* impl); CUSTOM_PIMPL_CLS_DECL(Format); public: - Format(const std::string& format); - Format(const char* format); + MGE_WIN_DECLSPEC_FUC Format(const std::string& format); + MGE_WIN_DECLSPEC_FUC Format(const char* format); - std::string str(void) const; - bool is_default(void) const; + MGE_WIN_DECLSPEC_FUC std::string str(void) const; + MGE_WIN_DECLSPEC_FUC bool is_default(void) const; friend class Tensor; CUSTOM_DATA_ADAPTOR_FRIEND_DECL; @@ -198,26 +199,26 @@ public: class Tensor { void* m_tensor; - const void* impl(void) const; - Tensor(const void* impl); + MGE_WIN_DECLSPEC_FUC const void* impl(void) const; + MGE_WIN_DECLSPEC_FUC Tensor(const void* impl); - const size_t* shapes_raw(void) const; - const ptrdiff_t* strides_raw(void) const; + MGE_WIN_DECLSPEC_FUC const size_t* shapes_raw(void) const; + MGE_WIN_DECLSPEC_FUC const ptrdiff_t* strides_raw(void) const; public: Tensor() = delete; - Tensor(const Tensor& rhs); - Tensor& operator=(const Tensor& rhs); - - Shape shape(void) const; - DType dtype(void) const; - Format format(void) const; - Device device(void) const; - - size_t size(void) const; - std::vector stride(void) const; - float scale(void) const; - uint8_t zero_point(void) const; + MGE_WIN_DECLSPEC_FUC Tensor(const Tensor& rhs); + MGE_WIN_DECLSPEC_FUC Tensor& operator=(const Tensor& rhs); + + MGE_WIN_DECLSPEC_FUC Shape shape(void) const; + MGE_WIN_DECLSPEC_FUC DType dtype(void) const; + MGE_WIN_DECLSPEC_FUC Format format(void) const; + MGE_WIN_DECLSPEC_FUC Device device(void) const; + + MGE_WIN_DECLSPEC_FUC size_t size(void) const; + MGE_WIN_DECLSPEC_FUC std::vector stride(void) const; + MGE_WIN_DECLSPEC_FUC float scale(void) const; + MGE_WIN_DECLSPEC_FUC uint8_t zero_point(void) const; void* data(void); const void* data(void) const; diff --git a/src/custom/include/megbrain/custom/utils.h b/src/custom/include/megbrain/custom/utils.h index f483a186..318bc62d 100644 --- a/src/custom/include/megbrain/custom/utils.h +++ b/src/custom/include/megbrain/custom/utils.h @@ -54,9 +54,9 @@ void impl_deleter(void* ptr) { std::unique_ptr m_impl; \ \ public: \ - Cls(); \ - Cls(const Cls& rhs); \ - Cls& operator=(const Cls& rhs) + MGE_WIN_DECLSPEC_FUC Cls(); \ + MGE_WIN_DECLSPEC_FUC Cls(const Cls& rhs); \ + MGE_WIN_DECLSPEC_FUC Cls& operator=(const Cls& rhs) #define CUSTOM_PIMPL_CLS_DEFINE(Cls) \ Cls::Cls() : m_impl(new Cls##Impl(), impl_deleter) {} \ diff --git a/src/gopt/include/megbrain/gopt/framework.h b/src/gopt/include/megbrain/gopt/framework.h index 05ef1485..396c9a97 100644 --- a/src/gopt/include/megbrain/gopt/framework.h +++ b/src/gopt/include/megbrain/gopt/framework.h @@ -375,7 +375,7 @@ public: ~GraphOptimizer() noexcept; //! add an optimization pass - GraphOptimizer& add_pass(std::unique_ptr pass); + MGE_WIN_DECLSPEC_FUC GraphOptimizer& add_pass(std::unique_ptr pass); //! add a pass with given type template @@ -415,14 +415,14 @@ public: const ComputingGraph::Options* comp_graph_opt = nullptr); //! transform given graph into a new optimized graph - SubGraph apply(const SubGraph& graph) const; + MGE_WIN_DECLSPEC_FUC SubGraph apply(const SubGraph& graph) const; /*! * \brief optimize graph defined by given endpoints and modify them * inplace * \return *this */ - const GraphOptimizer& apply_inplace(VarNodeArray& vars) const; + MGE_WIN_DECLSPEC_FUC const GraphOptimizer& apply_inplace(VarNodeArray& vars) const; /*! 
* \brief get var replace map associated with a computing graph @@ -431,14 +431,14 @@ public: * Note that the map would be cleared when GraphOptimizer is applied * on the graph. */ - static const ThinHashMap& var_replace_map( + MGE_WIN_DECLSPEC_FUC static const ThinHashMap& var_replace_map( ComputingGraph& graph); /*! * \brief get the final replaced var in * var_replace_map(var->owner_graph()) corresponding to var */ - static VarNode* var_replace_lookup(VarNode* var); + MGE_WIN_DECLSPEC_FUC static VarNode* var_replace_lookup(VarNode* var); /** * \brief add pass indicated by optimize options. @@ -446,10 +446,10 @@ public: * \param options common options * \param reset if set true, it will reset options when add passes. */ - const GraphOptimizer& add_passes_for_optimize_options( + MGE_WIN_DECLSPEC_FUC const GraphOptimizer& add_passes_for_optimize_options( cg::GraphCommonOptimizeOptions& options, bool reset = false); - const GraphOptimizer& add_passes_for_optimize_options( + MGE_WIN_DECLSPEC_FUC const GraphOptimizer& add_passes_for_optimize_options( const cg::GraphCommonOptimizeOptions& options); /** @@ -457,7 +457,7 @@ public: * * \param options graph tuning options */ - const GraphOptimizer& add_passes_for_graph_tuning_options( + MGE_WIN_DECLSPEC_FUC const GraphOptimizer& add_passes_for_graph_tuning_options( const GraphTuningOptions& options); }; @@ -491,7 +491,7 @@ public: bool all_const_inp; }; - AddOprResult add_opr(OperatorNodeBase* opr); + MGE_WIN_DECLSPEC_FUC AddOprResult add_opr(OperatorNodeBase* opr); const AddOprResult& opr_rst(OperatorNodeBase* opr) const { return m_oprinfo.at(opr).result; diff --git a/src/gopt/include/megbrain/gopt/inference.h b/src/gopt/include/megbrain/gopt/inference.h index 000cde26..6fe84dde 100644 --- a/src/gopt/include/megbrain/gopt/inference.h +++ b/src/gopt/include/megbrain/gopt/inference.h @@ -384,7 +384,7 @@ struct GraphTuningOptions { * This function applies a set of predefined optimizer passes to optimize * for inference. It assumes all params are constant. */ -SymbolVarArray optimize_for_inference( +MGE_WIN_DECLSPEC_FUC SymbolVarArray optimize_for_inference( const SymbolVarArray& dest_vars, const OptimizeForInferenceOptions& opt = {}); /*! @@ -393,7 +393,7 @@ SymbolVarArray optimize_for_inference( * The layout selection optimizers are target-dependent. And this function * applies a set of predefined optimizer passes designed for specific * device. */ -SymbolVarArray layout_transform( +MGE_WIN_DECLSPEC_FUC SymbolVarArray layout_transform( const SymbolVarArray& dest_vars, GraphTuningOptions::Target target = GraphTuningOptions::Target::UNSPEC); @@ -404,7 +404,7 @@ SymbolVarArray layout_transform( * This would modify the operators inplace. It can be used for implement * the fast-run mode. */ -void modify_opr_algo_strategy_inplace( +MGE_WIN_DECLSPEC_FUC void modify_opr_algo_strategy_inplace( const VarNodeArrayView& dest_vars, opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy strategy); @@ -418,7 +418,8 @@ void modify_opr_algo_strategy_inplace( * You may want to implement TimedFuncInvoker::ForkExecImpl and/or * PersistentCache for better performance in an SDK. */ -void enable_opr_algo_profiling_inplace(const VarNodeArrayView& dest_vars); +MGE_WIN_DECLSPEC_FUC void enable_opr_algo_profiling_inplace( + const VarNodeArrayView& dest_vars); /*! 
* \brief enable opr try profiling cache first, if failed, fallback to @@ -430,7 +431,8 @@ void enable_opr_algo_profiling_inplace(const VarNodeArrayView& dest_vars); * You may want to implement TimedFuncInvoker::ForkExecImpl and/or * PersistentCache for better performance in an SDK. */ -void enable_opr_use_profiling_cache_inplace(const VarNodeArrayView& dest_vars); +MGE_WIN_DECLSPEC_FUC void enable_opr_use_profiling_cache_inplace( + const VarNodeArrayView& dest_vars); /*! * \brief set workspace_limit for execution strategy for oprs with multiple * @@ -442,7 +444,7 @@ void enable_opr_use_profiling_cache_inplace(const VarNodeArrayView& dest_vars); * \warning It will influence the default algo chosen, and may be slower but * save memory. */ -void set_opr_algo_workspace_limit_inplace( +MGE_WIN_DECLSPEC_FUC void set_opr_algo_workspace_limit_inplace( const VarNodeArrayView& dest_vars, size_t workspace_limit); /*! diff --git a/src/megbrain_build_config.h.in b/src/megbrain_build_config.h.in index 94b8902f..3be29df3 100644 --- a/src/megbrain_build_config.h.in +++ b/src/megbrain_build_config.h.in @@ -60,37 +60,37 @@ // whether atlas is available #ifndef MGB_ATLAS -#define MGB_ATLAS 0 +#define MGB_ATLAS 0 #endif // whether cuda is available #ifndef MGB_CUDA -#define MGB_CUDA 1 +#define MGB_CUDA 1 #endif // whether to include file/line location for assert message #ifndef MGB_ASSERT_LOC -#define MGB_ASSERT_LOC 1 +#define MGB_ASSERT_LOC 1 #endif // whether to enable utils/debug.h and other debug methods #ifndef MGB_ENABLE_DEBUG_UTIL -#define MGB_ENABLE_DEBUG_UTIL 1 +#define MGB_ENABLE_DEBUG_UTIL 1 #endif // whether to enable logging #ifndef MGB_ENABLE_LOGGING -#define MGB_ENABLE_LOGGING 1 +#define MGB_ENABLE_LOGGING 1 #endif // whether to enable registering opr grad functions #ifndef MGB_ENABLE_GRAD -#define MGB_ENABLE_GRAD 1 +#define MGB_ENABLE_GRAD 1 #endif // whether to enable cpuinfo #ifndef MGB_ENABLE_CPUINFO -#define MGB_ENABLE_CPUINFO 1 +#define MGB_ENABLE_CPUINFO 1 #endif //! use one MACRO indicate enable_arm_dotprod @@ -101,7 +101,6 @@ #define MGB_ENABLE_DOT 1 #endif - //! ENABLE MGB DOT should enable CPUINFO #if MGB_ENABLE_DOT #if !defined(MGB_ENABLE_CPUINFO) || !MGB_ENABLE_CPUINFO @@ -115,38 +114,38 @@ //! IOS disabled cpuinfo and dotprod, cpuinfo has some problem on ios #ifdef IOS #undef MGB_ENABLE_CPUINFO -#define MGB_ENABLE_CPUINFO 0 +#define MGB_ENABLE_CPUINFO 0 #undef MGB_ENABLE_DOT #endif // whether to include actual class name in mgb::Typeinfo object; if this is // disabled, mgb::serialization::OprRegistry::find_opr_by_name would not work.
#ifndef MGB_VERBOSE_TYPEINFO_NAME -#define MGB_VERBOSE_TYPEINFO_NAME 1 +#define MGB_VERBOSE_TYPEINFO_NAME 1 #endif // whether to enable configuring megbrain internals through env vars #ifndef MGB_ENABLE_GETENV -#define MGB_ENABLE_GETENV MGB_ASSERT_LOC +#define MGB_ENABLE_GETENV MGB_ASSERT_LOC #endif // whether to remove unnecessary features when used for serving #ifndef MGB_BUILD_SLIM_SERVING -#define MGB_BUILD_SLIM_SERVING 0 +#define MGB_BUILD_SLIM_SERVING 0 #endif // whether to enable exception #ifndef MGB_ENABLE_EXCEPTION #if __EXCEPTIONS -#define MGB_ENABLE_EXCEPTION 1 +#define MGB_ENABLE_EXCEPTION 1 #else -#define MGB_ENABLE_EXCEPTION 0 +#define MGB_ENABLE_EXCEPTION 0 #endif #endif // whether <thread> is available and usable #ifndef MGB_HAVE_THREAD -#define MGB_HAVE_THREAD 1 +#define MGB_HAVE_THREAD 1 #endif // whether to trade thread safety for memory usage @@ -156,7 +155,7 @@ // whether to enable JIT #ifndef MGB_JIT -#define MGB_JIT 1 +#define MGB_JIT 1 #endif #ifndef MGB_JIT_HALIDE #define MGB_JIT_HALIDE 0 @@ -174,10 +173,9 @@ #define MGB_CAMBRICON MEGDNN_WITH_CAMBRICON #endif - // whether to enable TensorRT support #ifndef MGB_ENABLE_TENSOR_RT -#define MGB_ENABLE_TENSOR_RT MGB_CUDA +#define MGB_ENABLE_TENSOR_RT MGB_CUDA #endif // whether to enable fastrun profile @@ -252,4 +250,26 @@ #define MEGDNN_X86_WITH_MKL_DNN 0 #endif -#endif // _HEADER_MGB_BUILD_CONFIG +#ifdef WIN32 +#ifdef MGE_DLL_EXPORT +#define MGE_WIN_DECLSPEC_FUC __declspec(dllexport) +#else +#define MGE_WIN_DECLSPEC_FUC +#endif +#else +#define MGE_WIN_DECLSPEC_FUC +#endif + +#ifdef WIN32 +#if defined(MGE_DLL_EXPORT_DATA) +#define MGE_WIN_DECLSPEC_DATA __declspec(dllexport) +#elif defined(MGE_DLL_IMPORT_DATA) +#define MGE_WIN_DECLSPEC_DATA __declspec(dllimport) +#else +#define MGE_WIN_DECLSPEC_DATA +#endif +#else +#define MGE_WIN_DECLSPEC_DATA +#endif + +#endif // _HEADER_MGB_BUILD_CONFIG diff --git a/src/opr/include/megbrain/opr/basic_arith.h b/src/opr/include/megbrain/opr/basic_arith.h index 2dac223f..45f0123e 100644 --- a/src/opr/include/megbrain/opr/basic_arith.h +++ b/src/opr/include/megbrain/opr/basic_arith.h @@ -58,22 +58,22 @@ public: * The operands are broadcasted automatically on dimensions of shape one to * match shapes of each other; it works like broadcasting in numpy. */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( Elemwise, intl::ElemwiseBase, mixin::FwdIn2OutWritableHelper) // { using ModeTrait = megdnn::Elemwise::ModeTrait; public: using Mode = Param::Mode; - Elemwise( + MGE_WIN_DECLSPEC_FUC Elemwise( const ModeTrait& mode_trait, const VarNodeArrayView& inputs, Param param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( const VarNodeArrayView& inputs, Param param, const OperatorNodeConfig& config = {}); - static TensorShape get_output_var_shape( + MGE_WIN_DECLSPEC_FUC static TensorShape get_output_var_shape( Mode mode, const TensorShapeArray& input_shapes); /*! @@ -84,7 +84,7 @@ public: * \param opr the megdnn operator to be used; a new operator would be * created if it is null */ - static void perform( + MGE_WIN_DECLSPEC_FUC static void perform( Mode mode, DeviceTensorND& dest, const SmallVector& inputs, intl::UniqPtrWithCN& opr); @@ -98,10 +98,12 @@ public: * \param layouts the layouts to be collectively collapsed * */ - static TensorLayoutArray collective_collapse(const TensorLayoutArray& layouts); + MGE_WIN_DECLSPEC_FUC static TensorLayoutArray collective_collapse( + const TensorLayoutArray& layouts); //!
like collective_collapse(), but modify the layouts inplace - static void collective_collapse_inplace(const TensorLayoutPtrArray& layouts); + MGE_WIN_DECLSPEC_FUC static void collective_collapse_inplace( + const TensorLayoutPtrArray& layouts); /*! * \brief wrapper for broadcast and collective collapse * @@ -111,7 +113,7 @@ public: * \param[in,out] target_layout broadcasted target layout; it would be * collapsed together with inputs */ - static void broadcast_collective_collapse( + MGE_WIN_DECLSPEC_FUC static void broadcast_collective_collapse( const TensorLayoutPtrArray& inp_layouts, TensorLayout* target_layout); /*! @@ -128,7 +130,8 @@ public: * \param[in,out] grads vars to be summed; it is also an output param, * which would contain all the intermediate results for summing */ - static VarNode* sum_grad_list(VarNode* wrt, VarNodeArray& grads); + MGE_WIN_DECLSPEC_FUC static VarNode* sum_grad_list( + VarNode* wrt, VarNodeArray& grads); //! whether input layouts mismatch ever happened for fused oprs; this //! method is public for debug purpose @@ -163,11 +166,11 @@ using TypeCvtBase = cg::OutshapePureByInshapeOpr< cg::mixin::IOSameShapeOperatorNode>; } -MGB_DEFINE_OPR_CLASS(TypeCvt, intl::TypeCvtBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(TypeCvt, intl::TypeCvtBase) // { public: TypeCvt(VarNode* inp, DType dest_type, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar input, DType dest_type, const OperatorNodeConfig& config = {}); static void perform( @@ -200,7 +203,7 @@ private: * Attention: AddUpdate will not be executed if disable flag is set to 1, * this is used for dynamic param-updating. */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( AddUpdate, cg::SingleCNOperatorNodeBaseT) // { public: using SharedScalar = std::shared_ptr; @@ -235,7 +238,7 @@ public: VarNode* dest, VarNode* delta, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar dest, SymbolVar delta, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -256,7 +259,7 @@ private: * Mode specifies the actual arithmetic; and exactly one of *axis* and * *target_shape* must be provided, to specify output shape. */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( Reduce, intl::DynamicOutputIfInputDynamic< intl::OutshapeBySymvarSCNOpr>) // { public: @@ -269,7 +272,7 @@ public: const Param& param() const { return m_param; } - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, Param param, SymbolVar target_shape = {}, const OperatorNodeConfig& config = {}); @@ -317,10 +320,10 @@ private: * graph with only Elemwise::Mode::POW, and this opr should only be inserted by * the optimizer.
*/ -MGB_DEFINE_OPR_CLASS(PowC, intl::MegDNNOprWrapperFwd) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(PowC, intl::MegDNNOprWrapperFwd) // { public: PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, const Param& param = {}, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/blas.h b/src/opr/include/megbrain/opr/blas.h index e9c5ddec..e5983ab8 100644 --- a/src/opr/include/megbrain/opr/blas.h +++ b/src/opr/include/megbrain/opr/blas.h @@ -31,11 +31,11 @@ MGB_DEFINE_OPR_CLASS( public mixin::AlgoChooserHelper) // { public: using AlgorithmInfo = megdnn::detail::Algorithm::Info; - MatrixMul( + MGE_WIN_DECLSPEC_FUC MatrixMul( VarNode* opr0, VarNode* opr1, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar opr0, SymbolVar opr1, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); @@ -62,11 +62,11 @@ MGB_DEFINE_OPR_CLASS( public mixin::AlgoChooserHelper) // { public: using AlgorithmInfo = megdnn::detail::Algorithm::Info; - BatchedMatrixMul( + MGE_WIN_DECLSPEC_FUC BatchedMatrixMul( VarNode* opr0, VarNode* opr1, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar opr0, SymbolVar opr1, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); @@ -91,9 +91,10 @@ private: MGB_DEFINE_OPR_CLASS( Dot, cg::SingleCNOperatorNodeBaseT>) // { public: - Dot(VarNode* opr0, VarNode* opr1, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC Dot( + VarNode* opr0, VarNode* opr1, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar opr0, SymbolVar opr1, const OperatorNodeConfig& config = {}); // for serialization @@ -115,8 +116,9 @@ MGB_DEFINE_MEGDNN_OPR_WRAPPER_FWD1(MatrixInverse); MGB_DEFINE_OPR_CLASS(SVD, intl::MegDNNOprWrapperFwd) // { public: - SVD(VarNode * src, const Param& param, const OperatorNodeConfig& config); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC SVD( + VarNode * src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( const SymbolVar& src, const Param& param = {}, const OperatorNodeConfig& config = {}); }; diff --git a/src/opr/include/megbrain/opr/custom_opnode.h b/src/opr/include/megbrain/opr/custom_opnode.h index 10fc4e29..e6f032d2 100644 --- a/src/opr/include/megbrain/opr/custom_opnode.h +++ b/src/opr/include/megbrain/opr/custom_opnode.h @@ -84,26 +84,26 @@ MGB_DEFINE_OPR_CLASS(CustomOpNode, cg::OperatorNodeBase) // { bool update_priority() const override final; public: - CustomOpNode( + MGE_WIN_DECLSPEC_FUC CustomOpNode( const std::shared_ptr& op, VarNodeArray inputs, const custom::Param& param, const OperatorNodeConfig& config); - static VarNodeArray make( + MGE_WIN_DECLSPEC_FUC static VarNodeArray make( const std::shared_ptr& op, VarNodeArray inputs, const custom::Param& param, const OperatorNodeConfig& config); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( const std::shared_ptr& op, SymbolVarArray inputs, const custom::Param& param, const OperatorNodeConfig& config); - custom::RunTimeId runtime_id(void) const; - uint32_t param_tag(void) const; - custom::Param& param(void); - 
custom::Param param(void) const; - std::string op_type(void) const; - std::string op_desc(void) const; - size_t input_num(void) const; - size_t output_num(void) const; - custom::ArgInfo input_info(size_t idx) const; - custom::ArgInfo output_info(size_t idx) const; + MGE_WIN_DECLSPEC_FUC custom::RunTimeId runtime_id(void) const; + MGE_WIN_DECLSPEC_FUC uint32_t param_tag(void) const; + MGE_WIN_DECLSPEC_FUC custom::Param& param(void); + MGE_WIN_DECLSPEC_FUC custom::Param param(void) const; + MGE_WIN_DECLSPEC_FUC std::string op_type(void) const; + MGE_WIN_DECLSPEC_FUC std::string op_desc(void) const; + MGE_WIN_DECLSPEC_FUC size_t input_num(void) const; + MGE_WIN_DECLSPEC_FUC size_t output_num(void) const; + MGE_WIN_DECLSPEC_FUC custom::ArgInfo input_info(size_t idx) const; + MGE_WIN_DECLSPEC_FUC custom::ArgInfo output_info(size_t idx) const; }; } // namespace opr diff --git a/src/opr/include/megbrain/opr/dnn/adaptive_pooling.h b/src/opr/include/megbrain/opr/dnn/adaptive_pooling.h index d9ecd570..923396ff 100644 --- a/src/opr/include/megbrain/opr/dnn/adaptive_pooling.h +++ b/src/opr/include/megbrain/opr/dnn/adaptive_pooling.h @@ -25,10 +25,10 @@ MGB_DEFINE_OPR_CLASS( intl::WorkspaceSizeInfer>>) // { public: - AdaptivePoolingForward( + MGE_WIN_DECLSPEC_FUC AdaptivePoolingForward( VarNode* src, VarNode* out_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar out_shape, const Param& param, const OperatorNodeConfig& config = {}); static SymbolVar make( @@ -55,10 +55,10 @@ MGB_DEFINE_OPR_CLASS( AdaptivePoolingBackward, intl::MegDNNOprWrapperBwd) // { public: - AdaptivePoolingBackward( + MGE_WIN_DECLSPEC_FUC AdaptivePoolingBackward( VarNode* src, VarNode* out_shape, VarNode* dst, VarNode* diff, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar out_shape, SymbolVar dst, SymbolVar diff, const Param& param, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/dnn/batch_norm.h b/src/opr/include/megbrain/opr/dnn/batch_norm.h index bd7f6c66..55ca865d 100644 --- a/src/opr/include/megbrain/opr/dnn/batch_norm.h +++ b/src/opr/include/megbrain/opr/dnn/batch_norm.h @@ -39,26 +39,26 @@ namespace opr { * Output reserve is used for cudnnBatchNormalizationForwardTrainingEx, and should * be preserved for backward. 
*/ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( BatchNormForward, cg::OutshapePureByInshapeOpr< intl::WorkspaceSizeInfer>>>) // { public: - BatchNormForward( + MGE_WIN_DECLSPEC_FUC BatchNormForward( VarNode* x, VarNode* scale, VarNode* bias, VarNode* mean, VarNode* variance, const Param& param, const OperatorNodeConfig& config); - BatchNormForward( + MGE_WIN_DECLSPEC_FUC BatchNormForward( VarNode* x, VarNode* scale, VarNode* bias, const Param& param, const OperatorNodeConfig& config); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( SymbolVar x, SymbolVar scale, SymbolVar bias, SymbolVar mean, SymbolVar variance, const Param& param = {}, const OperatorNodeConfig& config = {}); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( SymbolVar x, SymbolVar scale, SymbolVar bias, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -93,14 +93,14 @@ using BatchNorm = BatchNormForward; * scale_grad, bias_grad, x_grad */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( BatchNormBackward, intl::MegDNNOprWrapperBwd) // { public: - BatchNormBackward( + MGE_WIN_DECLSPEC_FUC BatchNormBackward( VarNode* x, VarNode* y_grad, VarNode* save_mean, VarNode* save_variance, VarNode* scale, VarNode* reserve, const Param& param, const OperatorNodeConfig& config); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( SymbolVar x, SymbolVar y_grad, SymbolVar save_mean, SymbolVar save_variance, SymbolVar scale, SymbolVar reserve, const Param& param = {}, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/dnn/convolution.h b/src/opr/include/megbrain/opr/dnn/convolution.h index 18bae2bc..5c8cb5b9 100644 --- a/src/opr/include/megbrain/opr/dnn/convolution.h +++ b/src/opr/include/megbrain/opr/dnn/convolution.h @@ -93,7 +93,7 @@ class ConvolutionTestingPeer; } // namespace testing -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( ConvolutionForward, intl::ConvolutionForwardBase, public mixin::AlgoChooserHelper) // { void init_output_dtype() override; @@ -114,17 +114,17 @@ MGB_DEFINE_OPR_CLASS( friend testing::ConvolutionTestingPeer; public: - ConvolutionForward( + MGE_WIN_DECLSPEC_FUC ConvolutionForward( VarNode* src, VarNode* filter, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); }; using Convolution = ConvolutionForward; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( ConvBiasForward, intl::ConvBiasForwardBase, public mixin::AlgoChooserHelper) // { void init_output_dtype() override; size_t get_workspace_size_bytes( @@ -147,37 +147,37 @@ MGB_DEFINE_OPR_CLASS( public: //! src * filter - ConvBiasForward( + MGE_WIN_DECLSPEC_FUC ConvBiasForward( VarNode* src, VarNode* filter, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); //! 
src * filter + bias - ConvBiasForward( + MGE_WIN_DECLSPEC_FUC ConvBiasForward( VarNode* src, VarNode* filter, VarNode* bias, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, SymbolVar bias, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); //! src * filter + bias + z - ConvBiasForward( + MGE_WIN_DECLSPEC_FUC ConvBiasForward( VarNode* src, VarNode* filter, VarNode* bias, VarNode* z, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, SymbolVar bias, SymbolVar z, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); - static void check_winograd_param_valid( + MGE_WIN_DECLSPEC_FUC static void check_winograd_param_valid( const megdnn::ConvBias::WinogradParam& param, const DType& dtype); - static megdnn::param::MatrixMul::Format get_matmul_format( + MGE_WIN_DECLSPEC_FUC static megdnn::param::MatrixMul::Format get_matmul_format( const megdnn::ConvBias::WinogradParam& param); }; using ConvBias = ConvBiasForward; @@ -185,7 +185,7 @@ using ConvBias = ConvBiasForward; /*! * \brief Can be used in two ways: compute gradient of conv, or deconv */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( ConvolutionBackwardData, cg::SingleCNOperatorNodeBaseT< mixin::MegDNNOprHolderImpl>, @@ -200,30 +200,30 @@ MGB_DEFINE_OPR_CLASS( NodeProp* do_make_node_prop() const override; public: - ConvolutionBackwardData( + MGE_WIN_DECLSPEC_FUC ConvolutionBackwardData( VarNode* filter, VarNode* diff, VarNode* src_for_shp, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); //! grad mode; original data shape is required - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar filter, SymbolVar diff, SymbolVar src_for_shp, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); //! sereg for deconvolution mode - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar filter, SymbolVar data, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); //! 
user interface for deconv - static SymbolVar make_deconv( + MGE_WIN_DECLSPEC_FUC static SymbolVar make_deconv( SymbolVar data, SymbolVar filter, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}) { return make(filter, data, param, policy, config); } }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( ConvolutionBackwardFilter, intl::MegDNNOprWrapperBwd, public mixin::AlgoChooserHelper) // { @@ -232,40 +232,41 @@ MGB_DEFINE_OPR_CLASS( const TensorShapeArray& output_shapes) const override final; public: - ConvolutionBackwardFilter( + MGE_WIN_DECLSPEC_FUC ConvolutionBackwardFilter( VarNode* src, VarNode* diff, VarNode* filter, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar diff, SymbolVar filter, const Param& param, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( MaskConvolution, intl::MegDNNOprWrapperFwd) // { void init_output_dtype() override final; public: - MaskConvolution( + MGE_WIN_DECLSPEC_FUC MaskConvolution( VarNode* src, VarNode* filter, VarNode* mask, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, SymbolVar mask, const Param& param, const OperatorNodeConfig& config = {}); }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( MaskPropagate, intl::MegDNNOprWrapperFwd) // { void init_output_dtype() override final; public: - MaskPropagate(VarNode* src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC MaskPropagate( + VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param, const OperatorNodeConfig& config = {}); }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( Convolution3DForward, intl::MegDNNOprWrapperFwd, public mixin::AlgoChooserHelper) // { void init_output_dtype() override; @@ -274,11 +275,11 @@ MGB_DEFINE_OPR_CLASS( const TensorShapeArray& output_shapes) const override final; public: - Convolution3DForward( + MGE_WIN_DECLSPEC_FUC Convolution3DForward( VarNode* src, VarNode* filter, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); }; @@ -287,7 +288,7 @@ using Convolution3D = Convolution3DForward; /*! * \brief Can be used in two ways: compute gradient of conv, or deconv */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( Convolution3DBackwardData, cg::SingleCNOperatorNodeBaseT< mixin::MegDNNOprHolderImpl>, @@ -300,18 +301,18 @@ MGB_DEFINE_OPR_CLASS( NodeProp* do_make_node_prop() const override; public: - Convolution3DBackwardData( + MGE_WIN_DECLSPEC_FUC Convolution3DBackwardData( VarNode* filter, VarNode* diff, VarNode* src_for_shp, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); //! 
grad mode; original data shape is required - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar filter, SymbolVar diff, SymbolVar src_for_shp, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); //! sereg for deconvolution3D mode - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar filter, SymbolVar data, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); @@ -323,7 +324,7 @@ public: } }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( Convolution3DBackwardFilter, intl::MegDNNOprWrapperBwd, public mixin::AlgoChooserHelper) // { @@ -332,15 +333,15 @@ MGB_DEFINE_OPR_CLASS( const TensorShapeArray& output_shapes) const override final; public: - Convolution3DBackwardFilter( + MGE_WIN_DECLSPEC_FUC Convolution3DBackwardFilter( VarNode* src, VarNode* diff, VarNode* filter, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar diff, SymbolVar filter, const Param& param, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( LocalShareForward, intl::MegDNNOprWrapperFwd, public mixin::AlgoChooserHelper) // { void init_output_dtype() override; @@ -351,16 +352,16 @@ MGB_DEFINE_OPR_CLASS( const TensorShapeArray& output_shapes) const override final; public: - LocalShareForward( + MGE_WIN_DECLSPEC_FUC LocalShareForward( VarNode* src, VarNode* filter, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); }; using LocalShare = LocalShareForward; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( LocalShareBackwardData, cg::SingleCNOperatorNodeBaseT< mixin::MegDNNOprHolderImpl>, @@ -374,18 +375,18 @@ MGB_DEFINE_OPR_CLASS( NodeProp* do_make_node_prop() const override; public: - LocalShareBackwardData( + MGE_WIN_DECLSPEC_FUC LocalShareBackwardData( VarNode* filter, VarNode* diff, VarNode* src_for_shp, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); //! 
grad mode; original data shape is required - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar filter, SymbolVar diff, SymbolVar src_for_shp, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( LocalShareBackwardFilter, intl::MegDNNOprWrapperBwd, public mixin::AlgoChooserHelper) // { @@ -394,24 +395,24 @@ MGB_DEFINE_OPR_CLASS( const TensorShapeArray& output_shapes) const override final; public: - LocalShareBackwardFilter( + MGE_WIN_DECLSPEC_FUC LocalShareBackwardFilter( VarNode* src, VarNode* diff, VarNode* filter, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar diff, SymbolVar filter, const Param& param, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( DeformableConvForward, intl::MegDNNOprWrapperFwd, public mixin::AlgoChooserHelper) // { public: - DeformableConvForward( + MGE_WIN_DECLSPEC_FUC DeformableConvForward( VarNode* src, VarNode* filter, VarNode* offset, VarNode* mask, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, SymbolVar offset, SymbolVar mask, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); @@ -425,21 +426,21 @@ private: }; using DeformableConv = DeformableConvForward; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( DeformableConvBackwardData, intl::DeformableConvBackwardDataBase, public mixin::AlgoChooserHelper, public mixin::ConvolutionBackwardDataMixin) // { public: - DeformableConvBackwardData( + MGE_WIN_DECLSPEC_FUC DeformableConvBackwardData( VarNode* src, VarNode* filter, VarNode* offset, VarNode* mask, VarNode* diff, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVarArray make_all( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make_all( SymbolVar src, SymbolVar filter, SymbolVar offset, SymbolVar mask, SymbolVar diff, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, SymbolVar offset, SymbolVar mask, SymbolVar diff, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); @@ -463,17 +464,17 @@ private: } }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( DeformableConvBackwardFilter, intl::MegDNNOprWrapperBwd, public mixin::AlgoChooserHelper) // { public: - DeformableConvBackwardFilter( + MGE_WIN_DECLSPEC_FUC DeformableConvBackwardFilter( VarNode* src, VarNode* filter, VarNode* offset, VarNode* mask, VarNode* diff, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, SymbolVar offset, SymbolVar mask, SymbolVar diff, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); @@ -486,7 +487,7 @@ private: const TensorShapeArray& output_shapes) const override final; }; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( BatchConvBiasForward, intl::BatchConvBiasForwardBase, 
public mixin::AlgoChooserHelper) // { void init_output_dtype() override; @@ -506,30 +507,30 @@ MGB_DEFINE_OPR_CLASS( public: //! src * filter - BatchConvBiasForward( + MGE_WIN_DECLSPEC_FUC BatchConvBiasForward( VarNode* src, VarNode* filter, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); //! src * filter + bias - BatchConvBiasForward( + MGE_WIN_DECLSPEC_FUC BatchConvBiasForward( VarNode* src, VarNode* filter, VarNode* bias, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, SymbolVar bias, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); //! src * filter + bias + z - BatchConvBiasForward( + MGE_WIN_DECLSPEC_FUC BatchConvBiasForward( VarNode* src, VarNode* filter, VarNode* bias, VarNode* z, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar filter, SymbolVar bias, SymbolVar z, const Param& param = {}, const ExecutionPolicy& policy = {}, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/dnn/correlation.h b/src/opr/include/megbrain/opr/dnn/correlation.h index 2c19843f..8957c563 100644 --- a/src/opr/include/megbrain/opr/dnn/correlation.h +++ b/src/opr/include/megbrain/opr/dnn/correlation.h @@ -20,11 +20,11 @@ namespace opr { MGB_DEFINE_OPR_CLASS( CorrelationForward, intl::MegDNNOprWrapperFwd) // { public: - CorrelationForward( + MGE_WIN_DECLSPEC_FUC CorrelationForward( VarNode* data1, VarNode* data2, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar data1, SymbolVar data2, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -34,11 +34,11 @@ MGB_DEFINE_OPR_CLASS( CorrelationBackwardData1, intl::MegDNNOprWrapperBwd) // { public: - CorrelationBackwardData1( + MGE_WIN_DECLSPEC_FUC CorrelationBackwardData1( VarNode* diff, VarNode* data1, VarNode* data2, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar diff, SymbolVar data1, SymbolVar data2, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -53,11 +53,11 @@ MGB_DEFINE_OPR_CLASS( CorrelationBackwardData2, intl::MegDNNOprWrapperBwd) // { public: - CorrelationBackwardData2( + MGE_WIN_DECLSPEC_FUC CorrelationBackwardData2( VarNode* diff, VarNode* data1, VarNode* data2, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar diff, SymbolVar data1, SymbolVar data2, const Param& param = {}, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/dnn/fake_quant.h b/src/opr/include/megbrain/opr/dnn/fake_quant.h index 0b64888f..6129a8d0 100644 --- a/src/opr/include/megbrain/opr/dnn/fake_quant.h +++ b/src/opr/include/megbrain/opr/dnn/fake_quant.h @@ -18,11 +18,11 @@ namespace opr { MGB_DEFINE_OPR_CLASS( FakeQuantForward, intl::MegDNNOprWrapperFwd) // { public: - FakeQuantForward( + MGE_WIN_DECLSPEC_FUC FakeQuantForward( VarNode* src, VarNode* scale, VarNode* 
zero_point, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar scale, SymbolVar zero_point, const Param& param = {}, const OperatorNodeConfig& config = {}); }; // namespace opr @@ -31,14 +31,14 @@ using FakeQuant = FakeQuantForward; MGB_DEFINE_OPR_CLASS( FakeQuantBackward, intl::MegDNNOprWrapperBwd) // { public: - FakeQuantBackward( + MGE_WIN_DECLSPEC_FUC FakeQuantBackward( VarNode* diff, VarNode* input, VarNode* scale, VarNode* zero_point, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar diff, SymbolVar input, SymbolVar scale, SymbolVar zero_point, const Param& param = {}, const OperatorNodeConfig& config = {}); }; } // namespace opr -} // namespace mgb \ No newline at end of file +} // namespace mgb diff --git a/src/opr/include/megbrain/opr/dnn/images2neibs.h b/src/opr/include/megbrain/opr/dnn/images2neibs.h index 3ca9eacf..256bdb9a 100644 --- a/src/opr/include/megbrain/opr/dnn/images2neibs.h +++ b/src/opr/include/megbrain/opr/dnn/images2neibs.h @@ -19,10 +19,10 @@ namespace opr { MGB_DEFINE_OPR_CLASS( Images2NeibsForward, intl::MegDNNOprWrapperFwd) // { public: - Images2NeibsForward( + MGE_WIN_DECLSPEC_FUC Images2NeibsForward( VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -31,11 +31,11 @@ using Images2Neibs = Images2NeibsForward; MGB_DEFINE_OPR_CLASS( Images2NeibsBackward, intl::MegDNNOprWrapperBwd) // { public: - Images2NeibsBackward( + MGE_WIN_DECLSPEC_FUC Images2NeibsBackward( VarNode* diff, VarNode* src_for_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar diff, SymbolVar src_for_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); }; diff --git a/src/opr/include/megbrain/opr/dnn/lrn.h b/src/opr/include/megbrain/opr/dnn/lrn.h index 4ee9811d..e9cdf83b 100644 --- a/src/opr/include/megbrain/opr/dnn/lrn.h +++ b/src/opr/include/megbrain/opr/dnn/lrn.h @@ -19,8 +19,9 @@ namespace opr { MGB_DEFINE_OPR_CLASS(LRNForward, intl::MegDNNOprWrapperFwd) // { public: - LRNForward(VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC LRNForward( + VarNode* src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param, const OperatorNodeConfig& config = {}); }; using LRN = LRNForward; @@ -28,10 +29,10 @@ using LRN = LRNForward; MGB_DEFINE_OPR_CLASS( LRNBackward, intl::MegDNNOprWrapperBwd) // { public: - LRNBackward( + MGE_WIN_DECLSPEC_FUC LRNBackward( VarNode* src, VarNode* dst, VarNode* diff, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar dst, SymbolVar diff, const Param& param, const OperatorNodeConfig& config = {}); }; diff --git a/src/opr/include/megbrain/opr/dnn/lsq.h b/src/opr/include/megbrain/opr/dnn/lsq.h index fdd4e462..1552978c 100644 --- a/src/opr/include/megbrain/opr/dnn/lsq.h +++ b/src/opr/include/megbrain/opr/dnn/lsq.h @@ -18,11 +18,11 @@ namespace opr { MGB_DEFINE_OPR_CLASS(LSQForward, intl::MegDNNOprWrapperFwd) // { public: - LSQForward( + MGE_WIN_DECLSPEC_FUC LSQForward( 
VarNode* src, VarNode* scale, VarNode* zero_point, VarNode* grad_scale, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar scale, SymbolVar zero_point, SymbolVar grad_scale, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -31,11 +31,11 @@ using LSQ = LSQForward; MGB_DEFINE_OPR_CLASS( LSQBackward, intl::MegDNNOprWrapperBwd) // { public: - LSQBackward( + MGE_WIN_DECLSPEC_FUC LSQBackward( VarNode* y_grad, VarNode* x, VarNode* scale, VarNode* zero_point, VarNode* grad_scale, const Param& param, const OperatorNodeConfig& config); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( SymbolVar y_grad, SymbolVar x, SymbolVar scale, SymbolVar zero_point, SymbolVar grad_scale, const Param& param = {}, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/dnn/pooling.h b/src/opr/include/megbrain/opr/dnn/pooling.h index 17cc596b..3e83e96e 100644 --- a/src/opr/include/megbrain/opr/dnn/pooling.h +++ b/src/opr/include/megbrain/opr/dnn/pooling.h @@ -22,10 +22,10 @@ MGB_DEFINE_OPR_CLASS( PoolingForward, intl::MegDNNOprWrapperFwd, public mixin::AlgoChooserHelper) //{ public: -PoolingForward( +MGE_WIN_DECLSPEC_FUC PoolingForward( VarNode* src, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); -static SymbolVar make( +MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param, const OperatorNodeConfig& config = {}, const ExecutionPolicy& policy = {}); @@ -41,15 +41,15 @@ MGB_DEFINE_OPR_CLASS( PoolingBackward, intl::MegDNNOprWrapperBwd, public mixin::AlgoChooserHelper) //{ public: -PoolingBackward( +MGE_WIN_DECLSPEC_FUC PoolingBackward( VarNode* src, VarNode* dst, VarNode* diff, const Param& param, const ExecutionPolicy& policy, const OperatorNodeConfig& config); -static SymbolVar make( +MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar dst, SymbolVar diff, const Param& param, const OperatorNodeConfig& config = {}, const ExecutionPolicy& policy = {}); -size_t get_workspace_size_bytes( +MGE_WIN_DECLSPEC_FUC size_t get_workspace_size_bytes( const TensorShapeArray& input_shapes, const TensorShapeArray& output_shapes) const override final; }; diff --git a/src/opr/include/megbrain/opr/dnn/roi_align.h b/src/opr/include/megbrain/opr/dnn/roi_align.h index 760af123..79d30f2a 100644 --- a/src/opr/include/megbrain/opr/dnn/roi_align.h +++ b/src/opr/include/megbrain/opr/dnn/roi_align.h @@ -20,11 +20,11 @@ namespace opr { MGB_DEFINE_OPR_CLASS( ROIAlignForward, intl::MegDNNOprWrapperFwd) // { public: - ROIAlignForward( + MGE_WIN_DECLSPEC_FUC ROIAlignForward( VarNode* src, VarNode* rois, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar rois, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -33,11 +33,11 @@ using ROIAlign = ROIAlignForward; MGB_DEFINE_OPR_CLASS( ROIAlignBackward, intl::MegDNNOprWrapperBwd) // { public: - ROIAlignBackward( + MGE_WIN_DECLSPEC_FUC ROIAlignBackward( VarNode* diff, VarNode* src, VarNode* rois, VarNode* index, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar diff, SymbolVar src, SymbolVar rois, SymbolVar index, const Param& param = {}, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/dnn/roi_pooling.h 
b/src/opr/include/megbrain/opr/dnn/roi_pooling.h index 179209fe..da18ea08 100644 --- a/src/opr/include/megbrain/opr/dnn/roi_pooling.h +++ b/src/opr/include/megbrain/opr/dnn/roi_pooling.h @@ -43,11 +43,11 @@ MGB_DEFINE_OPR_CLASS( intl::WorkspaceSizeInfer>>) // { public: - ROIPoolingForward( + MGE_WIN_DECLSPEC_FUC ROIPoolingForward( VarNode* src, VarNode* rois, VarNode* dst_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar rois, SymbolVar dst_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -76,11 +76,11 @@ using ROIPooling = ROIPoolingForward; MGB_DEFINE_OPR_CLASS( ROIPoolingBackward, intl::MegDNNOprWrapperBwd) // { public: - ROIPoolingBackward( + MGE_WIN_DECLSPEC_FUC ROIPoolingBackward( VarNode* diff, VarNode* src, VarNode* rois, VarNode* index, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar diff, SymbolVar src, SymbolVar rois, SymbolVar index, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -94,14 +94,14 @@ MGB_DEFINE_OPR_CLASS( DeformablePSROIPoolingForward, intl::MegDNNOprWrapperFwd) // { public: - DeformablePSROIPoolingForward( + MGE_WIN_DECLSPEC_FUC DeformablePSROIPoolingForward( VarNode* src, VarNode* rois, VarNode* trans, const Param& param, const OperatorNodeConfig& config); - static SymbolVarArray make_all( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make_all( SymbolVar src, SymbolVar rois, SymbolVar trans, const Param& param = {}, const OperatorNodeConfig& config = {}); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar rois, SymbolVar trans, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -110,18 +110,18 @@ using DeformablePSROIPooling = DeformablePSROIPoolingForward; MGB_DEFINE_OPR_CLASS( DeformablePSROIPoolingBackward, intl::DeformablePSROIPoolingBackwardT) // { public: - DeformablePSROIPoolingBackward( + MGE_WIN_DECLSPEC_FUC DeformablePSROIPoolingBackward( VarNode* src, VarNode* rois, VarNode* trans, VarNode* grad, VarNode* count, const Param& param, const OperatorNodeConfig& config); - static SymbolVarArray make_all( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make_all( SymbolVar src, SymbolVar rois, SymbolVar trans, SymbolVar grad, SymbolVar count, const Param& param = {}, const OperatorNodeConfig& config = {}); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar rois, SymbolVar trans, SymbolVar grad, SymbolVar count, const Param& param = {}, const OperatorNodeConfig& config = {}); - void scn_do_execute() override; + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override; private: void get_output_var_shape( diff --git a/src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h b/src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h index 02566553..13859bad 100644 --- a/src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h +++ b/src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h @@ -20,10 +20,10 @@ MGB_DEFINE_OPR_CLASS( SlidingWindowTransposeForward, intl::MegDNNOprWrapperFwd) // { public: - SlidingWindowTransposeForward( + MGE_WIN_DECLSPEC_FUC SlidingWindowTransposeForward( VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ 
-33,11 +33,11 @@ MGB_DEFINE_OPR_CLASS( SlidingWindowTransposeBackward, intl::MegDNNOprWrapperBwd) // { public: - SlidingWindowTransposeBackward( + MGE_WIN_DECLSPEC_FUC SlidingWindowTransposeBackward( VarNode* diff, VarNode* src_for_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar diff, SymbolVar src_for_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -45,4 +45,4 @@ public: } // namespace opr } // namespace mgb -// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} \ No newline at end of file +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/opr/include/megbrain/opr/dnn/tqt.h b/src/opr/include/megbrain/opr/dnn/tqt.h index 8f358ed0..6e849a6e 100644 --- a/src/opr/include/megbrain/opr/dnn/tqt.h +++ b/src/opr/include/megbrain/opr/dnn/tqt.h @@ -18,11 +18,11 @@ namespace opr { MGB_DEFINE_OPR_CLASS(TQTForward, intl::MegDNNOprWrapperFwd) // { public: - TQTForward( + MGE_WIN_DECLSPEC_FUC TQTForward( VarNode* src, VarNode* scale, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar scale, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -31,11 +31,11 @@ using TQT = TQTForward; MGB_DEFINE_OPR_CLASS( TQTBackward, intl::MegDNNOprWrapperBwd) // { public: - TQTBackward( + MGE_WIN_DECLSPEC_FUC TQTBackward( VarNode* y_grad, VarNode* x, VarNode* scale, const Param& param, const OperatorNodeConfig& config); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( SymbolVar y_grad, SymbolVar x, SymbolVar scale, const Param& param = {}, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/imgproc.h b/src/opr/include/megbrain/opr/imgproc.h index 17f54f82..be19ad84 100644 --- a/src/opr/include/megbrain/opr/imgproc.h +++ b/src/opr/include/megbrain/opr/imgproc.h @@ -43,7 +43,7 @@ public: VarNode* in_tensor, VarNode* mat, VarNode* mat_idx, VarNode* out_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar in_tensor, SymbolVar mat, SymbolVar mat_idx, SymbolVar out_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -89,11 +89,11 @@ public: VarNode* mat, VarNode* mat_idx, VarNode* out_diff, VarNode* in_for_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar mat, SymbolVar out_diff, SymbolVar in_for_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar mat, SymbolVar mat_idx, SymbolVar out_diff, SymbolVar in_for_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -115,7 +115,7 @@ public: return make(src, mat, {}, out_diff, param, config); } - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar mat, SymbolVar mat_idx, SymbolVar out_diff, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -141,7 +141,7 @@ public: VarNode * in_tensor, VarNode * out_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar in_tensor, SymbolVar out_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -175,7 +175,7 @@ 
public: VarNode* out_diff, VarNode* in_for_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar out_diff, SymbolVar in_for_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -187,7 +187,7 @@ public: VarNode* in_tensor, VarNode* map, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar in_tensor, SymbolVar map, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -203,7 +203,7 @@ public: VarNode* map, VarNode* out_diff, VarNode* in_for_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar map, SymbolVar out_diff, SymbolVar in_for_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -215,7 +215,7 @@ public: VarNode* src, VarNode* map, VarNode* out_diff, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar map, SymbolVar out_diff, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -240,7 +240,7 @@ public: VarNode* in_tensor, VarNode* mat, VarNode* out_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar in_tensor, SymbolVar mat, SymbolVar out_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -278,25 +278,25 @@ public: VarNode* src, VarNode* mask_offset, VarNode* mask_val, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar mask_offset, SymbolVar mask_val, const Param& param, const OperatorNodeConfig& config = {}); - DctChannelSelectForward( + MGE_WIN_DECLSPEC_FUC DctChannelSelectForward( VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param, const OperatorNodeConfig& config = {}); - void get_output_var_shape( + MGE_WIN_DECLSPEC_FUC void get_output_var_shape( const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const override; - size_t get_workspace_size_bytes( + MGE_WIN_DECLSPEC_FUC size_t get_workspace_size_bytes( const TensorShapeArray& input_shapes, const TensorShapeArray& output_shapes) const override; void scn_do_execute() override; - void valid_mask( + MGE_WIN_DECLSPEC_FUC void valid_mask( const int* mask_offset, int mask_len, const int* mask_val, int mask_val_len, const Param& param); }; diff --git a/src/opr/include/megbrain/opr/indexing.h b/src/opr/include/megbrain/opr/indexing.h index 88e084e2..c2f072c5 100644 --- a/src/opr/include/megbrain/opr/indexing.h +++ b/src/opr/include/megbrain/opr/indexing.h @@ -22,10 +22,10 @@ namespace opr { MGB_DEFINE_OPR_CLASS( IndexingOneHot, intl::MegDNNOprWrapperFwd) // { public: - IndexingOneHot( + MGE_WIN_DECLSPEC_FUC IndexingOneHot( VarNode* src, VarNode* index, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar index, const Param& param, const OperatorNodeConfig& config = {}); @@ -38,10 +38,10 @@ MGB_DEFINE_OPR_CLASS( intl::WorkspaceSizeInfer>>) // { public: - IndexingSetOneHot( + MGE_WIN_DECLSPEC_FUC IndexingSetOneHot( VarNode* data, VarNode* index, VarNode* sub, const 
Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar data, SymbolVar index, SymbolVar sub, const Param& param, const OperatorNodeConfig& config = {}); @@ -62,10 +62,10 @@ private: MGB_DEFINE_OPR_CLASS( IndexingRemap, intl::MegDNNOprWrapperFwd) // { public: - IndexingRemap( + MGE_WIN_DECLSPEC_FUC IndexingRemap( VarNode* src, VarNode* map, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar map, const Param& param, const OperatorNodeConfig& config = {}); @@ -77,10 +77,10 @@ MGB_DEFINE_OPR_CLASS( IndexingRemapBackward, intl::MegDNNOprWrapperBwd) // { public: - IndexingRemapBackward( + MGE_WIN_DECLSPEC_FUC IndexingRemapBackward( VarNode* out_diff, VarNode* map, VarNode* src_for_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar out_diff, SymbolVar map, SymbolVar src_for_shape, const Param& param, const OperatorNodeConfig& config = {}); }; diff --git a/src/opr/include/megbrain/opr/internal/indexing_helper.h b/src/opr/include/megbrain/opr/internal/indexing_helper.h index 5271e944..9e1c3a15 100644 --- a/src/opr/include/megbrain/opr/internal/indexing_helper.h +++ b/src/opr/include/megbrain/opr/internal/indexing_helper.h @@ -53,12 +53,12 @@ struct AxisIndexer { SymbolVar idx; //! index an axis on an interval - static AxisIndexer make_interval( + MGE_WIN_DECLSPEC_FUC static AxisIndexer make_interval( AxisNum axis, Maybe begin, Maybe end, Maybe step); //! index an axis with scalar or vector indexer - static AxisIndexer make_index(AxisNum axis, SymbolVar idx); + MGE_WIN_DECLSPEC_FUC static AxisIndexer make_index(AxisNum axis, SymbolVar idx); /*! * \brief return true if axis of *lhs* is larger than (i.e. with smaller @@ -191,7 +191,7 @@ private: #define MGB_DECL_FANCY_INDEXING_OPR_GET(_opr) \ _opr(VarNode* inp, const IndexDesc& desc, const OperatorNodeConfig& config); \ - static SymbolVar make( \ + MGE_WIN_DECLSPEC_FUC static SymbolVar make( \ SymbolVar inp, const IndexDesc& desc, \ const OperatorNodeConfig& config = {}) @@ -212,7 +212,7 @@ private: _opr(VarNode* inp, VarNode* value, const IndexDesc& desc, \ const OperatorNodeConfig& config, \ const InputTensorReplacer& input_tensor_replacer); \ - static SymbolVar make( \ + MGE_WIN_DECLSPEC_FUC static SymbolVar make( \ SymbolVar inp, SymbolVar value, const IndexDesc& desc, \ const OperatorNodeConfig& config = {}, \ const InputTensorReplacer& input_tensor_replacer = {}) diff --git a/src/opr/include/megbrain/opr/internal/megdnn_opr_wrapper.h b/src/opr/include/megbrain/opr/internal/megdnn_opr_wrapper.h index 44a2eb11..c4fa8725 100644 --- a/src/opr/include/megbrain/opr/internal/megdnn_opr_wrapper.h +++ b/src/opr/include/megbrain/opr/internal/megdnn_opr_wrapper.h @@ -21,8 +21,9 @@ namespace opr { namespace intl { //! get megdnn handle from comp node -megdnn::Handle* get_megdnn_handle(CompNode comp_node); -std::shared_ptr get_megdnn_handle_shared(CompNode comp_node); +MGE_WIN_DECLSPEC_FUC megdnn::Handle* get_megdnn_handle(CompNode comp_node); +MGE_WIN_DECLSPEC_FUC std::shared_ptr get_megdnn_handle_shared( + CompNode comp_node); /*! 
* \brief get global megdnn operator asscoated with a computing node @@ -32,7 +33,7 @@ std::shared_ptr get_megdnn_handle_shared(CompNode comp_node); * * Checksum */ template -Opr* get_megdnn_global_opr(CompNode comp_node); +MGE_WIN_DECLSPEC_FUC Opr* get_megdnn_global_opr(CompNode comp_node); template class UniqPtrWithCN : public std::unique_ptr { @@ -63,7 +64,8 @@ UniqPtrWithCN create_megdnn_opr(CompNode comp_node) { * temp storage differs from workspace because the temp storage might * depends on runtime layout / pointer address */ -DeviceTensorStorage& get_temp_storage(ComputingGraph& graph, CompNode comp_node); +MGE_WIN_DECLSPEC_FUC DeviceTensorStorage& get_temp_storage( + ComputingGraph& graph, CompNode comp_node); /*! * \brief like get_temp_storage() but returns a DeviceTensorND instead @@ -79,10 +81,11 @@ namespace mixin { namespace megdnn_utils { //! add input layout constraint to require all inputs to be contiguous -void add_input_layout_constraint_contig(OperatorNodeBase& opr); +MGE_WIN_DECLSPEC_FUC void add_input_layout_constraint_contig(OperatorNodeBase& opr); //! called in constructor to add output vars -void add_output_vars(OperatorNodeBase& opr, size_t nr_output, bool add_workspace); +MGE_WIN_DECLSPEC_FUC void add_output_vars( + OperatorNodeBase& opr, size_t nr_output, bool add_workspace); } /*! @@ -110,27 +113,29 @@ protected: class MegDNNOprHolder : public cg::mixin::SingleCNOperatorNode { public: //! call create_opr() internally. - void mixin_init_output_comp_node(OperatorNodeBase& self); + MGE_WIN_DECLSPEC_FUC void mixin_init_output_comp_node(OperatorNodeBase& self); //! recreate operator when stream changes - void mixin_on_output_comp_node_stream_changed(OperatorNodeBase& self); + MGE_WIN_DECLSPEC_FUC void mixin_on_output_comp_node_stream_changed( + OperatorNodeBase& self); - static void record_megdnn_opr( + MGE_WIN_DECLSPEC_FUC static void record_megdnn_opr( std::unique_ptr opr, cg::GraphExecutable::ExecDependencyArray& deps); protected: - ~MegDNNOprHolder() noexcept; + MGE_WIN_DECLSPEC_FUC ~MegDNNOprHolder() noexcept; //! create actual megdnnn operator virtual void create_megdnn_opr() = 0; megdnn::OperatorBase* megdnn_opr() const { return m_dnn_opr.get(); } - void set_megdnn_opr(std::unique_ptr opr); + MGE_WIN_DECLSPEC_FUC void set_megdnn_opr(std::unique_ptr opr); //! 
record the megdnn opr owned by this opr to ExecDependencyArray - void record_megdnn_opr(cg::GraphExecutable::ExecDependencyArray& deps); + MGE_WIN_DECLSPEC_FUC void record_megdnn_opr( + cg::GraphExecutable::ExecDependencyArray& deps); private: std::unique_ptr m_dnn_opr; @@ -323,8 +328,10 @@ public: using GetWorkspaceLimitImpl = thin_function; WorkspaceLimitHook() = default; ~WorkspaceLimitHook() = default; - static void set_impl(ComputingGraph* graph, GetWorkspaceLimitImpl impl); - static const GetWorkspaceLimitImpl& get_impl(ComputingGraph* graph); + MGE_WIN_DECLSPEC_FUC static void set_impl( + ComputingGraph* graph, GetWorkspaceLimitImpl impl); + MGE_WIN_DECLSPEC_FUC static const GetWorkspaceLimitImpl& get_impl( + ComputingGraph* graph); private: void set_impl(GetWorkspaceLimitImpl impl); @@ -341,7 +348,7 @@ private: MGB_DEFINE_OPR_CLASS(_name, intl::MegDNNOprWrapperFwd) \ public: \ _name(VarNode* p0, const Param& param, const OperatorNodeConfig& config); \ - static SymbolVar make( \ + MGE_WIN_DECLSPEC_FUC static SymbolVar make( \ SymbolVar p0, const Param& param = {}, \ const OperatorNodeConfig& config = {}); \ } @@ -352,7 +359,7 @@ public: \ public: \ _name(VarNode* p0, VarNode* p1, const Param& param, \ const OperatorNodeConfig& config); \ - static SymbolVar make( \ + MGE_WIN_DECLSPEC_FUC static SymbolVar make( \ SymbolVar p0, SymbolVar p1, const Param& param = {}, \ const OperatorNodeConfig& config = {}); \ } @@ -362,7 +369,7 @@ public: \ MGB_DEFINE_OPR_CLASS(_name, intl::MegDNNOprWrapperBwd) \ _extra public : _name(VarNode* p0, VarNode* p1, VarNode* p2, const Param& param, \ const OperatorNodeConfig& config); \ - static SymbolVar make( \ + MGE_WIN_DECLSPEC_FUC static SymbolVar make( \ SymbolVar p0, SymbolVar p1, SymbolVar p2, const Param& param = {}, \ const OperatorNodeConfig& config = {}); \ } diff --git a/src/opr/include/megbrain/opr/io.h b/src/opr/include/megbrain/opr/io.h index 75e823ae..4e4118aa 100644 --- a/src/opr/include/megbrain/opr/io.h +++ b/src/opr/include/megbrain/opr/io.h @@ -154,7 +154,7 @@ private: * triggered. * 2. If host data is not contiguous, it would be relayouted on host. */ -MGB_DEFINE_OPR_CLASS(Host2DeviceCopy, intl::HostIONodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Host2DeviceCopy, intl::HostIONodeBase) // { class HostValueExecDep; public: @@ -203,7 +203,7 @@ public: return make(graph, host_data, p, config); } - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( ComputingGraph& graph, const std::shared_ptr& host_data, const Param& param, const OperatorNodeConfig& config); @@ -246,13 +246,14 @@ private: * * \see intl::SharedDeviceTensorBase and VolatileSharedDeviceTensor */ -MGB_DEFINE_OPR_CLASS(SharedDeviceTensor, intl::SharedDeviceTensorBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + SharedDeviceTensor, intl::SharedDeviceTensorBase) // { cg::static_infer::SourceType static_infer_src_type() const override; public: using Super::Super; - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( ComputingGraph& graph, const std::shared_ptr& dev_data, bool const_value, const OperatorNodeConfig& config); @@ -273,7 +274,7 @@ public: * * See SharedDeviceTensorBase::SharedDeviceTensorBase for const_value. */ - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( ComputingGraph& graph, const HostTensorND& value, bool const_value, const OperatorNodeConfig& config); @@ -295,7 +296,8 @@ public: * * This opr is usually used in serialized models. 
*/ -MGB_DEFINE_OPR_CLASS(SharedDeviceTensorWithFormat, intl::SharedDeviceTensorBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + SharedDeviceTensorWithFormat, intl::SharedDeviceTensorBase) // { cg::static_infer::SourceType static_infer_src_type() const override; public: @@ -303,7 +305,7 @@ public: void init_output_format() override; - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( ComputingGraph& graph, const std::shared_ptr& dev_data, bool const_value, const OperatorNodeConfig& config); @@ -328,13 +330,14 @@ public: * * \see intl::SharedDeviceTensorBase and SharedDeviceTensor */ -MGB_DEFINE_OPR_CLASS(VolatileSharedDeviceTensor, intl::SharedDeviceTensorBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + VolatileSharedDeviceTensor, intl::SharedDeviceTensorBase) // { NodeProp* do_make_node_prop() const override; public: using Super::Super; - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( ComputingGraph& graph, const std::shared_ptr& dev_data, const OperatorNodeConfig& config = {}); @@ -350,7 +353,7 @@ public: /*! * \brief tensor with immutable value */ -MGB_DEFINE_OPR_CLASS(ImmutableTensor, intl::DeviceTensorHolder) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(ImmutableTensor, intl::DeviceTensorHolder) // { public: class Value; class DevValueCache; @@ -360,19 +363,19 @@ public: const OperatorNodeConfig& config); ~ImmutableTensor() noexcept; - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( ComputingGraph& graph, const HostTensorND& val, const OperatorNodeConfig& config = {}); //! make from DTypeScalar; comp node must be provided in config - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( ComputingGraph& graph, const DTypeScalar& val, const OperatorNodeConfig& config); //! get underlying value on device - const DeviceTensorND& value() const; + MGE_WIN_DECLSPEC_FUC const DeviceTensorND& value() const; - const DeviceTensorND& host_value(); + MGE_WIN_DECLSPEC_FUC const DeviceTensorND& host_value(); SymbolVar shallow_copy( ComputingGraph& graph, const OperatorNodeConfig& config) const { @@ -404,7 +407,7 @@ private: * * Output var would be placed on copy stream by default. 
*/ -MGB_DEFINE_OPR_CLASS(Copy, cg::SingleCNIOSameShapeOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Copy, cg::SingleCNIOSameShapeOperatorNodeBase) // { bool m_mem_fwd_success = false; void scn_do_execute() override; @@ -418,7 +421,8 @@ MGB_DEFINE_OPR_CLASS(Copy, cg::SingleCNIOSameShapeOperatorNodeBase) // { public: Copy(VarNode* inp, const OperatorNodeConfig& config); - static SymbolVar make(SymbolVar inp, const OperatorNodeConfig& config = {}); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( + SymbolVar inp, const OperatorNodeConfig& config = {}); // for serialization using Param = megdnn::param::Empty; @@ -433,11 +437,11 @@ public: * * \see intl::MultipleDeviceTensorHolderBase */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( MultipleDeviceTensorHolder, intl::MultipleDeviceTensorHolderBase) // { public: using Super::Super; - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( ComputingGraph& graph, ValueArray values, const OperatorNodeConfig& config = {}); }; @@ -447,11 +451,11 @@ public: * * \see intl::MultipleDeviceTensorHolderBase */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( MultipleDeviceTensorWithFormatHolder, intl::MultipleDeviceTensorHolderBase) // { public: using Super::Super; - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( ComputingGraph& graph, ValueArray values, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/loop.h b/src/opr/include/megbrain/opr/loop.h index 3c022062..f8b2f244 100644 --- a/src/opr/include/megbrain/opr/loop.h +++ b/src/opr/include/megbrain/opr/loop.h @@ -128,7 +128,7 @@ public: * which must have no side-effect so a desc could be made for grad * opr */ - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( DescMaker desc_maker, const Param& param = {}, const OperatorNodeConfig& config = {}); diff --git a/src/opr/include/megbrain/opr/misc.h b/src/opr/include/megbrain/opr/misc.h index 419c914e..33bc6c70 100644 --- a/src/opr/include/megbrain/opr/misc.h +++ b/src/opr/include/megbrain/opr/misc.h @@ -24,17 +24,21 @@ namespace mgb { namespace opr { -MGB_DEFINE_OPR_CLASS(Argmax, intl::MegDNNOprWrapperFwd) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + Argmax, intl::MegDNNOprWrapperFwd) // { public: - Argmax(VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC Argmax( + VarNode* src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param, const OperatorNodeConfig& config = {}); }; -MGB_DEFINE_OPR_CLASS(Argmin, intl::MegDNNOprWrapperFwd) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + Argmin, intl::MegDNNOprWrapperFwd) // { public: - Argmin(VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC Argmin( + VarNode* src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param, const OperatorNodeConfig& config = {}); }; @@ -47,7 +51,7 @@ public: * \param[out] out_tensor the first output: \f$(m, n)\f$ sorted output tensor * \param[out] indices the second output: \f$(m, n)\f$ sorted indices */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( ArgsortForward, intl::MegDNNOprWrapperFwd) // { protected: NodeProp* do_make_node_prop() const override; @@ -57,23 +61,23 @@ protected: TensorShapeArray& out_shape) const override; public: - ArgsortForward( + 
MGE_WIN_DECLSPEC_FUC ArgsortForward( VarNode* in_tensor, const Param& param, const OperatorNodeConfig& config); - static std::array make( + MGE_WIN_DECLSPEC_FUC static std::array make( SymbolVar in_tensor, const Param& param = {}, const OperatorNodeConfig& config = {}); }; using Argsort = ArgsortForward; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( ArgsortBackward, intl::MegDNNOprWrapperBwd) // { public: - ArgsortBackward( + MGE_WIN_DECLSPEC_FUC ArgsortBackward( VarNode* out_diff, VarNode* indices, VarNode* result_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar out_diff, SymbolVar indices, SymbolVar result_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); static SymbolVar make( @@ -84,16 +88,17 @@ public: }; //! cumulative sum along given axis -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( Cumsum, cg::SingleCNOperatorNodeBaseT>) // { void add_input_layout_constraint() override; public: - Cumsum(VarNode* src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC Cumsum( + VarNode* src, const Param& param, const OperatorNodeConfig& config); // for serialization - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar opr, const Param& param, const OperatorNodeConfig& config = {}); protected: @@ -102,13 +107,14 @@ protected: }; #if MGB_CUDA -MGB_DEFINE_OPR_CLASS(NvOf, cg::SingleCNOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(NvOf, cg::SingleCNOperatorNodeBase) // { public: using Param = megdnn::param::NvOf; - NvOf(VarNode* src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC NvOf( + VarNode* src, const Param& param, const OperatorNodeConfig& config); // for serialization - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar opr, const Param& param, const OperatorNodeConfig& config = {}); static SymbolVar make(SymbolVar opr, const OperatorNodeConfig& config = {}) { @@ -142,22 +148,22 @@ using TopKBase = cg::SingleCNOperatorNode< * \brief take values conditionally * outputs: values, indices */ -MGB_DEFINE_OPR_CLASS(CondTake, intl::CondTakeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(CondTake, intl::CondTakeBase) // { void init_output_static_infer_desc() override; void scn_do_execute() override; void add_input_layout_constraint() override; NodeProp* do_make_node_prop() const override; public: - CondTake( + MGE_WIN_DECLSPEC_FUC CondTake( VarNode* data, VarNode* mask, const Param& param, const OperatorNodeConfig& config); - static std::array make( + MGE_WIN_DECLSPEC_FUC static std::array make( SymbolVar data, SymbolVar mask, const Param& param, const OperatorNodeConfig& config = {}); }; -MGB_DEFINE_OPR_CLASS(TopK, intl::TopKBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(TopK, intl::TopKBase) // { void init_output_dtype() override; void add_input_layout_constraint() override; void init_output_static_infer_desc() override; @@ -165,11 +171,12 @@ MGB_DEFINE_OPR_CLASS(TopK, intl::TopKBase) // { void record_execute_deps(ExecDependencyArray& deps) override; public: - TopK(VarNode* data, VarNode* k, const Param& param, - const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC TopK( + VarNode* data, VarNode* k, const Param& param, + const OperatorNodeConfig& config); //! 
note: for KTH_ONLY mode, the second output would be nullptr - static std::array make( + MGE_WIN_DECLSPEC_FUC static std::array make( SymbolVar data, SymbolVar k, const Param& param, const OperatorNodeConfig& config = {}); }; diff --git a/src/opr/include/megbrain/opr/nn_int.h b/src/opr/include/megbrain/opr/nn_int.h index 1187806b..52fa37dc 100644 --- a/src/opr/include/megbrain/opr/nn_int.h +++ b/src/opr/include/megbrain/opr/nn_int.h @@ -29,11 +29,11 @@ MGB_DEFINE_OPR_CLASS(ElemwiseMultiType, intl::ElemwiseMultiTypeBase) // { public: using Mode = Param::Mode; - ElemwiseMultiType( + MGE_WIN_DECLSPEC_FUC ElemwiseMultiType( const VarNodeArrayView& inputs, Param param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( const VarNodeArrayView& inputs, Param param, const OperatorNodeConfig& config = {}); @@ -57,7 +57,7 @@ class AffineInt final : public DynTypeObj { public: using Param = megdnn::param::Empty; - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar x, SymbolVar k, SymbolVar b, const Param& param = {}, const OperatorNodeConfig& config = {}) { return ElemwiseMultiType::make( diff --git a/src/opr/include/megbrain/opr/rand.h b/src/opr/include/megbrain/opr/rand.h index 4f560630..1424dca7 100644 --- a/src/opr/include/megbrain/opr/rand.h +++ b/src/opr/include/megbrain/opr/rand.h @@ -41,12 +41,12 @@ protected: /* ================= RNG with shape ================= */ #define _DEFINE_RNG_OPR_WITH_SHAPE_CLASS(RNG) \ - MGB_DEFINE_OPR_CLASS(RNG, RNGOprBase) \ + MGB_DEFINE_OPR_CLASS_WITH_EXPORT(RNG, RNGOprBase) \ cg::OperatorNodeBase::NodeProp* do_make_node_prop() const override; \ \ public: \ RNG(VarNode* shape, const Param& param, const OperatorNodeConfig& config); \ - static SymbolVar make( \ + MGE_WIN_DECLSPEC_FUC static SymbolVar make( \ SymbolVar shape, const Param& param = {}, \ const OperatorNodeConfig& config = {}); \ static SymbolVar make( \ @@ -67,13 +67,13 @@ _DEFINE_RNG_OPR_WITH_SHAPE_CLASS(PermutationRNG) /* ================= RNG with input ================= */ #define _DEFINE_RNG_OPR_WITH_INPUT_CLASS(RNG) \ - MGB_DEFINE_OPR_CLASS(RNG, RNGOprBase) \ + MGB_DEFINE_OPR_CLASS_WITH_EXPORT(RNG, RNGOprBase) \ void add_input_layout_constraint() override; \ cg::OperatorNodeBase::NodeProp* do_make_node_prop() const override; \ \ public: \ RNG(_INPUTS(VarNode*), const Param& param, const OperatorNodeConfig& config); \ - static _OUTPUTS make( \ + MGE_WIN_DECLSPEC_FUC static _OUTPUTS make( \ _INPUTS(SymbolVar), const Param& param = {}, \ const OperatorNodeConfig& config = {}); \ void init_output_static_infer_desc() override; \ @@ -110,7 +110,7 @@ using PoissonRNG = intl::PoissonRNG; using BetaRNG = intl::BetaRNG; using ShuffleRNG = intl::ShuffleRNGForward; -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( ShuffleRNGBackward, intl::MegDNNOprWrapperBwd) //{ public: @@ -118,7 +118,7 @@ ShuffleRNGBackward( VarNode* out_diff, VarNode* indices, VarNode* result_shape, const Param& param, const OperatorNodeConfig& config); -static SymbolVar make( +MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar out_diff, SymbolVar indices, SymbolVar result_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); }; diff --git a/src/opr/include/megbrain/opr/standalone/nms_opr.h b/src/opr/include/megbrain/opr/standalone/nms_opr.h index fcb7d9fa..549a55b2 100644 --- a/src/opr/include/megbrain/opr/standalone/nms_opr.h +++ b/src/opr/include/megbrain/opr/standalone/nms_opr.h @@ -10,7 +10,8 @@ namespace standalone { * * 
See the docs in the python operator */ -MGB_DEFINE_OPR_CLASS(NMSKeep, cg::SingleCNOutshapePureByInshapeOprBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + NMSKeep, cg::SingleCNOutshapePureByInshapeOprBase) // { public: struct Param { //! TAG is used by the serializer to check Param type; here we @@ -22,11 +23,12 @@ public: uint32_t max_output; //!< max number of output boxes per batch }; - NMSKeep(VarNode* boxes, const Param& param, const OperatorNodeConfig& config); - ~NMSKeep() noexcept; + MGE_WIN_DECLSPEC_FUC NMSKeep( + VarNode* boxes, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC ~NMSKeep() noexcept; //! factory method to insert the operator into a graph - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar boxes, const Param& param, const OperatorNodeConfig& config = {}); const Param& param() const { return m_param; } diff --git a/src/opr/include/megbrain/opr/tensor_gen.h b/src/opr/include/megbrain/opr/tensor_gen.h index 34561b39..53d566be 100644 --- a/src/opr/include/megbrain/opr/tensor_gen.h +++ b/src/opr/include/megbrain/opr/tensor_gen.h @@ -33,7 +33,7 @@ MGB_DEFINE_OPR_CLASS(Alloc, intl::OutshapeBySymvarSCNOprBase) // { public: Alloc(VarNode* shape, DType dtype, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar shape, DType dtype, const OperatorNodeConfig& config = {}); static SymbolVar make( @@ -61,7 +61,7 @@ public: VarNode* start, VarNode* stop, VarNode* num, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar start, SymbolVar stop, SymbolVar num, const Param& param, const OperatorNodeConfig& config = {}); @@ -83,7 +83,7 @@ public: using Param = megdnn::Eye::Param; Eye(VarNode* shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar shape, const Param& param, const OperatorNodeConfig& config = {}); const Param& param() const { return m_param; } diff --git a/src/opr/include/megbrain/opr/tensor_manip.h b/src/opr/include/megbrain/opr/tensor_manip.h index d2d09685..baee4baa 100644 --- a/src/opr/include/megbrain/opr/tensor_manip.h +++ b/src/opr/include/megbrain/opr/tensor_manip.h @@ -31,7 +31,7 @@ namespace opr { * * \param axis output shape of a single axis */ -MGB_DEFINE_OPR_CLASS(GetVarShape, cg::SingleCNOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(GetVarShape, cg::SingleCNOperatorNodeBase) // { class ShapeDevValueExecDep; public: @@ -46,7 +46,7 @@ public: } //! get broadcasted shape - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( const VarNodeArrayView& inp, Param axis = {}, const OperatorNodeConfig& config = {}); @@ -62,15 +62,16 @@ private: DeviceTensorND m_cached_shape_cpu_v{CompNode::default_cpu()}, m_cached_shape_dev_v; //! update m_cached_shape from m_src_shapes - void update_cached_shape(); + MGE_WIN_DECLSPEC_FUC void update_cached_shape(); //! 
update m_cached_shape for static infer - void update_for_static_infer(const cg::static_infer::InpVal& inp); + MGE_WIN_DECLSPEC_FUC void update_for_static_infer( + const cg::static_infer::InpVal& inp); - NodeProp* do_make_node_prop() const override; - void scn_do_execute() override; - void init_output_static_infer_desc() override; - void record_execute_deps(ExecDependencyArray& deps) override; + MGE_WIN_DECLSPEC_FUC NodeProp* do_make_node_prop() const override; + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override; + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override; + MGE_WIN_DECLSPEC_FUC void record_execute_deps(ExecDependencyArray& deps) override; }; namespace intl { @@ -82,13 +83,13 @@ MGB_DEFINE_CLS_WITH_SUPER( ReshapeBrdcastHelper, ReadonlyFwdHelper) // { bool m_incompatible_inp_layout = false; - void mem_plan_fwd_in2out_readonly() override final; - void outshape_by_symvar_do_get_output_shape( + MGE_WIN_DECLSPEC_FUC void mem_plan_fwd_in2out_readonly() override final; + MGE_WIN_DECLSPEC_FUC void outshape_by_symvar_do_get_output_shape( TensorShape& dest, const ShapeInferInfo& shpinfo) override final; - void scn_do_execute() override final; - void add_input_layout_constraint() override final; - void init_output_static_infer_desc() override; - NodeProp* do_make_node_prop() const override; + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override final; + MGE_WIN_DECLSPEC_FUC void add_input_layout_constraint() override final; + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override; + MGE_WIN_DECLSPEC_FUC NodeProp* do_make_node_prop() const override; protected: using Super::Super; @@ -118,14 +119,15 @@ protected: * \param unspec_axis the axis that shape is not specified in input, but should * be calculated from total number of elements and other dims in dest shape */ -MGB_DEFINE_OPR_CLASS(Reshape, intl::ReshapeBrdcastHelper) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Reshape, intl::ReshapeBrdcastHelper) // { public: using Param = megdnn::param::OptionalAxisV1; - Reshape(VarNode* inp, VarNode* tshp, Param unspec_axis, + MGE_WIN_DECLSPEC_FUC Reshape( + VarNode* inp, VarNode* tshp, Param unspec_axis, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, SymbolVar tshp, Param unspec_axis = {}, const OperatorNodeConfig& config = {}); @@ -150,16 +152,17 @@ private: /*! 
* \brief broadcast tensor value along axes whose shape is 1 */ -MGB_DEFINE_OPR_CLASS(Broadcast, intl::ReshapeBrdcastHelper) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Broadcast, intl::ReshapeBrdcastHelper) // { Maybe reshapebrdcast_get_dest_layout( const TensorLayout& src, const TensorShape& tshape) const override; bool reshapebrdcast_output_shape_need_input_shape() const override; public: - Broadcast(VarNode* inp, VarNode* tshp, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC Broadcast( + VarNode* inp, VarNode* tshp, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, SymbolVar tshp, const OperatorNodeConfig& config = {}); static SymbolVar make( @@ -188,10 +191,10 @@ namespace intl { */ MGB_DEFINE_CLS_WITH_SUPER( AxisManipOprBase, ReadonlyFwdHelper) // { - void mem_plan_fwd_in2out_readonly() override final; - void scn_do_execute() override final; - void init_output_static_infer_desc() override final; - NodeProp* do_make_node_prop() const override; + MGE_WIN_DECLSPEC_FUC void mem_plan_fwd_in2out_readonly() override final; + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override final; + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override final; + MGE_WIN_DECLSPEC_FUC NodeProp* do_make_node_prop() const override; protected: using Super::Super; @@ -211,7 +214,7 @@ protected: * * Note that dimensions with shape-1 could be dropped */ -MGB_DEFINE_OPR_CLASS(Dimshuffle, intl::AxisManipOprBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Dimshuffle, intl::AxisManipOprBase) // { std::vector m_pattern; size_t m_inp_ndim; @@ -219,15 +222,16 @@ MGB_DEFINE_OPR_CLASS(Dimshuffle, intl::AxisManipOprBase) // { const TensorLayout& inp_layout) const override; public: - Dimshuffle( + MGE_WIN_DECLSPEC_FUC Dimshuffle( VarNode* inp, const std::vector& pattern, size_t ndim, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, const std::vector& pattern, size_t ndim = 0, const OperatorNodeConfig& config = {}); - VarNode* grad(size_t wrt_idx, const VarNodeArray& out_grad) const; + MGE_WIN_DECLSPEC_FUC VarNode* grad( + size_t wrt_idx, const VarNodeArray& out_grad) const; // used for serialization struct Param { @@ -256,7 +260,7 @@ public: * * All the axis descs would be processed in order */ -MGB_DEFINE_OPR_CLASS(AxisAddRemove, intl::AxisManipOprBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(AxisAddRemove, intl::AxisManipOprBase) // { public: struct AxisDesc { enum class Method { @@ -283,11 +287,11 @@ public: } }; - AxisAddRemove( + MGE_WIN_DECLSPEC_FUC AxisAddRemove( VarNode* inp, const std::vector& desc, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, const std::vector& desc, const OperatorNodeConfig& config = {}); @@ -313,15 +317,15 @@ public: private: std::vector m_desc; - TensorLayout axis_manip_get_output_layout( - const TensorLayout& inp_layout) const override; + MGE_WIN_DECLSPEC_FUC TensorLayout + axis_manip_get_output_layout(const TensorLayout& inp_layout) const override; }; namespace intl { MGB_DEFINE_CLS_WITH_SUPER(ModifySubtensorImplHelper, FancyIndexingHelper) // { - void init_output_static_infer_desc() override final; - void scn_do_execute() override final; + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override final; + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override final; /*! * \brief implement the actual modifycation @@ -341,18 +345,19 @@ protected: /*! 
* \brief get subtensor in a python-like way */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( Subtensor, intl::ReadonlyFwdHelper) // { - void init_output_static_infer_desc() override; - void scn_do_execute() override; - void mem_plan_fwd_in2out_readonly() override; - void init_rt_force_dynamic_mem_alloc_imply_chain() override; - NodeProp* do_make_node_prop() const override; + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override; + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override; + MGE_WIN_DECLSPEC_FUC void mem_plan_fwd_in2out_readonly() override; + MGE_WIN_DECLSPEC_FUC void init_rt_force_dynamic_mem_alloc_imply_chain() override; + MGE_WIN_DECLSPEC_FUC NodeProp* do_make_node_prop() const override; public: - Subtensor(VarNode* inp, const IndexDesc& desc, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC Subtensor( + VarNode* inp, const IndexDesc& desc, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, const IndexDesc& desc, const OperatorNodeConfig& config = {}); }; @@ -360,7 +365,7 @@ public: /*! * \brief replace the value of subtensor by another tensor */ -MGB_DEFINE_OPR_CLASS(SetSubtensor, intl::ModifySubtensorImplHelper) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(SetSubtensor, intl::ModifySubtensorImplHelper) // { void modify(DeviceTensorND& sub, const DeviceTensorND& val) override; NodeProp* do_make_node_prop() const override; @@ -371,7 +376,7 @@ public: /*! * \brief increase the value of subtensor by another tensor */ -MGB_DEFINE_OPR_CLASS(IncrSubtensor, intl::ModifySubtensorImplHelper) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(IncrSubtensor, intl::ModifySubtensorImplHelper) // { void modify(DeviceTensorND& sub, const DeviceTensorND& val) override; public: @@ -384,7 +389,7 @@ public: * \brief helper for Subtensor with only index * \param index list of pairs of (axis, index) */ - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, const std::vector>& index, const OperatorNodeConfig& config = {}); }; @@ -399,7 +404,7 @@ public: * on this comp_node * 3. 
Specify comp_node for each output in OperatorNodeConfig */ -MGB_DEFINE_OPR_CLASS(Split, intl::OutshapeBySymvarOprBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Split, intl::OutshapeBySymvarOprBase) // { public: struct Options { enum class Method { @@ -428,7 +433,7 @@ public: Split(VarNode* inp, const Options& opt, const OperatorNodeConfig& config); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( SymbolVar inp, Options opt, const OperatorNodeConfig& config = {}); const Options& options() const { return m_opt; } @@ -444,30 +449,30 @@ private: Options m_opt; size_t m_output_shape_version = 0; - void init_output_comp_node() override; + MGE_WIN_DECLSPEC_FUC void init_output_comp_node() override; - NodeProp* do_make_node_prop() const override; + MGE_WIN_DECLSPEC_FUC NodeProp* do_make_node_prop() const override; - void do_execute(ExecEnv& env) override; + MGE_WIN_DECLSPEC_FUC void do_execute(ExecEnv& env) override; - void init_output_static_infer_desc() override; - void outshape_by_symvar_do_get_output_shape( + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override; + MGE_WIN_DECLSPEC_FUC void outshape_by_symvar_do_get_output_shape( TensorShape& dest, const ShapeInferInfo& shpinfo) override; - void mem_plan_fwd_in2out_readonly() override; + MGE_WIN_DECLSPEC_FUC void mem_plan_fwd_in2out_readonly() override; - void add_input_layout_constraint() override; + MGE_WIN_DECLSPEC_FUC void add_input_layout_constraint() override; - bool infer_shape( + MGE_WIN_DECLSPEC_FUC bool infer_shape( size_t out_idx, TensorShape& dest, const cg::static_infer::InpVal& inp); - void on_mem_status_changed(); - OprEventCallback get_opr_event_callback() override final; + MGE_WIN_DECLSPEC_FUC void on_mem_status_changed(); + MGE_WIN_DECLSPEC_FUC OprEventCallback get_opr_event_callback() override final; - void init_subspec(bool memfwd); + MGE_WIN_DECLSPEC_FUC void init_subspec(bool memfwd); - void on_output_comp_node_stream_changed() override; - void init_rt_force_dynamic_mem_alloc_imply_chain() override; + MGE_WIN_DECLSPEC_FUC void on_output_comp_node_stream_changed() override; + MGE_WIN_DECLSPEC_FUC void init_rt_force_dynamic_mem_alloc_imply_chain() override; }; /*! 
@@ -476,12 +481,13 @@ private: * To concat to a different computing node, specify the destination in * OperatorNodeConfig */ -MGB_DEFINE_OPR_CLASS(Concat, cg::SingleCNOutshapePureByInshapeOprBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + Concat, cg::SingleCNOutshapePureByInshapeOprBase) // { public: using Param = megdnn::param::Axis; Concat(const VarNodeArrayView& inp, int axis, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( const VarNodeArrayView& inp, int axis, const OperatorNodeConfig& config = {}); @@ -500,15 +506,15 @@ public: private: int m_axis; - void scn_do_execute() override; + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override; - NodeProp* do_make_node_prop() const override; + MGE_WIN_DECLSPEC_FUC NodeProp* do_make_node_prop() const override; - void init_output_static_infer_desc() override; - void add_input_layout_constraint() override; - void init_output_comp_node() override; + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override; + MGE_WIN_DECLSPEC_FUC void add_input_layout_constraint() override; + MGE_WIN_DECLSPEC_FUC void init_output_comp_node() override; - void get_output_var_shape( + MGE_WIN_DECLSPEC_FUC void get_output_var_shape( const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const override; }; @@ -521,27 +527,27 @@ private: * the begin and the end of inputs[i]'s offsets in output * \param offsets_val: offsets value on cpu */ -MGB_DEFINE_OPR_CLASS(ParamPackConcat, cg::SingleCNOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(ParamPackConcat, cg::SingleCNOperatorNodeBase) // { //! input pointer buffer SmallVector m_inp_ptr; std::vector m_offsets; intl::UniqPtrWithCN m_opr; - void add_input_layout_constraint() override; - void scn_do_execute() override; - void init_output_static_infer_desc() override; - void init_output_dtype() override; - void on_output_comp_node_stream_changed() override; + MGE_WIN_DECLSPEC_FUC void add_input_layout_constraint() override; + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override; + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override; + MGE_WIN_DECLSPEC_FUC void init_output_dtype() override; + MGE_WIN_DECLSPEC_FUC void on_output_comp_node_stream_changed() override; public: using Param = megdnn::param::Empty; Param param() const { return {}; } - ParamPackConcat( + MGE_WIN_DECLSPEC_FUC ParamPackConcat( VarNodeArray& inp, VarNode* offsets, const std::vector offsets_val, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( const SmallVector& inp, const SymbolVar& offsets, const std::vector offsets_val, const OperatorNodeConfig& config = {}); @@ -564,24 +570,24 @@ public: * \param offsets_val: offsets value on cpu * \param shapes: shape of each output */ -MGB_DEFINE_OPR_CLASS(ParamPackSplit, cg::SingleCNOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(ParamPackSplit, cg::SingleCNOperatorNodeBase) // { TensorShapeArray m_shapes; std::vector m_offsets; - void scn_do_execute() override; - void init_output_static_infer_desc() override; - bool infer_shape( + MGE_WIN_DECLSPEC_FUC void scn_do_execute() override; + MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override; + MGE_WIN_DECLSPEC_FUC bool infer_shape( size_t index, TensorShape& dest, const cg::static_infer::InpVal& inp); - void init_output_dtype() override; - void mem_plan_fwd_in2out_readonly() override; - void add_input_layout_constraint() override; + MGE_WIN_DECLSPEC_FUC void init_output_dtype() override; + 
MGE_WIN_DECLSPEC_FUC void mem_plan_fwd_in2out_readonly() override; + MGE_WIN_DECLSPEC_FUC void add_input_layout_constraint() override; public: - ParamPackSplit( + MGE_WIN_DECLSPEC_FUC ParamPackSplit( VarNode* src, const std::vector offsets, TensorShapeArray& shapes, const OperatorNodeConfig& config); - static SymbolVarArray make( + MGE_WIN_DECLSPEC_FUC static SymbolVarArray make( const SymbolVar& src, const std::vector offsets, TensorShapeArray shapes, const OperatorNodeConfig& config = {}); @@ -589,7 +595,7 @@ public: const TensorShapeArray& get_output_shapes() const { return m_shapes; } - void init_rt_force_dynamic_mem_alloc_imply_chain() override; + MGE_WIN_DECLSPEC_FUC void init_rt_force_dynamic_mem_alloc_imply_chain() override; }; /*! @@ -597,23 +603,25 @@ public: * * See docs of megdnn params for more details */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( RelayoutFormat, intl::MegDNNOprWrapperFwd) // { public: - RelayoutFormat(VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC RelayoutFormat( + VarNode* src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param, const OperatorNodeConfig& config = {}); - void init_output_format() override final; + MGE_WIN_DECLSPEC_FUC void init_output_format() override final; }; /*! * \brief padding the src tensor to dst tensor */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( PaddingForward, intl::MegDNNOprWrapperFwd) // { public: - PaddingForward(VarNode* src, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC PaddingForward( + VarNode* src, const Param& param, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, const Param& param = {}, const OperatorNodeConfig& config = {}); }; @@ -622,13 +630,13 @@ using Padding = PaddingForward; /*! * \brief padding backward */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( PaddingBackward, intl::MegDNNOprWrapperBwd) // { public: - PaddingBackward( + MGE_WIN_DECLSPEC_FUC PaddingBackward( VarNode* src, VarNode* in_for_shape, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar src, SymbolVar in_for_shape, const Param& param = {}, const OperatorNodeConfig& config = {}); }; diff --git a/src/opr/include/megbrain/opr/utility.h b/src/opr/include/megbrain/opr/utility.h index 3cc621d2..7faabf1e 100644 --- a/src/opr/include/megbrain/opr/utility.h +++ b/src/opr/include/megbrain/opr/utility.h @@ -27,7 +27,7 @@ namespace opr { /*! * \brief sleep for specific time on device */ -MGB_DEFINE_OPR_CLASS(Sleep, cg::SingleCNIOSameShapeOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Sleep, cg::SingleCNIOSameShapeOperatorNodeBase) // { public: /*! 
* \brief directly sleep without constructing an opr @@ -41,9 +41,10 @@ public: Type(bool d = true, bool h = false) : device(d), host(h) {} }; - Sleep(VarNode* node, double seconds, Type type, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC Sleep( + VarNode* node, double seconds, Type type, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar node, double seconds, Type type = {}, const OperatorNodeConfig& config = {}); @@ -82,13 +83,13 @@ private: * \param dest_off the offset on which \p dest should be modified; this helps * multiple Timestamp operator instances */ -MGB_DEFINE_OPR_CLASS(Timestamp, intl::ForwardInputToOutput) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Timestamp, intl::ForwardInputToOutput) // { public: - Timestamp( + MGE_WIN_DECLSPEC_FUC Timestamp( VarNode* node, std::shared_ptr dest, size_t dest_off, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar node, std::shared_ptr dest, size_t dest_off, const OperatorNodeConfig& config = {}); @@ -111,14 +112,15 @@ private: * \brief To make sure inputs' owner oprs finished when executing this operator, * and forwarding input(0) to output. */ -MGB_DEFINE_OPR_CLASS(VirtualDep, intl::ForwardInputToOutput) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(VirtualDep, intl::ForwardInputToOutput) // { public: - VirtualDep(const VarNodeArray& inputs, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC VirtualDep( + const VarNodeArray& inputs, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( const SymbolVarArray& inputs, const OperatorNodeConfig& config = {}); - NodeProp* do_make_node_prop() const override; + MGE_WIN_DECLSPEC_FUC NodeProp* do_make_node_prop() const override; // void add_input(std::initializer_list list); }; @@ -128,7 +130,7 @@ public: * \brief do not provide any static infer on a var to mark it dynamic; used for * debug purposes */ -MGB_DEFINE_OPR_CLASS(MarkDynamicVar, cg::SingleCNOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(MarkDynamicVar, cg::SingleCNOperatorNodeBase) // { void scn_do_execute() override; void init_output_static_infer_desc() override {} NodeProp* do_make_node_prop() const override; @@ -136,9 +138,11 @@ MGB_DEFINE_OPR_CLASS(MarkDynamicVar, cg::SingleCNOperatorNodeBase) // { public: using Param = megdnn::param::Empty; - MarkDynamicVar(VarNode* node, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC MarkDynamicVar( + VarNode* node, const OperatorNodeConfig& config); - static SymbolVar make(SymbolVar node, const OperatorNodeConfig& config = {}); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( + SymbolVar node, const OperatorNodeConfig& config = {}); // for serialization Param param() const { return {}; } @@ -151,7 +155,7 @@ public: /*! 
* \brief inject a callback to be called whenever this operator is executed */ -MGB_DEFINE_OPR_CLASS(CallbackInjector, intl::ForwardInputToOutput) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(CallbackInjector, intl::ForwardInputToOutput) // { void scn_do_execute_finish(const DeviceTensorND& val) override; cg::static_infer::ValueInferDesc mixin_get_static_infer_desc( OperatorNodeBase& opr) override; @@ -197,10 +201,10 @@ public: callback{std::move(cb)} {} }; - CallbackInjector( + MGE_WIN_DECLSPEC_FUC CallbackInjector( VarNode* inp, const Param& param, const OperatorNodeConfig& config); - CallbackInjector( + MGE_WIN_DECLSPEC_FUC CallbackInjector( VarNodeArray& inp, const Param& param, const OperatorNodeConfig& config); //! create the operator disallowing auto dup @@ -226,7 +230,7 @@ public: return make(inp, Param{cb}, config); } - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVarArray inp, const Param& param, const OperatorNodeConfig& config = {}); @@ -244,12 +248,15 @@ private: * Useful for removing the reduce when computing grad, so graph optimizer can * work well. */ -MGB_DEFINE_OPR_CLASS(MarkNoBroadcastElemwise, intl::ForwardInputToOutput) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + MarkNoBroadcastElemwise, intl::ForwardInputToOutput) // { public: using Param = megdnn::param::Empty; - MarkNoBroadcastElemwise(VarNode* input, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC MarkNoBroadcastElemwise( + VarNode* input, const OperatorNodeConfig& config); - static SymbolVar make(SymbolVar input, const OperatorNodeConfig& config = {}); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( + SymbolVar input, const OperatorNodeConfig& config = {}); // for serialization Param param() const { return {}; } @@ -265,14 +272,15 @@ public: * Currently only used for preventing graph optimizer from removing some var so * its gradient can be correctly computed. */ -MGB_DEFINE_OPR_CLASS(Identity, intl::ForwardInputToOutput) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(Identity, intl::ForwardInputToOutput) // { NodeProp* do_make_node_prop() const override; public: using Param = megdnn::param::Empty; Identity(VarNode* input, const OperatorNodeConfig& config); - static SymbolVar make(SymbolVar input, const OperatorNodeConfig& config = {}); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( + SymbolVar input, const OperatorNodeConfig& config = {}); // for serialization Param param() const { return {}; } @@ -288,7 +296,7 @@ public: * * raise UnequalError during exec if tensor not equal */ -MGB_DEFINE_OPR_CLASS(AssertEqual, intl::ForwardInputToOutput) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(AssertEqual, intl::ForwardInputToOutput) // { bool m_throw_on_error = true; HostTensorND m_hv; @@ -298,11 +306,11 @@ public: using Param = megdnn::param::AssertEqual; //! \p expect and \p get are only used for error message - AssertEqual( + MGE_WIN_DECLSPEC_FUC AssertEqual( VarNode* expect, VarNode* get, VarNode* err, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar expect, SymbolVar get, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -310,7 +318,7 @@ public: void disable_throw_on_error() { m_throw_on_error = false; } //! 
for serialization and shallow copy - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar expect, SymbolVar get, SymbolVar err, const Param& param, const OperatorNodeConfig& config); @@ -331,14 +339,15 @@ private: * \brief output equals to input, but grad(input) would be replaced by return * value of given callback at runtime */ -MGB_DEFINE_OPR_CLASS(SetGrad, intl::ForwardInputToOutput) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(SetGrad, intl::ForwardInputToOutput) // { public: using GradGetter = thin_function; - SetGrad(VarNode* input, const GradGetter& grad_getter, + MGE_WIN_DECLSPEC_FUC SetGrad( + VarNode* input, const GradGetter& grad_getter, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar input, const GradGetter& grad_getter, const OperatorNodeConfig& config = {}); @@ -354,7 +363,8 @@ private: /*! * \brief get a special marker for a grad being invalid */ -MGB_DEFINE_OPR_CLASS(InvalidGrad, cg::SingleCNIOSameShapeOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + InvalidGrad, cg::SingleCNIOSameShapeOperatorNodeBase) // { const OperatorNodeBase* m_grad_opr; size_t m_inp_idx; @@ -364,9 +374,11 @@ MGB_DEFINE_OPR_CLASS(InvalidGrad, cg::SingleCNIOSameShapeOperatorNodeBase) // { public: //! \p vinp should be grad_opr.input(inp_idx), unless in shallow copy - InvalidGrad(VarNode* vinp, const OperatorNodeBase* grad_opr, size_t inp_idx); + MGE_WIN_DECLSPEC_FUC InvalidGrad( + VarNode* vinp, const OperatorNodeBase* grad_opr, size_t inp_idx); - static VarNode* make(const OperatorNodeBase& grad_opr, size_t inp_idx); + MGE_WIN_DECLSPEC_FUC static VarNode* make( + const OperatorNodeBase& grad_opr, size_t inp_idx); size_t inp_idx() const { return m_inp_idx; } @@ -380,7 +392,7 @@ public: * This operator exists so graph optimization can be performed without actual * grad oprs. This operator must be expanded before graph execution. */ -MGB_DEFINE_OPR_CLASS(VirtualGrad, cg::OperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(VirtualGrad, cg::OperatorNodeBase) // { void do_execute(ExecEnv&) override; void init_output_comp_node() override; void init_output_static_infer_desc() override; @@ -390,10 +402,11 @@ MGB_DEFINE_OPR_CLASS(VirtualGrad, cg::OperatorNodeBase) // { public: using Param = megdnn::param::Empty; - VirtualGrad(VarNode* target, VarNode* wrt, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC VirtualGrad( + VarNode* target, VarNode* wrt, const OperatorNodeConfig& config); Param param() const { return {}; } - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar target, SymbolVar wrt, Param param = {}, const OperatorNodeConfig& config = {}); }; @@ -403,7 +416,7 @@ public: * * The gradient w.r.t. \p ys[i] would be \p y_grads[i] */ -MGB_DEFINE_OPR_CLASS(VirtualLoss, cg::OperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(VirtualLoss, cg::OperatorNodeBase) // { void do_execute(ExecEnv&) override; void init_output_comp_node() override; void init_output_static_infer_desc() override; @@ -414,9 +427,10 @@ public: using Param = megdnn::param::Empty; //! 
the first half of \p inputs contain ys, and the remaining are y_grads - VirtualLoss(const VarNodeArray& inputs, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC VirtualLoss( + const VarNodeArray& inputs, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( const SymbolVarArray& ys, const SymbolVarArray& y_grads, Param param = {}, const OperatorNodeConfig& config = {}); @@ -427,7 +441,8 @@ public: class InvalidGrad { public: using OperatorNodeBase = cg::OperatorNodeBase; - [[noreturn]] static VarNode* make(const OperatorNodeBase& grad_opr, size_t inp_idx); + [[noreturn]] MGE_WIN_DECLSPEC_FUC static VarNode* make( + const OperatorNodeBase& grad_opr, size_t inp_idx); }; #endif // MGB_ENABLE_GRAD @@ -447,15 +462,15 @@ public: * \see VarNode::Flag::NO_MEM_RECLAIM for eliminating only dynamic memory * deallocation */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( PersistentOutputStorage, cg::SingleCNIOSameShapeOperatorNodeBase) // { public: using Param = megdnn::param::PersistentOutputStorage; - PersistentOutputStorage( + MGE_WIN_DECLSPEC_FUC PersistentOutputStorage( VarNode* inp, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, const Param& param = {}, const OperatorNodeConfig& config = {}); @@ -473,16 +488,19 @@ private: void record_execute_deps(ExecDependencyArray& deps) override; }; -MGB_DEFINE_OPR_CLASS(RequireInputDynamicStorage, intl::ForwardInputToOutput) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + RequireInputDynamicStorage, intl::ForwardInputToOutput) // { public: - RequireInputDynamicStorage(VarNode* input, const OperatorNodeConfig& config); - static SymbolVar make(SymbolVar input, const OperatorNodeConfig& config = {}); + MGE_WIN_DECLSPEC_FUC RequireInputDynamicStorage( + VarNode* input, const OperatorNodeConfig& config); + MGE_WIN_DECLSPEC_FUC static SymbolVar make( + SymbolVar input, const OperatorNodeConfig& config = {}); }; /* * \brief a special op providing shape hint only used in graph compilation (gopt) */ -MGB_DEFINE_OPR_CLASS(ShapeHint, cg::SingleCNOperatorNodeBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT(ShapeHint, cg::SingleCNOperatorNodeBase) // { TensorShape m_shape; bool m_is_const; @@ -490,11 +508,11 @@ MGB_DEFINE_OPR_CLASS(ShapeHint, cg::SingleCNOperatorNodeBase) // { void init_output_static_infer_desc() override; public: - ShapeHint( + MGE_WIN_DECLSPEC_FUC ShapeHint( VarNode* inp, const TensorShape shape, bool is_const, const OperatorNodeConfig& config); - static SymbolVar make( + MGE_WIN_DECLSPEC_FUC static SymbolVar make( SymbolVar inp, const TensorShape shape, bool is_const = false, const OperatorNodeConfig& config = {}); diff --git a/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h b/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h index 766990c0..0ef78921 100644 --- a/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h +++ b/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h @@ -33,10 +33,10 @@ class CPUDispatchChecker final : public PluginBase { void check(CompNode cn, cg::OperatorNodeBase* opr); public: - CPUDispatchChecker(cg::ComputingGraph* graph); + MGE_WIN_DECLSPEC_FUC CPUDispatchChecker(cg::ComputingGraph* graph); //! 
get oprs that did not call cpu dispatch - auto&& failed_oprs() const { return *m_failed_oprs; } + MGE_WIN_DECLSPEC_FUC auto&& failed_oprs() const { return *m_failed_oprs; } }; } // namespace mgb diff --git a/src/plugin/include/megbrain/plugin/infkern_finder.h b/src/plugin/include/megbrain/plugin/infkern_finder.h index d0f4bbf9..f59ffd40 100644 --- a/src/plugin/include/megbrain/plugin/infkern_finder.h +++ b/src/plugin/include/megbrain/plugin/infkern_finder.h @@ -60,23 +60,25 @@ public: using FullRecord = std::vector>; }; - InfkernFinder(cg::ComputingGraph* graph, bool record_input_value); - ~InfkernFinder() noexcept; + MGE_WIN_DECLSPEC_FUC InfkernFinder( + cg::ComputingGraph* graph, bool record_input_value); + MGE_WIN_DECLSPEC_FUC ~InfkernFinder() noexcept; //! this constructor should not be called by user - InfkernFinder(cg::ComputingGraph* graph, GlobalState* global_state); + MGE_WIN_DECLSPEC_FUC InfkernFinder( + cg::ComputingGraph* graph, GlobalState* global_state); /*! * \brief write execution status to file * \return the first operator whose output is not finished; or * nullptr if all finished */ - cg::OperatorNodeBase* write_to_file(const char* fpath); + MGE_WIN_DECLSPEC_FUC cg::OperatorNodeBase* write_to_file(const char* fpath); /*! * \brief get previous input values for dumped operators */ - InputValueRecord::FullRecord get_input_values(size_t opr_id); + MGE_WIN_DECLSPEC_FUC InputValueRecord::FullRecord get_input_values(size_t opr_id); }; } // namespace mgb diff --git a/src/plugin/include/megbrain/plugin/num_range_checker.h b/src/plugin/include/megbrain/plugin/num_range_checker.h index e3ea0fa2..a388fec7 100644 --- a/src/plugin/include/megbrain/plugin/num_range_checker.h +++ b/src/plugin/include/megbrain/plugin/num_range_checker.h @@ -54,7 +54,7 @@ class NumRangeChecker final : public PluginBase { public: using Error = NumRangeCheckerError; - NumRangeChecker(cg::ComputingGraph* graph, float range); + MGE_WIN_DECLSPEC_FUC NumRangeChecker(cg::ComputingGraph* graph, float range); }; } // namespace mgb diff --git a/src/plugin/include/megbrain/plugin/opr_footprint.h b/src/plugin/include/megbrain/plugin/opr_footprint.h index 31775a42..ad4803d0 100644 --- a/src/plugin/include/megbrain/plugin/opr_footprint.h +++ b/src/plugin/include/megbrain/plugin/opr_footprint.h @@ -36,7 +36,7 @@ class OprFootprint { void add_single_param_json(); //! be invoked when OprFootprint initilizing. - void init_all_footprints(); + MGE_WIN_DECLSPEC_FUC void init_all_footprints(); public: struct Result { @@ -74,15 +74,16 @@ public: OprFootprint() { init_all_footprints(); } //! return footprint rst for associated opr. - Result calc_footprint(cg::OperatorNodeBase* opr); + MGE_WIN_DECLSPEC_FUC Result calc_footprint(cg::OperatorNodeBase* opr); //! get computation of a given operator - uint64_t get_computation(cg::OperatorNodeBase* opr); + MGE_WIN_DECLSPEC_FUC uint64_t get_computation(cg::OperatorNodeBase* opr); #if MGB_ENABLE_JSON - std::shared_ptr get_param_json(cg::OperatorNodeBase* opr); + MGE_WIN_DECLSPEC_FUC std::shared_ptr get_param_json( + cg::OperatorNodeBase* opr); //! get opr foot print and graph exec info //! the function will recompile graph, AsyncExecutable compiled before will //! 
be invalid - static std::shared_ptr get_opr_fp_graph_exec( + MGE_WIN_DECLSPEC_FUC static std::shared_ptr get_opr_fp_graph_exec( cg::ComputingGraph& graph, const SymbolVarArray& outputs); #endif }; diff --git a/src/plugin/include/megbrain/plugin/opr_io_dump.h b/src/plugin/include/megbrain/plugin/opr_io_dump.h index e5d1a91e..b5af744b 100644 --- a/src/plugin/include/megbrain/plugin/opr_io_dump.h +++ b/src/plugin/include/megbrain/plugin/opr_io_dump.h @@ -66,7 +66,7 @@ class TextOprIODump final : public OprIODumpBase { void dump_var(VarNode* var, bool lazy_sync) override; public: - TextOprIODump( + MGE_WIN_DECLSPEC_FUC TextOprIODump( cg::ComputingGraph* graph, const std::shared_ptr& fout = std::shared_ptr(stderr, [](FILE*) {})); @@ -74,7 +74,7 @@ public: TextOprIODump(cg::ComputingGraph* graph, const char* fpath) : TextOprIODump(graph, std::shared_ptr(fopen(fpath, "w"), fclose)) {} - ~TextOprIODump(); + MGE_WIN_DECLSPEC_FUC ~TextOprIODump(); void flush_lazy() override; @@ -109,8 +109,9 @@ class BinaryOprIODump final : public OprIODumpBase { void dump_var(VarNode* var, bool lazy_sync) override; public: - BinaryOprIODump(cg::ComputingGraph* graph, std::string output_dir); - ~BinaryOprIODump(); + MGE_WIN_DECLSPEC_FUC BinaryOprIODump( + cg::ComputingGraph* graph, std::string output_dir); + MGE_WIN_DECLSPEC_FUC ~BinaryOprIODump(); void flush_lazy() override; }; diff --git a/src/plugin/include/megbrain/plugin/profiler.h b/src/plugin/include/megbrain/plugin/profiler.h index b4536153..d63eda1b 100644 --- a/src/plugin/include/megbrain/plugin/profiler.h +++ b/src/plugin/include/megbrain/plugin/profiler.h @@ -83,13 +83,13 @@ class GraphProfiler final : public PluginBase { void record_event(CompNodeEventPtr& dest, CompNode comp_node); public: - GraphProfiler(cg::ComputingGraph* graph); - ~GraphProfiler() noexcept; + MGE_WIN_DECLSPEC_FUC GraphProfiler(cg::ComputingGraph* graph); + MGE_WIN_DECLSPEC_FUC ~GraphProfiler() noexcept; /*! * \brief convert only profiling result to json */ - std::shared_ptr to_json() const; + MGE_WIN_DECLSPEC_FUC std::shared_ptr to_json() const; /*! 
* \brief dump to visualizer format diff --git a/src/plugin/include/megbrain/plugin/var_value_checker.h b/src/plugin/include/megbrain/plugin/var_value_checker.h index 24f7879c..a270f891 100644 --- a/src/plugin/include/megbrain/plugin/var_value_checker.h +++ b/src/plugin/include/megbrain/plugin/var_value_checker.h @@ -60,7 +60,7 @@ class VarValueChecker final : public PluginBase { public: using Error = opr::AssertEqual::UnequalError; - VarValueChecker( + MGE_WIN_DECLSPEC_FUC VarValueChecker( ComputingGraph* graph, size_t var_switch_interval = 1, size_t init_var_idx = 0); }; diff --git a/src/serialization/include/megbrain/serialization/extern_c_opr.h b/src/serialization/include/megbrain/serialization/extern_c_opr.h index 579aabe4..a3f8f0cf 100644 --- a/src/serialization/include/megbrain/serialization/extern_c_opr.h +++ b/src/serialization/include/megbrain/serialization/extern_c_opr.h @@ -16,7 +16,11 @@ #include #include +#ifdef MGE_DLL_EXPORT +#define MGB_PUBLIC __declspec(dllexport) +#else #define MGB_PUBLIC __attribute__((visibility("default"))) +#endif #ifdef __cplusplus extern "C" { diff --git a/src/serialization/include/megbrain/serialization/extern_c_opr_io.h b/src/serialization/include/megbrain/serialization/extern_c_opr_io.h index c58313f1..9ea99be1 100644 --- a/src/serialization/include/megbrain/serialization/extern_c_opr_io.h +++ b/src/serialization/include/megbrain/serialization/extern_c_opr_io.h @@ -19,7 +19,8 @@ namespace mgb { namespace opr { //! an operator to run extern C oprs -MGB_DEFINE_OPR_CLASS(ExternCOprRunner, cg::SingleCNOutshapePureByInshapeOprBase) // { +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( + ExternCOprRunner, cg::SingleCNOutshapePureByInshapeOprBase) // { std::shared_ptr m_desc; //! store ExternCOprRunner opr full dump name std::string m_dump_name; @@ -40,12 +41,12 @@ MGB_DEFINE_OPR_CLASS(ExternCOprRunner, cg::SingleCNOutshapePureByInshapeOprBase) std::shared_ptr desc, const OperatorNodeConfig& config); public: - ExternCOprRunner( + MGE_WIN_DECLSPEC_FUC ExternCOprRunner( std::string& name, const VarNodeArray& inputs, std::shared_ptr desc, const OperatorNodeConfig& config); //! create from MGBOprDesc and steal its reference - static cg::OperatorNodeBase* make_from_desc( + MGE_WIN_DECLSPEC_FUC static cg::OperatorNodeBase* make_from_desc( std::string& name, const VarNodeArray& inputs, MGBOprDesc* desc, const OperatorNodeConfig& config = {}); @@ -61,7 +62,7 @@ public: * \param data_len length of \p data * \param output_dtypes predefined output dtypes */ - static cg::OperatorNodeBase* make_placeholder( + MGE_WIN_DECLSPEC_FUC static cg::OperatorNodeBase* make_placeholder( const SymbolVarArray& inputs, const TensorShapeArray& output_shapes, const char* name, const void* data, size_t data_len, const OperatorNodeConfig& config = {}, @@ -71,28 +72,30 @@ public: * \brief unregister a MGBOprLoader * \return whether any loader is removed (i.e. whether the name exists) */ - static bool unregister_loader(const char* name); + MGE_WIN_DECLSPEC_FUC static bool unregister_loader(const char* name); //! impl for serialization dump - static void dump( + MGE_WIN_DECLSPEC_FUC static void dump( serialization::OprDumpContext& ctx, const cg::OperatorNodeBase& opr); //! impl for serialization load - static cg::OperatorNodeBase* load( + MGE_WIN_DECLSPEC_FUC static cg::OperatorNodeBase* load( serialization::OprLoadContext& ctx, const cg::VarNodeArray& inputs, const OperatorNodeConfig& config); //! 
impl for serialization shallow copy - static cg::OperatorNodeBase* shallow_copy( + MGE_WIN_DECLSPEC_FUC static cg::OperatorNodeBase* shallow_copy( const serialization::OprShallowCopyContext& ctx, const cg::OperatorNodeBase& opr, const VarNodeArray& inputs, const OperatorNodeConfig& config); //! helper for converting TensorShape to MGBTensorShape - static ::MGBTensorShape tensor_shape_to_c(const TensorShape& shape); + MGE_WIN_DECLSPEC_FUC static ::MGBTensorShape tensor_shape_to_c( + const TensorShape& shape); //! helper for converting MGBTensorShape to TensorShape - static TensorShape tensor_shape_from_c(const MGBTensorShape& shape); + MGE_WIN_DECLSPEC_FUC static TensorShape tensor_shape_from_c( + const MGBTensorShape& shape); const std::string& get_dump_name() { return m_dump_name; } diff --git a/src/serialization/include/megbrain/serialization/file.h b/src/serialization/include/megbrain/serialization/file.h index 731b8e07..73603c5c 100644 --- a/src/serialization/include/megbrain/serialization/file.h +++ b/src/serialization/include/megbrain/serialization/file.h @@ -72,11 +72,12 @@ public: virtual SharedBuffer read_shared(size_t size); //! create an InputFile correspoding to a file on local file system - static std::unique_ptr make_fs(const char* path); + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make_fs(const char* path); //! create an InputFile correspoding to a memory region; the memory //! region must be alive throughout lifespan of this InputFile - static std::unique_ptr make_mem_proxy(const void* ptr, size_t size); + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make_mem_proxy( + const void* ptr, size_t size); /*! * \brief create an InputFile that would directly reuse the memory @@ -86,7 +87,7 @@ public: * If this is set to true, tensor storage can be aggressively * shared by reusing the buffer for alignment. */ - static std::unique_ptr make_mem_proxy( + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make_mem_proxy( std::shared_ptr ptr, size_t size, bool writable = true); }; @@ -108,7 +109,8 @@ public: virtual size_t tell() = 0; //! create an OutputFile correspoding to a file on local file system - static std::unique_ptr make_fs(const char* path, char mode = 'w'); + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make_fs( + const char* path, char mode = 'w'); /*! * \brief create an OutputFile to write to a std::vector @@ -116,8 +118,9 @@ public: * Note that the vector must be alive throughout lifespan of this * OutputFile. Current content in *buf* would not be cleared. 
*/ - static std::unique_ptr make_vector_proxy(std::vector* buf); + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make_vector_proxy( + std::vector* buf); }; } // namespace serialization -} // namespace mgb \ No newline at end of file +} // namespace mgb diff --git a/src/serialization/include/megbrain/serialization/helper.h b/src/serialization/include/megbrain/serialization/helper.h index ca30a415..8ff66a6e 100644 --- a/src/serialization/include/megbrain/serialization/helper.h +++ b/src/serialization/include/megbrain/serialization/helper.h @@ -19,7 +19,7 @@ namespace mgb { namespace serialization { -void serialize_dtype( +MGE_WIN_DECLSPEC_FUC void serialize_dtype( DType dtype, megdnn::thin_function write_fn); DType deserialize_dtype(megdnn::thin_function read_fn); diff --git a/src/serialization/include/megbrain/serialization/load_dump_config.h b/src/serialization/include/megbrain/serialization/load_dump_config.h index 52488acb..8a2d1ead 100644 --- a/src/serialization/include/megbrain/serialization/load_dump_config.h +++ b/src/serialization/include/megbrain/serialization/load_dump_config.h @@ -29,7 +29,7 @@ struct GraphDumpConfig { //! a fallback to implement custom tensor value dumper; it just writes //! the raw tensor value to output file. Implemented in serializer.cpp - static void default_tensor_value_dumper( + MGE_WIN_DECLSPEC_FUC static void default_tensor_value_dumper( OutputFile& fout, const cg::OperatorNodeBase& opr, const HostTensorND& tensor); @@ -100,7 +100,7 @@ struct GraphLoadConfig { //! a fallback to implement custom tensor value reader; it just reads //! the raw tensor value from input file. Implemented in serializer.cpp - static void default_tensor_value_loader( + MGE_WIN_DECLSPEC_FUC static void default_tensor_value_loader( void* ptr, const TensorLayout& layout, InputFile& fin); //! whether to make all SharedDeviceTensor and Host2DeviceCopy shapes diff --git a/src/serialization/include/megbrain/serialization/opr_load_dump.h b/src/serialization/include/megbrain/serialization/opr_load_dump.h index fa71c446..16c1f1c5 100644 --- a/src/serialization/include/megbrain/serialization/opr_load_dump.h +++ b/src/serialization/include/megbrain/serialization/opr_load_dump.h @@ -253,16 +253,16 @@ protected: : OprLoadContext(SerializationFormat::RAW_POD, mgb_version), m_check_param_tag{check_param_tag} {} - virtual void read_raw(void* dest, size_t size) = 0; + MGE_WIN_DECLSPEC_FUC virtual void read_raw(void* dest, size_t size) = 0; //! used for implementing load_shared_buf_with_len(); the default //! implementation uses read_raw() - virtual SharedBuffer load_shared_buf(size_t size); + MGE_WIN_DECLSPEC_FUC virtual SharedBuffer load_shared_buf(size_t size); public: - std::string load_buf_with_len() override; + MGE_WIN_DECLSPEC_FUC std::string load_buf_with_len() override; - SharedBuffer load_shared_buf_with_len() override; + MGE_WIN_DECLSPEC_FUC SharedBuffer load_shared_buf_with_len() override; template Param read_param() { diff --git a/src/serialization/include/megbrain/serialization/opr_registry.h b/src/serialization/include/megbrain/serialization/opr_registry.h index 77a59e7a..33931890 100644 --- a/src/serialization/include/megbrain/serialization/opr_registry.h +++ b/src/serialization/include/megbrain/serialization/opr_registry.h @@ -63,7 +63,7 @@ struct OprRegistry { OprShallowCopy shallow_copy; //!< set to empty to use default impl uint64_t unversioned_type_id; - static void add(const OprRegistry& record); + MGE_WIN_DECLSPEC_FUC static void add(const OprRegistry& record); /*! 
* \brief register an operator to use dynamic loader @@ -75,25 +75,28 @@ struct OprRegistry { * * See TestSerializer.DynamicLoader for an example */ - static void add_using_dynamic_loader( + MGE_WIN_DECLSPEC_FUC static void add_using_dynamic_loader( Typeinfo* type, const std::string& name, const OprDumper& dumper); //! find registry by opr type name; return nullptr if not found - static const OprRegistry* find_by_name(const std::string& name); + MGE_WIN_DECLSPEC_FUC static const OprRegistry* find_by_name( + const std::string& name); //! find registry by persist_type_id; return nullptr if not found - static const OprRegistry* find_by_id(size_t id); + MGE_WIN_DECLSPEC_FUC static const OprRegistry* find_by_id(size_t id); //! find registry by type; return nullptr if not found - static const OprRegistry* find_by_type(Typeinfo* type); + MGE_WIN_DECLSPEC_FUC static const OprRegistry* find_by_type(Typeinfo* type); // TODO: This is hack. Refactor this out. //! Find registry by unversioned id; return nullptr if not found - static const OprRegistry* find_by_unversioned_id(size_t unversioned_id); + MGE_WIN_DECLSPEC_FUC static const OprRegistry* find_by_unversioned_id( + size_t unversioned_id); #if MGB_ENABLE_DEBUG_UTIL //! dump registered oprs - static std::vector> dump_registries(); + MGE_WIN_DECLSPEC_FUC static std::vector> + dump_registries(); #endif }; diff --git a/src/serialization/include/megbrain/serialization/opr_shallow_copy.h b/src/serialization/include/megbrain/serialization/opr_shallow_copy.h index f7dfe350..d6d81e2f 100644 --- a/src/serialization/include/megbrain/serialization/opr_shallow_copy.h +++ b/src/serialization/include/megbrain/serialization/opr_shallow_copy.h @@ -33,7 +33,7 @@ public: } //! get owner graph and check that it matches opr and inputs - ComputingGraph* owner_graph( + MGE_WIN_DECLSPEC_FUC ComputingGraph* owner_graph( const cg::OperatorNodeBase& opr, const VarNodeArray& inputs) const; }; diff --git a/src/serialization/include/megbrain/serialization/serializer.h b/src/serialization/include/megbrain/serialization/serializer.h index bb350d3c..feea6efd 100644 --- a/src/serialization/include/megbrain/serialization/serializer.h +++ b/src/serialization/include/megbrain/serialization/serializer.h @@ -31,7 +31,7 @@ public: using LoadConfig = GraphLoadConfig; struct LoadResult { //! expliit dtor decl to reduce binary size - ~LoadResult() noexcept; + MGE_WIN_DECLSPEC_FUC ~LoadResult() noexcept; //! metadata Metadata metadata; @@ -61,7 +61,7 @@ public: * graph would be destructed if comp_node_seq_record_level == 2; * this method should be called in favor of graph->compile(). 
*/ - std::unique_ptr graph_compile( + MGE_WIN_DECLSPEC_FUC std::unique_ptr graph_compile( const ComputingGraph::OutputSpec& outspec); }; @@ -97,10 +97,11 @@ public: using SharedTensorNameMap = std::unordered_map; - static std::unique_ptr make( + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make( std::unique_ptr file, GraphDumpFormat format = {}); - static Maybe identify_graph_dump_format(InputFile& file); + MGE_WIN_DECLSPEC_FUC static Maybe identify_graph_dump_format( + InputFile& file); virtual ~GraphLoader() = default; @@ -161,7 +162,7 @@ public: params; //!< dumped param names }; - static std::unique_ptr make( + MGE_WIN_DECLSPEC_FUC static std::unique_ptr make( std::unique_ptr file, GraphDumpFormat format = {}); virtual ~GraphDumper() = default; @@ -170,7 +171,7 @@ public: * \brief whether an operator should be removed in graph * serialization file */ - static bool should_remove_in_dump(cg::OperatorNodeBase* opr); + MGE_WIN_DECLSPEC_FUC static bool should_remove_in_dump(cg::OperatorNodeBase* opr); virtual DumpResult dump( const SymbolVarArray& output_vars, const DumpConfig& config = {}, diff --git a/src/tensorrt/impl/tensorrt_engine_cache.cpp b/src/tensorrt/impl/tensorrt_engine_cache.cpp index 27eca894..6be19bbe 100644 --- a/src/tensorrt/impl/tensorrt_engine_cache.cpp +++ b/src/tensorrt/impl/tensorrt_engine_cache.cpp @@ -219,8 +219,8 @@ void TensorRTEngineCacheIO::put(const std::string& key, const Engine& value) { m_cache[key].init_from_buf(value.ptr, value.size); } -std::shared_ptr TensorRTEngineCache::sm_impl = - std::make_shared(); +MGE_WIN_DECLSPEC_DATA std::shared_ptr + TensorRTEngineCache::sm_impl = std::make_shared(); #endif // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/tensorrt/include/megbrain/tensorrt/tensorrt_engine_cache.h b/src/tensorrt/include/megbrain/tensorrt/tensorrt_engine_cache.h index 757248db..854e8f18 100644 --- a/src/tensorrt/include/megbrain/tensorrt/tensorrt_engine_cache.h +++ b/src/tensorrt/include/megbrain/tensorrt/tensorrt_engine_cache.h @@ -30,7 +30,7 @@ namespace mgb { * opr in the same computing graph are different. */ class TensorRTEngineCache : public NonCopyableObj { - static std::shared_ptr sm_impl; + static MGE_WIN_DECLSPEC_DATA std::shared_ptr sm_impl; static bool sm_enable_engine_cache; public: @@ -46,15 +46,17 @@ public: virtual void dump_cache() = 0; //! get the key of the TensorRTOpr - static std::string make_key_from_trt_opr(const opr::TensorRTOpr* opr); + MGE_WIN_DECLSPEC_FUC static std::string make_key_from_trt_opr( + const opr::TensorRTOpr* opr); //! enable the tensorrt engine cache, or query whether the cache is used - static bool enable_engine_cache(bool enable_engine_cache = false); + MGE_WIN_DECLSPEC_FUC static bool enable_engine_cache( + bool enable_engine_cache = false); //! disable the tensorrt engine cache - static void disable_engine_cache(); + MGE_WIN_DECLSPEC_FUC static void disable_engine_cache(); //! set an implementation; return the original implementation - static std::shared_ptr set_impl( + MGE_WIN_DECLSPEC_FUC static std::shared_ptr set_impl( std::shared_ptr impl); //! 
get the instance; the default implementation is an InMemoryCache
@@ -151,14 +153,14 @@ class TensorRTEngineCacheIO final : public TensorRTEngineCache {
     std::mutex m_mtx;
 public:
-    TensorRTEngineCacheIO(std::string filename);
-    ~TensorRTEngineCacheIO() = default;
+    MGE_WIN_DECLSPEC_FUC TensorRTEngineCacheIO(std::string filename);
+    MGE_WIN_DECLSPEC_FUC ~TensorRTEngineCacheIO() = default;
-    void dump_cache() override;
+    MGE_WIN_DECLSPEC_FUC void dump_cache() override;
-    Maybe get(const std::string& key) override;
+    MGE_WIN_DECLSPEC_FUC Maybe get(const std::string& key) override;
-    void put(const std::string& key, const Engine& value) override;
+    MGE_WIN_DECLSPEC_FUC void put(const std::string& key, const Engine& value) override;
 };
 } // namespace mgb
 #endif
diff --git a/src/tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h b/src/tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
index 60eca087..10f233a3 100644
--- a/src/tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
+++ b/src/tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
@@ -62,13 +62,13 @@ public:
     //! serialization load/dump
     struct LoadDumpImpl;
-    static SymbolVarArray make(
+    MGE_WIN_DECLSPEC_FUC static SymbolVarArray make(
             std::shared_ptr engine, std::shared_ptr gpu_allocator,
             const SymbolVarArray& src, const OperatorNodeConfig& config = {});
     //! create an operator from a serialized ICudaEngine
-    static SymbolVarArray make(
+    MGE_WIN_DECLSPEC_FUC static SymbolVarArray make(
            const void* buf, size_t buf_size, const SymbolVarArray& src,
            const OperatorNodeConfig& config = {});
diff --git a/src/version.ld b/src/version.ld
index d8952e51..db71a72b 100644
--- a/src/version.ld
+++ b/src/version.ld
@@ -13,6 +13,10 @@ global:
     base_exceptions*;
   };
   megcore*;
+  *GroupClientProxy*;
+  *create_zmqrpc_server*;
+  *custom*;
+
 local: *;
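The bulk of this patch annotates out-of-line constructors, destructors, `make()` factories and virtual overrides with `MGE_WIN_DECLSPEC_FUC`, because MSVC exports nothing from a DLL unless a symbol is explicitly marked (or listed in a .def file / version script). The macro itself is defined elsewhere in the tree and is not part of this diff; the sketch below only shows the usual export/import idiom, reusing the `MGE_DLL_EXPORT` guard that the `extern_c_opr.h` hunk above keys on. The `dllimport` branch and the empty non-Windows branch are assumptions for illustration, not MegEngine's actual definition.

```cpp
// Sketch of the idiom behind a marker like MGE_WIN_DECLSPEC_FUC (assumed, simplified).
#if defined(_WIN32)
    #if defined(MGE_DLL_EXPORT)
        #define EXAMPLE_DECLSPEC_FUC __declspec(dllexport)  // building the DLL: export the symbol
    #else
        #define EXAMPLE_DECLSPEC_FUC __declspec(dllimport)  // consuming the DLL: import the symbol
    #endif
#else
    #define EXAMPLE_DECLSPEC_FUC  // ELF/Mach-O builds: visibility handled by other means
#endif

// Any out-of-line function another module calls across the DLL boundary needs the marker:
class ExampleCache {
public:
    EXAMPLE_DECLSPEC_FUC static ExampleCache& inst();
};
```

On Linux/macOS the same effect is usually achieved with `-fvisibility=hidden` plus `__attribute__((visibility("default")))`, which is exactly the non-Windows fallback the `MGB_PUBLIC` change in `extern_c_opr.h` keeps.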
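Operator classes are switched from `MGB_DEFINE_OPR_CLASS` to `MGB_DEFINE_OPR_CLASS_WITH_EXPORT` (e.g. `Sleep`, `Timestamp`, `RelayoutFormat`, `ExternCOprRunner`). Exporting at class level also exports the vtable and `type_info`, which out-of-line virtual overrides and cross-DLL `dynamic_cast`/`typeid` rely on. The real macros are defined elsewhere and are more involved; the following is only an assumed, simplified shape of the distinction:

```cpp
// Assumed, simplified contrast between a plain and an exported class-defining macro.
#if defined(_WIN32)
    #if defined(MGE_DLL_EXPORT)
        #define EXAMPLE_CLASS_API __declspec(dllexport)
    #else
        #define EXAMPLE_CLASS_API
    #endif
#else
    #define EXAMPLE_CLASS_API __attribute__((visibility("default")))
#endif

#define EXAMPLE_DEFINE_OPR_CLASS(name, base) class name : public base
#define EXAMPLE_DEFINE_OPR_CLASS_WITH_EXPORT(name, base) \
    class EXAMPLE_CLASS_API name : public base  // exports members, vtable and RTTI together

struct OperatorNodeBase {
    virtual ~OperatorNodeBase() = default;
};

EXAMPLE_DEFINE_OPR_CLASS_WITH_EXPORT(Sleep, OperatorNodeBase) {
public:
    explicit Sleep(double seconds) : m_seconds(seconds) {}

private:
    double m_seconds;
};
```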
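Static data members get a separate marker: `TensorRTEngineCache::sm_impl` is declared with `MGE_WIN_DECLSPEC_DATA` in the header, and the out-of-line definition in `tensorrt_engine_cache.cpp` repeats it, because on Windows data symbols must be exported explicitly and the definition compiled into the DLL has to match the annotated declaration. A self-contained sketch of that pattern follows; the macro body and class name are placeholders, not MegEngine's actual definitions.

```cpp
// Assumed illustration of exporting a static data member across a DLL boundary.
#include <memory>

#if defined(_WIN32) && defined(MGE_DLL_EXPORT)
    #define EXAMPLE_DECLSPEC_DATA __declspec(dllexport)
#else
    #define EXAMPLE_DECLSPEC_DATA
#endif

struct ExampleEngineCache {
    // the in-class declaration carries the marker...
    static EXAMPLE_DECLSPEC_DATA std::shared_ptr<ExampleEngineCache> sm_impl;
};

// ...and the single definition (compiled into the DLL) repeats it, mirroring
// the tensorrt_engine_cache.cpp hunk above.
EXAMPLE_DECLSPEC_DATA std::shared_ptr<ExampleEngineCache> ExampleEngineCache::sm_impl =
        std::make_shared<ExampleEngineCache>();
```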