From 8109a05a5e1d3463277a4381ef46f9a385eaeca2 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 21 Apr 2021 19:31:11 +0800 Subject: [PATCH] fix(mgb/dnn): fix cub potential issues Wrap cub with CUB_NS_PREFIX and remove dependency on Thrust to avoid potential linking issues GitOrigin-RevId: 53893b0a3957b9321fcdbca30a5c2659504f2553 --- dnn/src/cuda/cub/iterator/arg_index_input_iterator.cuh | 18 ------------------ .../cub/iterator/cache_modified_input_iterator.cuh | 16 ---------------- .../cub/iterator/cache_modified_output_iterator.cuh | 17 ----------------- dnn/src/cuda/cub/iterator/constant_input_iterator.cuh | 17 ----------------- dnn/src/cuda/cub/iterator/counting_input_iterator.cuh | 17 ----------------- dnn/src/cuda/cub/iterator/discard_output_iterator.cuh | 17 ----------------- dnn/src/cuda/cub/iterator/tex_obj_input_iterator.cuh | 17 ----------------- dnn/src/cuda/cub/iterator/tex_ref_input_iterator.cuh | 16 ---------------- dnn/src/cuda/cub/iterator/transform_input_iterator.cuh | 16 ---------------- dnn/src/cuda/cub/util_namespace.cuh | 4 ++-- dnn/src/cuda/dot/dot.cu | 1 + .../uint4x4x32_wmma/preprocess_quantize_sum.cu | 2 ++ dnn/src/cuda/topk/topk_radix.cu | 4 ++-- 13 files changed, 7 insertions(+), 155 deletions(-) diff --git a/dnn/src/cuda/cub/iterator/arg_index_input_iterator.cuh b/dnn/src/cuda/cub/iterator/arg_index_input_iterator.cuh index 95a84a57..7a206d0d 100644 --- a/dnn/src/cuda/cub/iterator/arg_index_input_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/arg_index_input_iterator.cuh @@ -41,14 +41,6 @@ #include "../util_device.cuh" #include "../util_namespace.cuh" -#include - -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -121,17 +113,7 @@ public: typedef value_type* pointer; ///< The type of a pointer to an element the iterator can point to typedef value_type reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::any_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION private: diff --git a/dnn/src/cuda/cub/iterator/cache_modified_input_iterator.cuh b/dnn/src/cuda/cub/iterator/cache_modified_input_iterator.cuh index b4ad91e2..2c28b1ce 100644 --- a/dnn/src/cuda/cub/iterator/cache_modified_input_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/cache_modified_input_iterator.cuh @@ -41,12 +41,6 @@ #include "../util_device.cuh" #include "../util_namespace.cuh" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -115,17 +109,7 @@ public: typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::device_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION public: diff --git a/dnn/src/cuda/cub/iterator/cache_modified_output_iterator.cuh b/dnn/src/cuda/cub/iterator/cache_modified_output_iterator.cuh index c3e3321d..96d42920 100644 --- a/dnn/src/cuda/cub/iterator/cache_modified_output_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/cache_modified_output_iterator.cuh @@ -41,13 +41,6 @@ #include "../util_device.cuh" #include "../util_namespace.cuh" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -135,17 +128,7 @@ public: typedef void pointer; ///< The type of a pointer to an element the iterator can point to typedef Reference reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::device_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION private: diff --git a/dnn/src/cuda/cub/iterator/constant_input_iterator.cuh b/dnn/src/cuda/cub/iterator/constant_input_iterator.cuh index 1e0a9104..af9aefd1 100644 --- a/dnn/src/cuda/cub/iterator/constant_input_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/constant_input_iterator.cuh @@ -40,13 +40,6 @@ #include "../thread/thread_store.cuh" #include "../util_namespace.cuh" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -104,17 +97,7 @@ public: typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::any_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION private: diff --git a/dnn/src/cuda/cub/iterator/counting_input_iterator.cuh b/dnn/src/cuda/cub/iterator/counting_input_iterator.cuh index 7f49348d..1df42b1b 100644 --- a/dnn/src/cuda/cub/iterator/counting_input_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/counting_input_iterator.cuh @@ -41,13 +41,6 @@ #include "../util_device.cuh" #include "../util_namespace.cuh" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -102,17 +95,7 @@ public: typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::any_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION private: diff --git a/dnn/src/cuda/cub/iterator/discard_output_iterator.cuh b/dnn/src/cuda/cub/iterator/discard_output_iterator.cuh index 28473e5f..2a6cac7b 100644 --- a/dnn/src/cuda/cub/iterator/discard_output_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/discard_output_iterator.cuh @@ -39,13 +39,6 @@ #include "../util_namespace.cuh" #include "../util_macro.cuh" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -74,17 +67,7 @@ public: typedef void pointer; ///< The type of a pointer to an element the iterator can point to typedef void reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::any_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION private: diff --git a/dnn/src/cuda/cub/iterator/tex_obj_input_iterator.cuh b/dnn/src/cuda/cub/iterator/tex_obj_input_iterator.cuh index b99103ec..2267afad 100644 --- a/dnn/src/cuda/cub/iterator/tex_obj_input_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/tex_obj_input_iterator.cuh @@ -42,13 +42,6 @@ #include "../util_debug.cuh" #include "../util_namespace.cuh" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -119,17 +112,7 @@ public: typedef T* pointer; ///< The type of a pointer to an element the iterator can point to typedef T reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::device_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION private: diff --git a/dnn/src/cuda/cub/iterator/tex_ref_input_iterator.cuh b/dnn/src/cuda/cub/iterator/tex_ref_input_iterator.cuh index 95d0ffbc..9fcbf368 100644 --- a/dnn/src/cuda/cub/iterator/tex_ref_input_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/tex_ref_input_iterator.cuh @@ -44,12 +44,6 @@ #if (CUDA_VERSION >= 5050) || defined(DOXYGEN_ACTIVE) // This iterator is compatible with CUDA 5.5 and newer -#if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -212,17 +206,7 @@ public: typedef T* pointer; ///< The type of a pointer to an element the iterator can point to typedef T reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::device_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION private: diff --git a/dnn/src/cuda/cub/iterator/transform_input_iterator.cuh b/dnn/src/cuda/cub/iterator/transform_input_iterator.cuh index dad1f500..3b995e25 100644 --- a/dnn/src/cuda/cub/iterator/transform_input_iterator.cuh +++ b/dnn/src/cuda/cub/iterator/transform_input_iterator.cuh @@ -41,12 +41,6 @@ #include "../util_device.cuh" #include "../util_namespace.cuh" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - /// Optional outer namespace(s) CUB_NS_PREFIX @@ -125,17 +119,7 @@ public: typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::any_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION private: diff --git a/dnn/src/cuda/cub/util_namespace.cuh b/dnn/src/cuda/cub/util_namespace.cuh index c8991d08..5f99c1e0 100644 --- a/dnn/src/cuda/cub/util_namespace.cuh +++ b/dnn/src/cuda/cub/util_namespace.cuh @@ -38,9 +38,9 @@ //#define CUB_NS_POSTFIX } } #ifndef CUB_NS_PREFIX -#define CUB_NS_PREFIX +#define CUB_NS_PREFIX namespace megdnn { namespace cuda { #endif #ifndef CUB_NS_POSTFIX -#define CUB_NS_POSTFIX +#define CUB_NS_POSTFIX } } #endif diff --git a/dnn/src/cuda/dot/dot.cu b/dnn/src/cuda/dot/dot.cu index ca51b0a5..a4913f23 100644 --- a/dnn/src/cuda/dot/dot.cu +++ b/dnn/src/cuda/dot/dot.cu @@ -16,6 +16,7 @@ namespace { using namespace megdnn; +using namespace cuda; template __global__ void kernel(const T *a, const T *b, dt_float32 *c, diff --git a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu index 14389f9b..af7d38a6 100644 --- a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu +++ b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu @@ -43,6 +43,8 @@ namespace { +using namespace megdnn::cuda; + template __global__ void reduce_column_with_scale_u4(const uint8_t* src, int32_t scale, int rows, int cols_int32, diff --git a/dnn/src/cuda/topk/topk_radix.cu b/dnn/src/cuda/topk/topk_radix.cu index 28cd43dd..700f0ea0 100644 --- a/dnn/src/cuda/topk/topk_radix.cu +++ b/dnn/src/cuda/topk/topk_radix.cu @@ -355,8 +355,8 @@ static __global__ void kern_reduce_block_cnt(const ctype* input_data, static MEGDNN_NOINLINE cudaError_t invoke_cub_scan(const uint64_t* input, uint64_t* output, void* workspace, size_t& workspace_size, uint32_t size, cudaStream_t stream) { - return cub::DeviceScan::InclusiveSum(workspace, workspace_size, input, - output, size, stream); + return cub::DeviceScan::InclusiveSum(workspace, workspace_size, + input, output, size, stream); } static __global__ void kern_init_zero(uint64_t* dst) {