GitOrigin-RevId: e3a12cf9b3
tags/v0.5.0
@@ -52,21 +52,9 @@ class GemmInterleaved<Strategy, true> { | |||||
} | } | ||||
size_t get_b_workspace_size() const { | size_t get_b_workspace_size() const { | ||||
#if __ARM_FEATURE_DOTPROD | |||||
size_t new_blockn = m_strategy.block_n; | |||||
if (m_strategy.KERNEL_W == 6 && m_strategy.UNROLL_K == 4 && | |||||
m_strategy.KERNEL_H == 8) { | |||||
new_blockn = round_up<size_t>((m_strategy.block_n-1) % 6, 4) + | |||||
m_strategy.block_n / 6 * 6; | |||||
} | |||||
size_t N = round_up(new_blockn, m_strategy.KERNEL_W); | |||||
size_t K = round_up(m_strategy.block_k, m_strategy.UNROLL_K); | |||||
return round_up(sizeof(stype) * N * K, CACHELINE_SIZE) + m_align_size; | |||||
#else | |||||
size_t N = round_up(m_strategy.block_n, m_strategy.KERNEL_W); | size_t N = round_up(m_strategy.block_n, m_strategy.KERNEL_W); | ||||
size_t K = round_up(m_strategy.block_k, m_strategy.UNROLL_K); | size_t K = round_up(m_strategy.block_k, m_strategy.UNROLL_K); | ||||
return round_up(sizeof(stype) * N * K, CACHELINE_SIZE) + m_align_size; | return round_up(sizeof(stype) * N * K, CACHELINE_SIZE) + m_align_size; | ||||
#endif | |||||
} | } | ||||
//! temporary storage for output, post process such as add bias or relu will | //! temporary storage for output, post process such as add bias or relu will | ||||
@@ -1452,7 +1452,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_MK4_DOT) { | |||||
#if MEGDNN_AARCH64 | #if MEGDNN_AARCH64 | ||||
cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | ||||
#elif MEGDNN_ARMV7 | #elif MEGDNN_ARMV7 | ||||
cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X6X4_DOTPROD:96"); | |||||
cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"); | |||||
#endif | #endif | ||||
#undef cb | #undef cb | ||||
} | } | ||||
@@ -1476,7 +1476,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_S8x8x32_MK4_DOT) { | |||||
#if MEGDNN_AARCH64 | #if MEGDNN_AARCH64 | ||||
cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | ||||
#elif MEGDNN_ARMV7 | #elif MEGDNN_ARMV7 | ||||
cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X6X4_DOTPROD:96"); | |||||
cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"); | |||||
#endif | #endif | ||||
#undef cb | #undef cb | ||||
} | } | ||||
@@ -1500,7 +1500,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32_MK4_DOT) { | |||||
#if MEGDNN_AARCH64 | #if MEGDNN_AARCH64 | ||||
cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | ||||
#elif MEGDNN_ARMV7 | #elif MEGDNN_ARMV7 | ||||
cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X6X4_DOTPROD:96"); | |||||
cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"); | |||||
#endif | #endif | ||||
#undef cb | #undef cb | ||||
} | } | ||||
@@ -1529,7 +1529,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CONV1x1_QUANTIZEDSYM_MK4_DOT) { | |||||
#if MEGDNN_AARCH64 | #if MEGDNN_AARCH64 | ||||
cb("CONV1x1:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD"); | cb("CONV1x1:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD"); | ||||
#elif MEGDNN_ARMV7 | #elif MEGDNN_ARMV7 | ||||
cb("CONV1x1:AARCH32_INT8_MK4_8X6X4_DOTPROD"); | |||||
cb("CONV1x1:AARCH32_INT8_MK4_8X4X4_DOTPROD"); | |||||
#endif | #endif | ||||
#undef cb | #undef cb | ||||
} | } | ||||