|
|
@@ -90,12 +90,11 @@ inline int8x16_t vqtbl1q_s8_v7(int8x16_t a, uint8x16_t index) { |
|
|
|
_sum1##_c_idx = vdotq_s32(_sum1##_c_idx, _k##_k2_idx, _elem); |
|
|
|
|
|
|
|
template <bool first_ic, bool last_ic, BiasMode bias_mode, typename Op> |
|
|
|
void conv_bias::conv_direct_stride1_2x2_int8_dot(const int8_t* src, |
|
|
|
const int8_t* filter, |
|
|
|
const int32_t* bias, int32_t* temp, |
|
|
|
int8_t* dst, const size_t IH, |
|
|
|
const size_t IW, const size_t OH, |
|
|
|
const size_t OW, const Op& op) { |
|
|
|
void conv_bias::conv_direct_stride1_2x2_int8_dot( |
|
|
|
const int8_t* src, const int8_t* filter, const int32_t* bias, |
|
|
|
int32_t* temp, int8_t* dst, const size_t IH, const size_t IW, |
|
|
|
const size_t OH, const size_t OW, const Op& op) { |
|
|
|
MEGDNN_MARK_USED_VAR(IH); |
|
|
|
const size_t tail_step = IW - OW; |
|
|
|
const uint8x16_t _idx0 = {0, 1, 16, 16, 1, 2, 16, 16, |
|
|
|
2, 3, 16, 16, 3, 4, 16, 16}; |
|
|
@@ -326,12 +325,11 @@ void conv_bias::conv_direct_stride1_2x2_int8_dot(const int8_t* src, |
|
|
|
} |
|
|
|
|
|
|
|
template <bool first_ic, bool last_ic, BiasMode bias_mode, typename Op> |
|
|
|
void conv_bias::conv_direct_stride1_3x3_int8_dot(const int8_t* src, |
|
|
|
const int8_t* filter, |
|
|
|
const int32_t* bias, int32_t* temp, |
|
|
|
int8_t* dst, const size_t IH, |
|
|
|
const size_t IW, const size_t OH, |
|
|
|
const size_t OW, const Op& op) { |
|
|
|
void conv_bias::conv_direct_stride1_3x3_int8_dot( |
|
|
|
const int8_t* src, const int8_t* filter, const int32_t* bias, |
|
|
|
int32_t* temp, int8_t* dst, const size_t IH, const size_t IW, |
|
|
|
const size_t OH, const size_t OW, const Op& op) { |
|
|
|
MEGDNN_MARK_USED_VAR(IH); |
|
|
|
const size_t tail_step = IW - OW; |
|
|
|
|
|
|
|
const uint8x16_t _idx0 = {0, 1, 2, 16, 1, 2, 3, 16, |
|
|
@@ -562,12 +560,11 @@ void conv_bias::conv_direct_stride1_3x3_int8_dot(const int8_t* src, |
|
|
|
} |
|
|
|
|
|
|
|
template <bool first_ic, bool last_ic, BiasMode bias_mode, typename Op> |
|
|
|
void conv_bias::conv_direct_stride2_2x2_int8_dot(const int8_t* src, |
|
|
|
const int8_t* filter, |
|
|
|
const int32_t* bias, int32_t* temp, |
|
|
|
int8_t* dst, const size_t IH, |
|
|
|
const size_t IW, const size_t OH, |
|
|
|
const size_t OW, const Op& op) { |
|
|
|
void conv_bias::conv_direct_stride2_2x2_int8_dot( |
|
|
|
const int8_t* src, const int8_t* filter, const int32_t* bias, |
|
|
|
int32_t* temp, int8_t* dst, const size_t IH, const size_t IW, |
|
|
|
const size_t OH, const size_t OW, const Op& op) { |
|
|
|
MEGDNN_MARK_USED_VAR(IH); |
|
|
|
const size_t tail_step = IW - 2 * OW + IW; |
|
|
|
|
|
|
|
const uint8x16_t _idx0 = {0, 1, 16, 16, 2, 3, 16, 16, |
|
|
@@ -658,12 +655,11 @@ void conv_bias::conv_direct_stride2_2x2_int8_dot(const int8_t* src, |
|
|
|
} |
|
|
|
|
|
|
|
template <bool first_ic, bool last_ic, BiasMode bias_mode, typename Op> |
|
|
|
void conv_bias::conv_direct_stride2_3x3_int8_dot(const int8_t* src, |
|
|
|
const int8_t* filter, |
|
|
|
const int32_t* bias, int32_t* temp, |
|
|
|
int8_t* dst, const size_t IH, |
|
|
|
const size_t IW, const size_t OH, |
|
|
|
const size_t OW, const Op& op) { |
|
|
|
void conv_bias::conv_direct_stride2_3x3_int8_dot( |
|
|
|
const int8_t* src, const int8_t* filter, const int32_t* bias, |
|
|
|
int32_t* temp, int8_t* dst, const size_t IH, const size_t IW, |
|
|
|
const size_t OH, const size_t OW, const Op& op) { |
|
|
|
MEGDNN_MARK_USED_VAR(IH); |
|
|
|
const size_t tail_step = IW - 2 * OW + IW; |
|
|
|
|
|
|
|
const uint8x16_t _idx0 = {0, 1, 2, 16, 2, 3, 4, 16, |
|
|
@@ -814,12 +810,11 @@ void conv_bias::conv_direct_stride2_3x3_int8_dot(const int8_t* src, |
|
|
|
_sum1##_c_idx = vdotq_s32(_sum1##_c_idx, _k##_k11_idx, _elem); |
|
|
|
|
|
|
|
template <bool first_ic, bool last_ic, BiasMode bias_mode, typename Op> |
|
|
|
void conv_bias::conv_direct_stride2_5x5_int8_dot(const int8_t* src, |
|
|
|
const int8_t* filter, |
|
|
|
const int32_t* bias, int32_t* temp, |
|
|
|
int8_t* dst, const size_t IH, |
|
|
|
const size_t IW, const size_t OH, |
|
|
|
const size_t OW, const Op& op) { |
|
|
|
void conv_bias::conv_direct_stride2_5x5_int8_dot( |
|
|
|
const int8_t* src, const int8_t* filter, const int32_t* bias, |
|
|
|
int32_t* temp, int8_t* dst, const size_t IH, const size_t IW, |
|
|
|
const size_t OH, const size_t OW, const Op& op) { |
|
|
|
MEGDNN_MARK_USED_VAR(IH); |
|
|
|
const size_t tail_step = IW - 2 * OW + IW; |
|
|
|
|
|
|
|
const uint8x16_t _idx00 = {0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9}; |
|
|
@@ -1113,12 +1108,11 @@ void conv_bias::conv_direct_stride2_5x5_int8_dot(const int8_t* src, |
|
|
|
} |
|
|
|
|
|
|
|
template <bool first_ic, bool last_ic, BiasMode bias_mode, typename Op> |
|
|
|
void conv_bias::conv_direct_stride2_7x7_int8_dot(const int8_t* src, |
|
|
|
const int8_t* filter, |
|
|
|
const int32_t* bias, int32_t* temp, |
|
|
|
int8_t* dst, const size_t IH, |
|
|
|
const size_t IW, const size_t OH, |
|
|
|
const size_t OW, const Op& op) { |
|
|
|
void conv_bias::conv_direct_stride2_7x7_int8_dot( |
|
|
|
const int8_t* src, const int8_t* filter, const int32_t* bias, |
|
|
|
int32_t* temp, int8_t* dst, const size_t IH, const size_t IW, |
|
|
|
const size_t OH, const size_t OW, const Op& op) { |
|
|
|
MEGDNN_MARK_USED_VAR(IH); |
|
|
|
const size_t tail_step = IW - 2 * OW + IW; |
|
|
|
|
|
|
|
const uint8x16_t _idx00 = {0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9}; |
|
|
@@ -1476,12 +1470,11 @@ void conv_bias::conv_direct_stride2_7x7_int8_dot(const int8_t* src, |
|
|
|
} |
|
|
|
|
|
|
|
template <bool first_ic, bool last_ic, BiasMode bias_mode, typename Op> |
|
|
|
void conv_bias::conv_direct_stride1_5x5_int8_dot(const int8_t* src, |
|
|
|
const int8_t* filter, |
|
|
|
const int32_t* bias, int32_t* temp, |
|
|
|
int8_t* dst, const size_t IH, |
|
|
|
const size_t IW, const size_t OH, |
|
|
|
const size_t OW, const Op& op) { |
|
|
|
void conv_bias::conv_direct_stride1_5x5_int8_dot( |
|
|
|
const int8_t* src, const int8_t* filter, const int32_t* bias, |
|
|
|
int32_t* temp, int8_t* dst, const size_t IH, const size_t IW, |
|
|
|
const size_t OH, const size_t OW, const Op& op) { |
|
|
|
MEGDNN_MARK_USED_VAR(IH); |
|
|
|
const size_t tail_step = IW - OW; |
|
|
|
|
|
|
|
const uint8x16_t _idx00 = {0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6}; |
|
|
@@ -1777,12 +1770,11 @@ void conv_bias::conv_direct_stride1_5x5_int8_dot(const int8_t* src, |
|
|
|
} |
|
|
|
|
|
|
|
template <bool first_ic, bool last_ic, BiasMode bias_mode, typename Op> |
|
|
|
void conv_bias::conv_direct_stride1_7x7_int8_dot(const int8_t* src, |
|
|
|
const int8_t* filter, |
|
|
|
const int32_t* bias, int32_t* temp, |
|
|
|
int8_t* dst, const size_t IH, |
|
|
|
const size_t IW, const size_t OH, |
|
|
|
const size_t OW, const Op& op) { |
|
|
|
void conv_bias::conv_direct_stride1_7x7_int8_dot( |
|
|
|
const int8_t* src, const int8_t* filter, const int32_t* bias, |
|
|
|
int32_t* temp, int8_t* dst, const size_t IH, const size_t IW, |
|
|
|
const size_t OH, const size_t OW, const Op& op) { |
|
|
|
MEGDNN_MARK_USED_VAR(IH); |
|
|
|
const size_t tail_step = IW - OW; |
|
|
|
|
|
|
|
const uint8x16_t _idx00 = {0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6}; |
|
|
|