From d2a1905ad556f1043c6d49e9526e610808a742cb Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Thu, 28 Jul 2022 19:49:24 +0800 Subject: [PATCH] Revert "feat(mgb): add cumprod opr" This reverts commit 3436c3bdaa4c11de35e3e37849a11202318cc99f. GitOrigin-RevId: 95ab3d1aa7dd316f4cf52f7b45a0206bcf60af54 --- dnn/include/megdnn/oprs/general.h | 42 --- dnn/scripts/gen_elemwise_multi_type_utils.py | 2 +- dnn/scripts/gen_elemwise_utils.py | 2 +- dnn/scripts/opr_param_defs.py | 21 +- dnn/src/common/cumprod.cpp | 25 -- dnn/src/common/elemwise/each_mode.inl | 3 +- dnn/src/common/elemwise/kern_defs.cuh | 2 - dnn/src/common/elemwise/opr_impl.cpp | 1 - dnn/src/common/handle_impl.h | 1 - dnn/src/common/opr_trait.h | 1 - dnn/src/cuda/cumprod/cumprod.cu | 25 -- dnn/src/cuda/cumprod/kern.cuh | 62 ---- dnn/src/cuda/cumprod/kern_helper.cuh | 18 -- dnn/src/cuda/cumprod/kern_impl.cu | 82 ------ dnn/src/cuda/cumprod/kern_impl.cuinl | 326 --------------------- dnn/src/cuda/cumprod/opr_impl.cpp | 63 ---- dnn/src/cuda/cumprod/opr_impl.h | 19 -- .../cuda/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cu | 7 - dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float16.cu | 7 - dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float32.cu | 5 - .../kimpl/SAFE_DIV_dt_qint8_dt_qint8.cu | 6 - dnn/src/cuda/elemwise_multi_type/opr_impl.cpp | 1 - dnn/src/cuda/handle_create.cpp | 2 - .../elemwise/fallback_impl/opr_binary_impl.cpp | 1 - dnn/src/naive/cumprod/opr_impl.cpp | 72 ----- dnn/src/naive/cumprod/opr_impl.h | 20 -- .../naive/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp | 7 - .../naive/elemwise/kimpl/SAFE_DIV_dt_float16.cpp | 7 - .../naive/elemwise/kimpl/SAFE_DIV_dt_float32.cpp | 5 - dnn/src/naive/elemwise/opr_impl.cpp | 2 - dnn/src/naive/handle.cpp | 1 - .../elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp.hip | 7 - .../elemwise/kimpl/SAFE_DIV_dt_float16.cpp.hip | 7 - .../elemwise/kimpl/SAFE_DIV_dt_float32.cpp.hip | 5 - dnn/test/common/elemwise.cpp | 2 +- dnn/test/cuda/cumprod.cpp | 63 ---- dnn/test/naive/record1.cpp | 43 --- imperative/python/megengine/functional/tensor.py | 19 +- imperative/python/test/unit/core/test_autodiff.py | 16 - imperative/src/impl/ops/cumxxx.cpp | 34 --- imperative/src/impl/ops/specializations.cpp | 12 + imperative/tablegen/generated/hash.txt | 12 +- imperative/tablegen/generated/opdef.cpp.inl | 112 ------- imperative/tablegen/generated/opdef.cpy.inl | 255 +--------------- imperative/tablegen/generated/opdef.h.inl | 38 --- imperative/tablegen/generated/opdef.py.inl | 54 ---- src/core/include/megbrain/ir/ops.td | 1 - src/gopt/test/basic_arith.cpp | 16 +- src/jit/impl/ast_c.cpp | 2 +- src/opr/impl/basic_arith.cpp | 2 - src/opr/impl/misc.cpp | 69 ----- src/opr/impl/misc.oprdecl | 10 - src/opr/impl/misc.sereg.h | 1 - src/opr/include/megbrain/opr/misc.h | 19 -- src/opr/test/basic_arith/elemwise.cpp | 23 +- .../test/basic_arith/elemwise_binary_trait_def.inl | 1 - src/opr/test/misc.cpp | 54 ---- src/serialization/impl/schema.fbs | 1 - test/src/autocheck.cpp | 6 - 59 files changed, 43 insertions(+), 1679 deletions(-) delete mode 100644 dnn/src/common/cumprod.cpp delete mode 100644 dnn/src/cuda/cumprod/cumprod.cu delete mode 100644 dnn/src/cuda/cumprod/kern.cuh delete mode 100644 dnn/src/cuda/cumprod/kern_helper.cuh delete mode 100644 dnn/src/cuda/cumprod/kern_impl.cu delete mode 100644 dnn/src/cuda/cumprod/kern_impl.cuinl delete mode 100644 dnn/src/cuda/cumprod/opr_impl.cpp delete mode 100644 dnn/src/cuda/cumprod/opr_impl.h delete mode 100644 dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cu delete mode 100644 
dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float16.cu delete mode 100644 dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float32.cu delete mode 100644 dnn/src/cuda/elemwise_multi_type/kimpl/SAFE_DIV_dt_qint8_dt_qint8.cu delete mode 100644 dnn/src/naive/cumprod/opr_impl.cpp delete mode 100644 dnn/src/naive/cumprod/opr_impl.h delete mode 100644 dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp delete mode 100644 dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_float16.cpp delete mode 100644 dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_float32.cpp delete mode 100644 dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp.hip delete mode 100644 dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_float16.cpp.hip delete mode 100644 dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_float32.cpp.hip delete mode 100644 dnn/test/cuda/cumprod.cpp delete mode 100644 imperative/src/impl/ops/cumxxx.cpp diff --git a/dnn/include/megdnn/oprs/general.h b/dnn/include/megdnn/oprs/general.h index 0800cab0..9b556bc3 100644 --- a/dnn/include/megdnn/oprs/general.h +++ b/dnn/include/megdnn/oprs/general.h @@ -314,48 +314,6 @@ protected: }; using Cumsum = CumsumForward; -class CumprodForward : public OperatorBase { - DEF_OPR_PARAM(Cumprod); - DEF_OPR_IMPL(CumprodForward, OperatorBase, 1, 1); - -public: - /** - * \param[in] src input tensor - * \param[out] dst output tensor - * - * src and dst should be contiguous. - * src and dst should have the same shape. - * - * The exclusive flag specifies whether the current element is taken - * into account when calculating results. - * - * The reverse flag specifies whether cumprod is forward ( - * from 0 to n) or backward (from n downto 0). - * - * Example: - * exclusive && reverse: - * dst_i = src_{i+1} * src_{i+2} * ... * src_{n-1} - * exclusive && !reverse - * dst_i = src_0 * src_1 * ... * src_{i-1} - * !exclusive && reverse: - * dst_i = src_i * src_{i+1} * ... * src_{n-1} - * !exclusive && !reverse: - * dst_i = src_0 * src_1 * ... 
* src_i - */ - virtual void exec( - _megdnn_tensor_in src, _megdnn_tensor_out dst, - _megdnn_workspace workspace) = 0; - void deduce_layout(const TensorLayout& src, TensorLayout& dst); - virtual size_t get_workspace_in_bytes( - const TensorLayout& src, const TensorLayout& dst) = 0; - -protected: - void check_exec( - const TensorLayout& src, const TensorLayout& dst, - size_t workspace_in_bytes); -}; -using Cumprod = CumprodForward; - // mxx can be max or min class ArgmxxBase : public OperatorBase { DEF_OPR_IMPL_CTOR(ArgmxxBase, OperatorBase); diff --git a/dnn/scripts/gen_elemwise_multi_type_utils.py b/dnn/scripts/gen_elemwise_multi_type_utils.py index 71126e2d..39aec818 100755 --- a/dnn/scripts/gen_elemwise_multi_type_utils.py +++ b/dnn/scripts/gen_elemwise_multi_type_utils.py @@ -24,7 +24,7 @@ MODES = { 'FUSE_ADD_SIGMOID', 'ATAN2', 'H_SWISH_GRAD', 'FUSE_ADD_H_SWISH', 'SILU_GRAD', 'GELU_GRAD', 'PRELU', 'ASINH_GRAD', 'ACOSH_GRAD', 'ATANH_GRAD', 'SOFTPLUS_GRAD', - 'RELU6_GRAD', 'HSIGMOID_GRAD', 'SAFE_DIV'], + 'RELU6_GRAD', 'HSIGMOID_GRAD'], 3: ['COND_LEQ_MOV', 'COND_LT_MOV', 'FUSE_MUL_ADD3', 'CLIP', 'PRELU_GRAD'], } diff --git a/dnn/scripts/gen_elemwise_utils.py b/dnn/scripts/gen_elemwise_utils.py index e7848d1e..52f28bb7 100755 --- a/dnn/scripts/gen_elemwise_utils.py +++ b/dnn/scripts/gen_elemwise_utils.py @@ -31,7 +31,7 @@ MODES = { 'FUSE_ADD_SIGMOID', 'ATAN2', 'H_SWISH_GRAD', 'FUSE_ADD_H_SWISH', 'SILU_GRAD', 'GELU_GRAD', 'PRELU', 'ASINH_GRAD', 'ACOSH_GRAD', 'ATANH_GRAD', 'SOFTPLUS_GRAD', - 'RELU6_GRAD', 'HSIGMOID_GRAD', 'SAFE_DIV'], + 'RELU6_GRAD', 'HSIGMOID_GRAD'], (3, 'FLOAT'): ['COND_LEQ_MOV', 'COND_LT_MOV', 'FUSE_MUL_ADD3', 'CLIP', 'PRELU_GRAD'], (1, 'BOOL'): ['NOT'], (2, 'BOOL'): ['AND', 'OR', 'XOR', 'LT', 'LEQ', 'EQ'], diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py index c5dd9ec7..0e901b57 100755 --- a/dnn/scripts/opr_param_defs.py +++ b/dnn/scripts/opr_param_defs.py @@ -443,10 +443,9 @@ pdef('Elemwise').add_enum( Doc('SQRT = 80', 'unary: x^(1/2)'), Doc('SQUARE = 81', 'unary: x^2'), Doc('SIGN = 82', 'unary: sgn(x)'), - Doc('SAFE_DIV = 83', 'safe div: x / y'), - Doc('NEQ = 84', 'binary: x != y'), - Doc('ISNAN = 85', 'unary: isnan(x)'), - Doc('ISINF = 86', 'unary: isinf(x)'), + Doc('NEQ = 83', 'binary: x != y'), + Doc('ISNAN = 84', 'unary: isnan(x)'), + Doc('ISINF = 85', 'unary: isinf(x)'), ) pdef('ElemwiseMultiType').add_enum( @@ -740,20 +739,6 @@ Currently, ```DEFAULT``` mode means: 'whether the cumsum is forward or backward'), 'false')) -(pdef('Cumprod', 'calculate accumulated product along given axis'). - add_fields('int32', - Doc('axis', - 'axis along which cumprod is performed, default with INT_MAX'), - (1<<31)-1). - add_fields('bool', - Doc('exclusive', - 'whether the current element is taken into account'), - 'true'). - add_fields('bool', - Doc('reverse', - 'whether the cumprod is forward or backward'), - 'false')) - (pdef('CondTake'). 
add_enum('Mode', Doc('EQ = 0', 'take if ``abs(data-val)= 0); - megdnn_assert(static_cast(param().axis) < src.ndim); - auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst); - megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); -} - -} // namespace megdnn - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/common/elemwise/each_mode.inl b/dnn/src/common/elemwise/each_mode.inl index 8c05fcab..48ca51dc 100644 --- a/dnn/src/common/elemwise/each_mode.inl +++ b/dnn/src/common/elemwise/each_mode.inl @@ -89,8 +89,7 @@ MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) \ - MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) \ - MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) + MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) #define MEGDNN_FOREACH_ELEMWISE_MODE_BINARY_INT(cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(ABS_GRAD, cb) \ diff --git a/dnn/src/common/elemwise/kern_defs.cuh b/dnn/src/common/elemwise/kern_defs.cuh index b5dcca44..95105470 100644 --- a/dnn/src/common/elemwise/kern_defs.cuh +++ b/dnn/src/common/elemwise/kern_defs.cuh @@ -247,8 +247,6 @@ DEF_KERN(dt_bool, EQ, x == y); DEF_KERN_INT(FLOOR_DIV, dispatch_floordiv_int(x, y)); DEF_KERN_FLOAT(FLOOR_DIV, floorf(x / y)); -DEF_KERN_INT(SAFE_DIV, y != 0 ? x / y : 0); -DEF_KERN_FLOAT(SAFE_DIV, y != 0.f ? x / y : 0.f); DEF_KERN_INT(MOD, x % y); DEF_KERN_FLOAT(MOD, fmodf(x, y)); diff --git a/dnn/src/common/elemwise/opr_impl.cpp b/dnn/src/common/elemwise/opr_impl.cpp index 3b38ddb2..2f0a1d5a 100644 --- a/dnn/src/common/elemwise/opr_impl.cpp +++ b/dnn/src/common/elemwise/opr_impl.cpp @@ -242,7 +242,6 @@ const ModeTrait& ModeTrait::from_mode(Mode mode) { CB_MODE(Mode::SQRT); CB_MODE(Mode::SQUARE); CB_MODE(Mode::SIGN); - CB_MODE(Mode::SAFE_DIV); default: megdnn_assert( 0, diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h index 07a7535e..a63bd078 100644 --- a/dnn/src/common/handle_impl.h +++ b/dnn/src/common/handle_impl.h @@ -106,7 +106,6 @@ private: cb(SVDForward) \ cb(ReduceForward) \ cb(CondTake) \ - cb(CumprodForward) \ cb(CumsumForward) \ cb(ArgmaxForward) \ cb(ArgminForward) \ diff --git a/dnn/src/common/opr_trait.h b/dnn/src/common/opr_trait.h index 8d13b960..89bfad73 100644 --- a/dnn/src/common/opr_trait.h +++ b/dnn/src/common/opr_trait.h @@ -62,7 +62,6 @@ DEF(BatchedMatrixMulForward, 3, true, true); DEF(MatrixInverse, 2, true, true); DEF(SVDForward, 4, true, true); DEF(ReduceForward, 2, true, true); -DEF(CumprodForward, 2, true, true); DEF(CumsumForward, 2, true, true); DEF(ArgmaxForward, 2, true, true); DEF(ArgminForward, 2, true, true); diff --git a/dnn/src/cuda/cumprod/cumprod.cu b/dnn/src/cuda/cumprod/cumprod.cu deleted file mode 100644 index a1b21a6f..00000000 --- a/dnn/src/cuda/cumprod/cumprod.cu +++ /dev/null @@ -1,25 +0,0 @@ -#include "./kern_impl.cuinl" - -namespace megdnn { -namespace cuda { -namespace cumprod { - -#define INST_(T, Op, exclusive, reverse) \ - template void run_kern( \ - T*, void*, uint32_t, uint32_t, uint32_t, uint32_t, const Op&, \ - cudaStream_t) -#define INST(T) \ - INST_(T, ProdOp, true, true); \ - INST_(T, ProdOp, false, true); \ - INST_(T, ProdOp, true, false); \ - INST_(T, ProdOp, false, false); - -#define cb(DType) INST(typename DTypeTrait::ctype) - -MEGDNN_FOREACH_COMPUTING_DTYPE(cb) - -} // namespace cumprod -} // namespace cuda -} // namespace megdnn - -// vim: ft=cuda syntax=cuda.doxygen diff --git a/dnn/src/cuda/cumprod/kern.cuh b/dnn/src/cuda/cumprod/kern.cuh deleted file mode 100644 
index d9e94a97..00000000 --- a/dnn/src/cuda/cumprod/kern.cuh +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once - -#include "src/cuda/utils.cuh" - -#include -#include - -namespace megdnn { -namespace cuda { -namespace cumprod { - -//! compute conventional product of elements -template -struct ProdOp { - const T* data; - typedef ProdOp ContigOp; - - ProdOp(const T* d) : data(d) {} - - __host__ __device__ static T init() { return T(1); } - __device__ static T apply(T lhs, T rhs) { return lhs * rhs; } - __device__ T visit(uint32_t idx) const { return data[idx]; } - - static ProdOp make_contig(const T* data) { return ProdOp(data); } -}; - -/*! - * \brief cumprod kernel launcher; defined in kern_impl.cuinl - * \tparam T output data type - * \tparam Op reduction operator class, which must provide following interface: - * typedef ContigOp - * static T init(): the identity element - * static T apply(T lhs, T rhs): the reduction operation - * T visit(uint32_t idx) const: access input - * static ContigOp make_contig(const T *data): make an Op to continue - * reduction on temp buffer - * - * Note that Op::init() must be accessible from both host and device. - * - * In exclusive mode, Op::init() would be filled to the boundary - * - * The buffer in *op* and *dst* should not have identical memory addresses. - */ -template -void run_kern( - T* dst, void* workspace, uint32_t workspace_size, uint32_t A, uint32_t B, - uint32_t C, const Op& op, cudaStream_t stream); - -/*! - * \brief get required workspace size for cumprod, in bytes - * \param item_size size of item; i.e. sizeof(T) in run_kern - * - * Note: cuda device must be set to the computing device before calling this - * function. - */ -uint32_t get_workspace_in_bytes(uint32_t A, uint32_t B, uint32_t C, uint32_t item_size); - -} // namespace cumprod -} // namespace cuda -} // namespace megdnn - -// vim: ft=cpp syntax=cpp.doxygen diff --git a/dnn/src/cuda/cumprod/kern_helper.cuh b/dnn/src/cuda/cumprod/kern_helper.cuh deleted file mode 100644 index 52a3bdc4..00000000 --- a/dnn/src/cuda/cumprod/kern_helper.cuh +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include -#include - -namespace megdnn { -namespace cuda { -namespace cumprod { - -void get_BX_BY(uint32_t A, uint32_t B, uint32_t C, uint32_t& BX, uint32_t& BY); - -uint32_t get_workspace_bytes_for_cub_1d(uint32_t nr_item, uint32_t item_size); - -} // namespace cumprod -} // namespace cuda -} // namespace megdnn - -// vim: ft=cpp syntax=cpp.doxygen diff --git a/dnn/src/cuda/cumprod/kern_impl.cu b/dnn/src/cuda/cumprod/kern_impl.cu deleted file mode 100644 index c3e02a31..00000000 --- a/dnn/src/cuda/cumprod/kern_impl.cu +++ /dev/null @@ -1,82 +0,0 @@ -#include "./kern.cuh" -#include "./kern_helper.cuh" -#include "./kern_impl.cuinl" -#include "src/cuda/kernel_common/diagnostic_prologue.cuh" - -using namespace megdnn::cuda; -using namespace cumprod::detail::cubwrap; - -namespace { - -template -struct FakeOp { - __device__ T visit(int) { return 0; } - __device__ static T apply(T, T) { return 0; } -}; - -template -uint32_t get_workspace_elems_for_cub_1d_with_dtype_reverse(uint32_t nr_item) { - typedef FakeOp Op; - Op op; - InputIterator inp_iter(op, nr_item); - OutputIterator out_iter(NULL, nr_item); - ScanOp scan_op; - - size_t wk_size0 = 0, wk_size1 = 0; - cuda_check(cub::DeviceScan::ExclusiveScan( - NULL, wk_size0, inp_iter, out_iter, scan_op, 0, nr_item)); - cuda_check(cub::DeviceScan::InclusiveScan( - NULL, wk_size1, inp_iter, out_iter, scan_op, nr_item)); - return std::max(wk_size0, wk_size1); -} - -template 
-uint32_t get_workspace_elems_for_cub_1d_with_dtype(uint32_t nr_item) { - return std::max( - get_workspace_elems_for_cub_1d_with_dtype_reverse(nr_item), - get_workspace_elems_for_cub_1d_with_dtype_reverse(nr_item)); -} - -} // namespace - -uint32_t cumprod::get_workspace_bytes_for_cub_1d(uint32_t nr_item, uint32_t item_size) { - switch (item_size) { -#define CASE(size, type) \ - case size: \ - return get_workspace_elems_for_cub_1d_with_dtype(nr_item) - CASE(1, uint8_t); - CASE(2, uint16_t); - CASE(4, uint32_t); - CASE(8, uint64_t); -#undef CASE - default: - report_error("unsupported item size in cumprod"); - } -} - -uint32_t cumprod::get_workspace_in_bytes( - uint32_t A, uint32_t B, uint32_t C, uint32_t item_size) { - if (A == 1 && C == 1) { - return get_workspace_bytes_for_cub_1d(B, item_size); - } - uint32_t BX, BY; - get_BX_BY(A, B, C, BX, BY); - uint32_t BY2 = BY * 2; - uint32_t res = 0; - while (B > BY2) { - B = (B + BY2 - 1) / BY2; - res += A * B * C; - } - return res * item_size; -} - -void cumprod::get_BX_BY( - uint32_t /* A */, uint32_t /* B */, uint32_t C, uint32_t& BX, uint32_t& BY) { - BX = 1; - while (BX < C && BX * 2 <= 32) - BX *= 2; - BY = 512 / BX; -} - -#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" -// vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/cumprod/kern_impl.cuinl b/dnn/src/cuda/cumprod/kern_impl.cuinl deleted file mode 100644 index 6f8e08a0..00000000 --- a/dnn/src/cuda/cumprod/kern_impl.cuinl +++ /dev/null @@ -1,326 +0,0 @@ -#include "./kern.cuh" -#include "./kern_helper.cuh" -#include "megdnn/dtype.h" -#include "src/cuda/cub/device/device_scan.cuh" -#include "src/cuda/cub/util_ptx.cuh" - -namespace megdnn { -namespace cuda { -namespace cumprod { -namespace detail { - -/** - * src shape is (A, B, C), performing blockwise scan over B axis. - * Each CUDA block calculates a blockwise scan result of size (BY2, BX). - * The block area corresponds to a 2-D area on (B, C) dimension of src. - * - * Per-block prefix sum is stored in dst (dst has the same shape as src). - * - * The whole scan result of each block as a single value is stored in - * block_sum (of shape (A, B/BY2, C)). - * - * block_sum can be NULL. - * - * src and dst can be inplace. - * - * We need to launch (C/BX)*ceil(B/BY2)*A blocks in total. - * Because in CUDA the number of launched blocks over y and z axis are - * limited (at most 65535), we launch all blocks over axis x. - * - * Param: exclusive - * This flag specifies whether the scan is inclusive or exclusive, namely - * whether src_i influences dst_i. - * - * Param: reverse: - * This flag specifies whether the scan is forward or backward. 
- * - * Example: - * !exclusive && !reverse: dst_i = op(src_0, src_1, ..., src_i) - * !exclusive && reverse: dst_i = op(src_i, src_{i+1}, ..., src_{n-1}) - * exclusive && !reverse: dst_i = op(src_0, src_1, ..., src{i-1}) - * exclusive && reverse: dst_i = op(src_{i+1}, src{i+2}, ..., src{n-1}) - * - * Op should have the following methods: - * static T init() - * static T apply(T lhs, T rhs) - */ -template -__global__ void scan_kernel(T *dst, T *block_sum, - uint32_t A, uint32_t B, uint32_t C, const Op op) { - constexpr size_t warp_size = 32; - const uint32_t BY2 = BY*2; - const uint32_t B_ = (B+BY2-1) / BY2; - const uint32_t C_ = (C+BX-1) / BX; - const uint32_t GX = C_; - const uint32_t GY = B_; - // src, dst: (A, B, C) - // block_sum: (A, B_, C) - // shared: (BY2+1, BX) - const uint32_t bx = blockIdx.x % GX; - const uint32_t by = blockIdx.x / GX % GY; - const uint32_t bz = blockIdx.x / GX / GY; - const uint32_t tx = threadIdx.x; - const uint32_t ty = threadIdx.y; - // TODO: shared memory bank conflict optimization -#define shared_idx(x) ((x) + ((x) >> 5)) - volatile __shared__ T cache[shared_idx((BY2+1)*BX)]; - uint32_t base_offset = (bz)*B*C + (by*BY2)*C + (bx*BX); - dst += base_offset; - // load to cache - if (reverse) { - cache[shared_idx((BY2-ty)*BX+tx)] = ty+by*BY2 < B && tx+bx*BX < C ? - op.visit(base_offset + ty*C + tx) : Op::init(); - } else { - cache[shared_idx((ty+1)*BX+tx)] = ty+by*BY2 < B && tx+bx*BX < C ? - op.visit(base_offset + ty*C + tx) : Op::init(); - } - if (reverse) { - cache[shared_idx((BY-ty)*BX+tx)] = - (ty+BY) + by*BY2 < B && tx+bx*BX < C ? - op.visit(base_offset + (ty+BY)*C + tx) : Op::init(); - } else { - cache[shared_idx((ty+BY+1)*BX+tx)] = - (ty+BY) + by*BY2 < B && tx+bx*BX < C ? - op.visit(base_offset + (ty+BY)*C + tx) : Op::init(); - } - if (ty == 0) { - cache[shared_idx(tx)] = Op::init(); - } - __syncthreads(); - uint32_t total, stride; - // first pass -#pragma unroll - for (total = BY, stride = 1; - total > 0; - total >>= 1, stride <<= 1) - { - if (ty < total) { - uint32_t ai = shared_idx(stride * (2*ty+1) * BX + tx); - uint32_t bi = shared_idx(stride * (2*ty+2) * BX + tx); - cache[bi] = Op::apply(cache[bi], cache[ai]); - } - if (total > warp_size/BX) __syncthreads(); - else cub::WARP_SYNC(0xffffffff); - } - // second pass -#pragma unroll - for (total = 1, stride = BY; - stride > 0; - total <<= 1, stride >>= 1) - { - if (total > warp_size/BX) __syncthreads(); - else cub::WARP_SYNC(0xffffffff); - if (ty < total) { - uint32_t ai = shared_idx(stride * (2*ty+0) * BX + tx); - uint32_t bi = shared_idx(stride * (2*ty+1) * BX + tx); - cache[bi] = Op::apply(cache[bi], cache[ai]); - } - } - __syncthreads(); - uint32_t ty_offset = (exclusive ? 
0 : 1); - if (ty+by*BY2 < B && tx+bx*BX < C) { - if (reverse) { - dst[ty*C + tx] = cache[shared_idx((BY2-1-ty+ty_offset)*BX + tx)]; - } else { - dst[ty*C + tx] = cache[shared_idx((ty+ty_offset)*BX + tx)]; - } - } - if (ty+BY+by*BY2 < B && tx+bx*BX < C) { - if (reverse) { - dst[(ty+BY)*C + tx] = - cache[shared_idx((BY2-1-(ty+BY)+ty_offset)*BX + tx)]; - } else { - dst[(ty+BY)*C + tx] = - cache[shared_idx((ty+BY+ty_offset)*BX + tx)]; - } - } - if (block_sum && ty == 0 && bx*BX+tx < C) { - block_sum[(bz)*B_*C + (by)*C + (bx*BX) + tx] = - cache[shared_idx(BY2*BX + tx)]; - } -} - -template -__global__ void update_kernel(T *dst, const T *delta, - uint32_t A, uint32_t B, uint32_t C) { - const uint32_t BY2 = BY*2; - const uint32_t B_ = (B+BY2-1) / BY2; - const uint32_t C_ = (C+BX-1) / BX; - const uint32_t GX = C_; - const uint32_t GY = B_; - // src: (A, B, C) - // delta: (A, B_, C) - const uint32_t bx = blockIdx.x % GX; - const uint32_t by = blockIdx.x / GX % GY; - const uint32_t bz = blockIdx.x / GX / GY; - const uint32_t tx = threadIdx.x; - const uint32_t ty = threadIdx.y; - - if (tx + bx*BX < C) { - T delta_v = delta[(bz)*B_*C + (by)*C + (bx*BX) + tx]; - if (ty+by*BY2 < B && tx+bx*BX < C) { - T &res = dst[bz*B*C + (ty+by*BY2)*C + (tx+bx*BX)]; - res = Op::apply(res, delta_v); - } - if (ty+BY+by*BY2 < B && tx+bx*BX < C) { - T &res = dst[bz*B*C + (ty+BY+by*BY2)*C + (tx+bx*BX)]; - res = Op::apply(res, delta_v); - } - } -} - -template -void run_kern_multiAC(T* dst, T* workspace, uint32_t A, uint32_t B, - uint32_t C, const Op& op, cudaStream_t stream); - -template -void do_run_kern(T *dst, T *workspace, - uint32_t A, uint32_t B, uint32_t C, const Op &op, cudaStream_t stream) { - const uint32_t BY2 = BY*2; - const uint32_t B_ = (B+BY2-1)/BY2; - const uint32_t C_ = (C+BX-1)/BX; - - dim3 blocks(C_*B_*A); - dim3 threads(BX, BY); - - scan_kernel - <<>>( - dst, B > BY2 ? workspace : NULL, A, B, C, op); - if (B <= BY2) - return; - - run_kern_multiAC( - workspace, workspace + A*B_*C, A, B_, C, - Op::make_contig(workspace), stream); - update_kernel<<>>( - dst, workspace, A, B, C); -} - -template -void run_kern_multiAC(T* dst, T* workspace, uint32_t A, uint32_t B, uint32_t C, - const Op& op, cudaStream_t stream) { -#define IF(BX, BY) \ - do { \ - if (vBX == BX && vBY == BY) { \ - return do_run_kern( \ - dst, workspace, A, B, C, op, stream); \ - } \ - } while (0) - - uint32_t vBX, vBY; - get_BX_BY(A, B, C, vBX, vBY); - IF(1, 512); - IF(2, 256); - IF(4, 128); - IF(8, 64); - IF(16, 32); - IF(32, 16); - megdnn_trap(); -#undef IF -} - -//! 
wrap cub library for 1-dim scan -namespace cubwrap { - -template -class InputIterator : public std::iterator { - int m_offset, m_len; - Op m_op; - -public: - InputIterator(Op op, int len) : m_offset(0), m_len(len), m_op(op) {} - - __device__ InputIterator(int offset, int len, Op op) - : m_offset(offset), m_len(len), m_op(op) {} - - __device__ T operator[](int idx) { - idx += m_offset; - if (reverse) { - idx = m_len - 1 - idx; - } - return m_op.visit(idx); - } - - __device__ InputIterator operator+(int offset) { - return InputIterator(m_offset + offset, m_len, m_op); - } -}; - -template -class OutputIterator - : public std::iterator { - int m_offset, m_len; - T* m_dst; - -public: - OutputIterator(T* dst, int len) : m_offset(0), m_len(len), m_dst(dst) {} - - __device__ OutputIterator(int offset, int len, T* dst) - : m_offset(offset), m_len(len), m_dst(dst) {} - - __device__ T& operator[](int idx) { - idx += m_offset; - if (reverse) { - idx = m_len - 1 - idx; - } - return m_dst[idx]; - } - - __device__ OutputIterator operator+(int offset) { - return OutputIterator(m_offset + offset, m_len, m_dst); - } -}; - -template -struct ScanOp { - __device__ __host__ T operator()(T a, T b) { - // cub requires it to be a __device__ __host__ function but MegDNN has - // no such contraint on Op::apply; so we just trap on host -#ifdef __CUDA_ARCH__ - return Op::apply(a, b); -#else - megdnn_trap(); -#endif - } -}; - -template -void invoke(T* dst, void* workspace, size_t wk_size, const Op& op, uint32_t len, - cudaStream_t stream) { - InputIterator inp_iter(op, len); - OutputIterator out_iter(dst, len); - ScanOp scan_op; - - if (exclusive) { - cuda_check(cub::DeviceScan::ExclusiveScan(workspace, wk_size, inp_iter, - out_iter, scan_op, Op::init(), - len, stream)); - } else { - cuda_check(cub::DeviceScan::InclusiveScan( - workspace, wk_size, inp_iter, out_iter, scan_op, len, stream)); - } -} -} // namespace cubwrap - -} // namespace detail - -template -void run_kern(T* dst, void* workspace, uint32_t workspace_size, uint32_t A, - uint32_t B, uint32_t C, const Op& op, cudaStream_t stream) { - if (A == 1 && C == 1) { - return detail::cubwrap::invoke( - dst, workspace, workspace_size, op, B, stream); - } - - return detail::run_kern_multiAC( - dst, static_cast(workspace), A, B, C, op, stream); -} - -} // namespace cumprod -} // namespace cuda -} // namespace megdnn - - -// vim: ft=cuda syntax=cuda.doxygen diff --git a/dnn/src/cuda/cumprod/opr_impl.cpp b/dnn/src/cuda/cumprod/opr_impl.cpp deleted file mode 100644 index 13826930..00000000 --- a/dnn/src/cuda/cumprod/opr_impl.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "./opr_impl.h" -#include "./kern.cuh" - -#include "src/common/reduce_helper_device.h" -#include "src/cuda/utils.h" - -using namespace megdnn; -using namespace cuda; -using namespace cumprod; - -namespace { - -/*! 
- * \brief compute cumprod reduction on (A, B, C) tensor to (A, 1, C) - */ -template -void dispatch( - T* dst, T* workspace, size_t workspace_size, size_t A, size_t B, size_t C, - bool exclusive, bool reverse, const Op& op, cudaStream_t stream) { -#define IF(exclusive_v, reverse_v) \ - if (exclusive == exclusive_v && reverse == reverse_v) { \ - run_kern( \ - dst, workspace, workspace_size, A, B, C, op, stream); \ - return; \ - } - IF(true, true) - IF(true, false) - IF(false, true) - IF(false, false) - megdnn_assert_internal(false); -#undef IF -} - -} // anonymous namespace - -void CumprodForwardImpl::exec( - _megdnn_tensor_in src, _megdnn_tensor_in dst, _megdnn_workspace workspace) { - check_exec(src.layout, dst.layout, workspace.size); - size_t A, B, C; - reduce::get_ABC(src.layout, A, B, C, param().axis); - auto stream = cuda_stream(handle()); -#define cb(DType) \ - if (src.layout.dtype == DType()) { \ - using ctype = DTypeTrait::ctype; \ - dispatch>( \ - dst.ptr(), workspace.ptr(), workspace.size, A, B, C, \ - param().exclusive, param().reverse, src.ptr(), stream); \ - return; \ - } - MEGDNN_FOREACH_COMPUTING_DTYPE(cb) -#undef cb - megdnn_assert_internal(false); -} - -size_t CumprodForwardImpl::get_workspace_in_bytes( - const TensorLayout& src, const TensorLayout&) { - size_t A, B, C; - reduce::get_ABC(src, A, B, C, param().axis); - cuda_check(cudaSetDevice(concrete_handle(handle())->device_id())); - return cumprod::get_workspace_in_bytes(A, B, C, src.dtype.size()); -} - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/cumprod/opr_impl.h b/dnn/src/cuda/cumprod/opr_impl.h deleted file mode 100644 index 418b35e2..00000000 --- a/dnn/src/cuda/cumprod/opr_impl.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once -#include "megdnn/oprs.h" - -namespace megdnn { -namespace cuda { - -class CumprodForwardImpl : public CumprodForward { -public: - using CumprodForward::CumprodForward; - void exec( - _megdnn_tensor_in src, _megdnn_tensor_out dst, - _megdnn_workspace workspace) override; - size_t get_workspace_in_bytes( - const TensorLayout& src, const TensorLayout& dst) override; -}; - -} // namespace cuda -} // namespace megdnn -// vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cu deleted file mode 100644 index cf04b51d..00000000 --- a/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cu +++ /dev/null @@ -1,7 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#if !MEGDNN_DISABLE_FLOAT16 -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_bfloat16 -#include "../kern_impl.inl" -#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float16.cu deleted file mode 100644 index bb023926..00000000 --- a/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float16.cu +++ /dev/null @@ -1,7 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#if !MEGDNN_DISABLE_FLOAT16 -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_float16 -#include "../kern_impl.inl" -#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float32.cu deleted file mode 100644 index 9c8ef576..00000000 --- a/dnn/src/cuda/elemwise/kimpl/SAFE_DIV_dt_float32.cu +++ /dev/null @@ -1,5 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) 
-#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_float32 -#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/SAFE_DIV_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/SAFE_DIV_dt_qint8_dt_qint8.cu deleted file mode 100644 index 85d4d9ad..00000000 --- a/dnn/src/cuda/elemwise_multi_type/kimpl/SAFE_DIV_dt_qint8_dt_qint8.cu +++ /dev/null @@ -1,6 +0,0 @@ -// generated by gen_elemwise_multi_type_kern_impls.py -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_STYPE dt_qint8 -#define KERN_IMPL_DTYPE dt_qint8 -#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise_multi_type/opr_impl.cpp b/dnn/src/cuda/elemwise_multi_type/opr_impl.cpp index f381d7b5..2f45e524 100644 --- a/dnn/src/cuda/elemwise_multi_type/opr_impl.cpp +++ b/dnn/src/cuda/elemwise_multi_type/opr_impl.cpp @@ -268,7 +268,6 @@ IMPL_MODE_DISPATCHER(2, dt_quint4, dt_quint4); #undef FOREACH #define FOREACH(cb) \ - MEGDNN_ELEMWISE_MODE_ENABLE(COND_LT_MOV, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(COND_LEQ_MOV, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(FUSE_MUL_ADD3, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) diff --git a/dnn/src/cuda/handle_create.cpp b/dnn/src/cuda/handle_create.cpp index 2669eedd..fee747c0 100644 --- a/dnn/src/cuda/handle_create.cpp +++ b/dnn/src/cuda/handle_create.cpp @@ -16,7 +16,6 @@ #include "src/cuda/convolution3d/opr_impl.h" #include "src/cuda/convpooling/opr_impl.h" #include "src/cuda/correlation/opr_impl.h" -#include "src/cuda/cumprod/opr_impl.h" #include "src/cuda/cumsum/opr_impl.h" #include "src/cuda/cvt_color/opr_impl.h" #include "src/cuda/dct/opr_impl.h" @@ -117,7 +116,6 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(BatchedMatrixMulForward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(SVDForward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(ReduceForward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(CondTake); -MEGDNN_SPECIALIZE_CREATE_OPERATOR(CumprodForward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(CumsumForward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(ArgmaxForward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(ArgminForward); diff --git a/dnn/src/fallback/elemwise/fallback_impl/opr_binary_impl.cpp b/dnn/src/fallback/elemwise/fallback_impl/opr_binary_impl.cpp index 85e79ee1..f0cf6c13 100644 --- a/dnn/src/fallback/elemwise/fallback_impl/opr_binary_impl.cpp +++ b/dnn/src/fallback/elemwise/fallback_impl/opr_binary_impl.cpp @@ -266,7 +266,6 @@ INST(Mode::ATANH_GRAD); INST(Mode::SOFTPLUS_GRAD); INST(Mode::RELU6_GRAD); INST(Mode::HSIGMOID_GRAD); -INST(Mode::SAFE_DIV); #undef INST } // namespace fallback } // namespace megdnn diff --git a/dnn/src/naive/cumprod/opr_impl.cpp b/dnn/src/naive/cumprod/opr_impl.cpp deleted file mode 100644 index 078b552f..00000000 --- a/dnn/src/naive/cumprod/opr_impl.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "src/naive/cumprod/opr_impl.h" -#include "src/naive/handle.h" - -#include "src/common/reduce_helper.h" -#include "src/common/utils.h" - -namespace { - -template -void exec_internal( - const T* __restrict src, T* __restrict dst, size_t A, size_t B, size_t C, - bool exclusive, bool reverse) { - for (size_t a = 0; a < A; ++a) - for (size_t c = 0; c < C; ++c) { - if (exclusive && reverse) { - T prod = T(1); - for (size_t b = B; b > 0; --b) { - dst[a * B * C + (b - 1) * C + c] = prod; - prod *= src[a * B * C + (b - 1) * C + c]; - } - } else if (exclusive && !reverse) { - T prod = T(1); - for (size_t b = 0; b < B; ++b) { - dst[a * B * C + b * C + c] = prod; - prod *= src[a * B * C + b * C + c]; - } - } else if (!exclusive && reverse) { - T 
prod = T(1); - for (size_t b = B; b > 0; --b) { - prod *= src[a * B * C + (b - 1) * C + c]; - dst[a * B * C + (b - 1) * C + c] = prod; - } - } else if (!exclusive && !reverse) { - T prod = T(1); - for (size_t b = 0; b < B; ++b) { - prod *= src[a * B * C + b * C + c]; - dst[a * B * C + b * C + c] = prod; - } - } else { - megdnn_assert_internal(false); - } - } -} - -} // anonymous namespace - -namespace megdnn { -namespace naive { - -void CumprodForwardImpl::exec( - _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) { - check_exec(src.layout, dst.layout, workspace.size); - - size_t A, B, C; - reduce::get_ABC(src.layout, A, B, C, param().axis); -#define cb(DType) \ - if (src.layout.dtype == DType()) { \ - using ctype = DTypeTrait::ctype; \ - MEGDNN_DISPATCH_CPU_KERN_OPR(exec_internal( \ - src.ptr(), dst.ptr(), A, B, C, param().exclusive, \ - param().reverse)); \ - return; \ - } - MEGDNN_FOREACH_COMPUTING_DTYPE(cb) - megdnn_assert_internal(0); -#undef cb -} - -} // namespace naive -} // namespace megdnn - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/naive/cumprod/opr_impl.h b/dnn/src/naive/cumprod/opr_impl.h deleted file mode 100644 index 29b3e88e..00000000 --- a/dnn/src/naive/cumprod/opr_impl.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once -#include "megdnn/oprs.h" - -namespace megdnn { -namespace naive { - -class CumprodForwardImpl : public CumprodForward { -public: - using CumprodForward::CumprodForward; - void exec( - _megdnn_tensor_in src, _megdnn_tensor_out dst, - _megdnn_workspace workspace) override; - size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&) override { - return 0; - } -}; - -} // namespace naive -} // namespace megdnn -// vim: syntax=cpp.doxygen diff --git a/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp deleted file mode 100644 index cf04b51d..00000000 --- a/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#if !MEGDNN_DISABLE_FLOAT16 -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_bfloat16 -#include "../kern_impl.inl" -#endif diff --git a/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_float16.cpp deleted file mode 100644 index bb023926..00000000 --- a/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_float16.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#if !MEGDNN_DISABLE_FLOAT16 -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_float16 -#include "../kern_impl.inl" -#endif diff --git a/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_float32.cpp deleted file mode 100644 index 9c8ef576..00000000 --- a/dnn/src/naive/elemwise/kimpl/SAFE_DIV_dt_float32.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_float32 -#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/opr_impl.cpp b/dnn/src/naive/elemwise/opr_impl.cpp index cd48b7f6..243aa066 100644 --- a/dnn/src/naive/elemwise/opr_impl.cpp +++ b/dnn/src/naive/elemwise/opr_impl.cpp @@ -5,8 +5,6 @@ #include "src/naive/elemwise/kern_caller.h" #include "src/naive/handle.h" -#include - #include "midout.h" 
MIDOUT_DECL(megdnn_naive_elemwise) diff --git a/dnn/src/naive/handle.cpp b/dnn/src/naive/handle.cpp index d7d06966..21d23ad6 100644 --- a/dnn/src/naive/handle.cpp +++ b/dnn/src/naive/handle.cpp @@ -18,7 +18,6 @@ #include "src/naive/convolution3d/opr_impl.h" #include "src/naive/convpooling/opr_impl.h" #include "src/naive/correlation/opr_impl.h" -#include "src/naive/cumprod/opr_impl.h" #include "src/naive/cumsum/opr_impl.h" #include "src/naive/cvt_color/opr_impl.h" #include "src/naive/dct/opr_impl.h" diff --git a/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp.hip deleted file mode 100644 index 94de95ac..00000000 --- a/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_bfloat16.cpp.hip +++ /dev/null @@ -1,7 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#if !MEGDNN_DISABLE_FLOAT16 -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_bfloat16 -#include "../kern_impl.inl" -#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_float16.cpp.hip deleted file mode 100644 index af3eaa2b..00000000 --- a/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_float16.cpp.hip +++ /dev/null @@ -1,7 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#if !MEGDNN_DISABLE_FLOAT16 -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_float16 -#include "../kern_impl.inl" -#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_float32.cpp.hip deleted file mode 100644 index 19234561..00000000 --- a/dnn/src/rocm/elemwise/kimpl/SAFE_DIV_dt_float32.cpp.hip +++ /dev/null @@ -1,5 +0,0 @@ -// generated by gen_elemwise_kern_impls.py -#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SAFE_DIV, cb) -#define KERN_IMPL_ARITY 2 -#define KERN_IMPL_CTYPE dt_float32 -#include "../kern_impl.inl" diff --git a/dnn/test/common/elemwise.cpp b/dnn/test/common/elemwise.cpp index d3123a65..2ef46962 100644 --- a/dnn/test/common/elemwise.cpp +++ b/dnn/test/common/elemwise.cpp @@ -815,7 +815,7 @@ DEF_TEST(all_modes) { checker.set_rng(0, &abslt1_rng_f32); } else if ( mode == Mode::MOD || mode == Mode::TRUE_DIV || - mode == Mode::FLOOR_DIV || mode == Mode::SAFE_DIV) { + mode == Mode::FLOOR_DIV) { if (dtype.category() == DTypeCategory::INT) { checker.set_rng(0, &default_rng_i32); checker.set_rng(1, &nonzero_rng_i32); diff --git a/dnn/test/cuda/cumprod.cpp b/dnn/test/cuda/cumprod.cpp deleted file mode 100644 index f7fbe7a9..00000000 --- a/dnn/test/cuda/cumprod.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "test/cuda/fixture.h" - -#include "megdnn/oprs.h" -#include "test/common/checker.h" - -namespace megdnn { -namespace test { - -TEST_F(CUDA, CUMPROD) { - Checker checker(handle_cuda()); - struct TestArg { - param::Cumprod param; - TensorShape shape; - TestArg(param::Cumprod param, TensorShape shape) : param(param), shape(shape) {} - }; - std::vector args, args_int32; - for (auto shape : - TensorShapeArray{{10000}, {33000, 33}, {100, 100, 100}, {30, 30, 30, 30}}) { - for (size_t axis = 0; axis < shape.ndim; ++axis) { - args.emplace_back(param::Cumprod(axis, true, true), shape); - args.emplace_back(param::Cumprod(axis, true, false), shape); - args.emplace_back(param::Cumprod(axis, false, true), shape); - args.emplace_back(param::Cumprod(axis, false, false), shape); - } - } - for (auto shape : TensorShapeArray{{1}, {10}, {100}, 
{1000}, {10000}, {100000}}) { - args.emplace_back(param::Cumprod(0, true, true), shape); - args.emplace_back(param::Cumprod(0, true, false), shape); - args.emplace_back(param::Cumprod(0, false, true), shape); - args.emplace_back(param::Cumprod(0, false, false), shape); - } - for (auto shape : TensorShapeArray{ - {1}, - {10}, - {100}, - {1000}, - {10000}, - {100000}, - {1000000}, - {1050000}, - {2100000}}) { - args_int32.emplace_back(param::Cumprod(0, true, true), shape); - args_int32.emplace_back(param::Cumprod(0, true, false), shape); - args_int32.emplace_back(param::Cumprod(0, false, true), shape); - args_int32.emplace_back(param::Cumprod(0, false, false), shape); - } - for (auto arg : args) { - checker.set_param(arg.param); - checker.set_epsilon(1e-2); - checker.set_dtype(0, dtype::Float32()).execs({{arg.shape}, {}}); - checker.set_dtype(0, dtype::Int16()).execs({{arg.shape}, {}}); - checker.set_dtype(0, dtype::Int32()).execs({{arg.shape}, {}}); - } - for (auto arg : args_int32) { - checker.set_param(arg.param); - checker.set_epsilon(1e-2); - checker.set_dtype(0, dtype::Int32()).execs({{arg.shape}, {}}); - } -} - -} // namespace test -} // namespace megdnn -// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/dnn/test/naive/record1.cpp b/dnn/test/naive/record1.cpp index cfa84c3f..a715782a 100644 --- a/dnn/test/naive/record1.cpp +++ b/dnn/test/naive/record1.cpp @@ -494,49 +494,6 @@ TEST_F(NAIVE, CONV3D_RECORD) { } } -//! cumprod -TEST_F(NAIVE, CUMPROD_RECORD) { - TaskRecordChecker checker(2); - struct TestArg { - param::Cumprod param; - TensorShape shape; - TestArg(param::Cumprod param, TensorShape shape) : param(param), shape(shape) {} - }; - std::vector args, args_int32; - for (auto shape : TensorShapeArray{{1000}, {330, 33}, {10, 10, 10}, {5, 5, 5, 5}}) { - for (size_t axis = 0; axis < shape.ndim; ++axis) { - args.emplace_back(param::Cumprod(axis, true, true), shape); - args.emplace_back(param::Cumprod(axis, true, false), shape); - args.emplace_back(param::Cumprod(axis, false, true), shape); - args.emplace_back(param::Cumprod(axis, false, false), shape); - } - } - for (auto shape : TensorShapeArray{{1}, {10}, {100}, {1000}, {10000}}) { - args.emplace_back(param::Cumprod(0, true, true), shape); - args.emplace_back(param::Cumprod(0, true, false), shape); - args.emplace_back(param::Cumprod(0, false, true), shape); - args.emplace_back(param::Cumprod(0, false, false), shape); - } - for (auto shape : TensorShapeArray{{1}, {10}, {100}, {1000}, {10000}}) { - args_int32.emplace_back(param::Cumprod(0, true, true), shape); - args_int32.emplace_back(param::Cumprod(0, true, false), shape); - args_int32.emplace_back(param::Cumprod(0, false, true), shape); - args_int32.emplace_back(param::Cumprod(0, false, false), shape); - } - for (auto arg : args) { - checker.set_param(arg.param); - checker.set_epsilon(1e-2); - checker.set_dtype(0, dtype::Float32()).execs({{arg.shape}, {}}); - checker.set_dtype(0, dtype::Int16()).execs({{arg.shape}, {}}); - checker.set_dtype(0, dtype::Int32()).execs({{arg.shape}, {}}); - } - for (auto arg : args_int32) { - checker.set_param(arg.param); - checker.set_epsilon(1e-2); - checker.set_dtype(0, dtype::Int32()).execs({{arg.shape}, {}}); - } -} - //! 
cumsum TEST_F(NAIVE, CUMSUM_RECORD) { TaskRecordChecker checker(2); diff --git a/imperative/python/megengine/functional/tensor.py b/imperative/python/megengine/functional/tensor.py index 7ce9b2de..24eabf54 100755 --- a/imperative/python/megengine/functional/tensor.py +++ b/imperative/python/megengine/functional/tensor.py @@ -27,7 +27,6 @@ __all__ = [ "broadcast_to", "concat", "cond_take", - "cumprod", "cumsum", "diag", "expand_dims", @@ -1140,22 +1139,6 @@ def cumsum(inp: Tensor, axis: int): Tensor([[ 1 3 6] [ 4 9 15]], dtype=int32, device=xpux:0) """ + assert isinstance(inp, Tensor), "input of cumsum must be type of Tensor" op = builtin.Cumsum(axis=axis, exclusive=False, reverse=False) return apply(op, inp)[0] - - -def cumprod(inp: Tensor, axis: int): - r"""Computes the cumulative product of elements along given axis. - - Args: - inp: input tensor. - axis: axis along which cumprod is performed. - - Examples: - >>> x = Tensor([[1, 2, 3], [4, 5, 6]], "int32") - >>> F.cumprod(x, 1) - Tensor([[ 1 2 6] - [ 4 20 120]], dtype=int32, device=xpux:0) - """ - op = builtin.Cumprod(axis=axis, exclusive=False, reverse=False) - return apply(op, inp)[0] diff --git a/imperative/python/test/unit/core/test_autodiff.py b/imperative/python/test/unit/core/test_autodiff.py index 770149fe..a404b7c4 100644 --- a/imperative/python/test/unit/core/test_autodiff.py +++ b/imperative/python/test/unit/core/test_autodiff.py @@ -501,22 +501,6 @@ def test_dot(): np.testing.assert_equal(np.ones((2, 2), dtype=np.float32), x.grad.numpy()) -def test_cumprod(): - x = mge.Parameter(F.full((2, 2), 2.0, dtype="float32")) - - with Grad() as grad: - grad.wrt(x, callback=save_to(x)) - - def f(x): - return F.cumprod(x, axis=0) - - y = f(x) - grad(y, F.ones_like(y)) - - expected = np.array([[3.0, 3.0], [2.0, 2.0]], dtype=np.float32) - np.testing.assert_almost_equal(x.grad.numpy(), expected) - - def test_pixel_shuffle(): x = np.random.rand(2, 3, 16, 3, 4).astype("float32") diff --git a/imperative/src/impl/ops/cumxxx.cpp b/imperative/src/impl/ops/cumxxx.cpp deleted file mode 100644 index c05a235e..00000000 --- a/imperative/src/impl/ops/cumxxx.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "../blob_manager_impl.h" -#include "../dnn_op_helper.h" -#include "../op_trait.h" -#include "megbrain/imperative/ops/autogen.h" -#include "megbrain/opr/misc.h" - -namespace mgb::imperative { - -namespace { -namespace cumsum { - -auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { - auto&& op = static_cast(def); - OperatorNodeConfig config{op.make_name()}; - return opr::Cumsum::make(inputs[0], op.param(), config); -} - -OP_TRAIT_REG(Cumsum, Cumsum).apply_on_var_node(apply_on_var_node).fallback(); -} // namespace cumsum -} // namespace - -namespace { -namespace cumprod { -auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { - auto&& op = def.cast_final_safe(); - OperatorNodeConfig config{op.make_name()}; - return opr::Cumprod::make(inputs[0], op.param(), config); -} - -OP_TRAIT_REG(Cumprod, Cumprod).apply_on_var_node(apply_on_var_node).fallback(); -} // namespace cumprod -} // namespace - -} // namespace mgb::imperative diff --git a/imperative/src/impl/ops/specializations.cpp b/imperative/src/impl/ops/specializations.cpp index 3b7b4d38..b049c432 100644 --- a/imperative/src/impl/ops/specializations.cpp +++ b/imperative/src/impl/ops/specializations.cpp @@ -652,6 +652,18 @@ OP_TRAIT_REG(SlidingWindowTranspose, SlidingWindowTranspose) } // namespace sliding_window_transpose } // namespace +namespace { +namespace cumsum { 
+auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { + auto&& op = static_cast(def); + OperatorNodeConfig config{op.make_name()}; + return opr::Cumsum::make(inputs[0], op.param(), config); +} + +OP_TRAIT_REG(Cumsum, Cumsum).apply_on_var_node(apply_on_var_node).fallback(); +} // namespace cumsum +} // namespace + namespace lrn { auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { auto&& op = static_cast(def); diff --git a/imperative/tablegen/generated/hash.txt b/imperative/tablegen/generated/hash.txt index 39699415..d0a73119 100644 --- a/imperative/tablegen/generated/hash.txt +++ b/imperative/tablegen/generated/hash.txt @@ -1,7 +1,7 @@ -792a91e469d151906d5275ebd65cedf0 ../../dnn/scripts/opr_param_defs.py -dcf9ae8b7881e9f93870aaed1b18f1dd ../../src/core/include/megbrain/ir/ops.td -59bf6c34770e60b030d962484f2eac3b generated/opdef.h.inl -d406f17445b9cec6c34b77377f15a61f generated/opdef.cpp.inl -67fd2dee346d795220231fc266d260cc generated/opdef.py.inl -65740fff1c79fdb2f955dd52df88fb6e generated/opdef.cpy.inl +905bdf78e5413b06873be64b4ba55db9 ../../dnn/scripts/opr_param_defs.py +759bfbf27fd3f0dd6b6edf06377e1d6b ../../src/core/include/megbrain/ir/ops.td +2a5851d0e2470d4d045811e7a20b1a3f generated/opdef.h.inl +55b862badeed19aed8e84c5d6f468ff2 generated/opdef.cpp.inl +f3f4c7f0ee1b39392df8a679f6d22596 generated/opdef.py.inl +6b11ca844a7855fdc5eebffaf563a89c generated/opdef.cpy.inl 71e1462bf4d882e2615c3c632cb671cc generated/enum_macro.h diff --git a/imperative/tablegen/generated/opdef.cpp.inl b/imperative/tablegen/generated/opdef.cpp.inl index 6c5ec041..a4b4adf9 100644 --- a/imperative/tablegen/generated/opdef.cpp.inl +++ b/imperative/tablegen/generated/opdef.cpp.inl @@ -2303,49 +2303,6 @@ OP_TRAIT_REG(Correlation, Correlation) .props(Correlation_props_impl) .make_name(Correlation_make_name_impl); -MGB_DYN_TYPE_OBJ_FINAL_IMPL(Cumprod); - -namespace { -size_t Cumprod_hash_impl(const OpDef& def_) { - auto&& op_ = def_.cast_final_safe(); - static_cast(op_); - size_t val = mgb::hash(op_.dyn_typeinfo()); - val = mgb::hash_pair_combine(val, mgb::hash(op_.axis)); - val = mgb::hash_pair_combine(val, mgb::hash(op_.exclusive)); - val = mgb::hash_pair_combine(val, mgb::hash(op_.reverse)); - return val; -} -bool Cumprod_is_same_st_impl(const OpDef& lhs_, const OpDef& rhs_) { - auto &&a_ = lhs_.cast_final_safe(), - &&b_ = rhs_.cast_final_safe(); - static_cast(a_); - static_cast(b_); - if (a_.axis != b_.axis) return false; - if (a_.exclusive != b_.exclusive) return false; - if (a_.reverse != b_.reverse) return false; - return true; -} -std::vector> Cumprod_props_impl(const OpDef& def_) { - auto&& op_ = def_.cast_final_safe(); - static_cast(op_); - std::vector> props_; - props_.emplace_back("axis", std::to_string(op_.axis)); - props_.emplace_back("exclusive", std::to_string(op_.exclusive)); - props_.emplace_back("reverse", std::to_string(op_.reverse)); - return props_; -} -std::string Cumprod_make_name_impl(const OpDef& def_) { - auto&& op_ = def_.cast_final_safe(); - static_cast(op_); - return "Cumprod"; -} -} // anonymous namespace -OP_TRAIT_REG(Cumprod, Cumprod) - .hash(Cumprod_hash_impl) - .is_same_st(Cumprod_is_same_st_impl) - .props(Cumprod_props_impl) - .make_name(Cumprod_make_name_impl); - MGB_DYN_TYPE_OBJ_FINAL_IMPL(Cumsum); namespace { @@ -3110,75 +3067,6 @@ std::vector> Elemwise_props_impl(const OpDef case Elemwise::Mode::COND_LT_MOV: props_.emplace_back("mode", "COND_LT_MOV"); break; - case Elemwise::Mode::SINH: - props_.emplace_back("mode", "SINH"); - break; - case 
Elemwise::Mode::COSH: - props_.emplace_back("mode", "COSH"); - break; - case Elemwise::Mode::ASINH: - props_.emplace_back("mode", "ASINH"); - break; - case Elemwise::Mode::ACOSH: - props_.emplace_back("mode", "ACOSH"); - break; - case Elemwise::Mode::ATANH: - props_.emplace_back("mode", "ATANH"); - break; - case Elemwise::Mode::TAN: - props_.emplace_back("mode", "TAN"); - break; - case Elemwise::Mode::ASINH_GRAD: - props_.emplace_back("mode", "ASINH_GRAD"); - break; - case Elemwise::Mode::ACOSH_GRAD: - props_.emplace_back("mode", "ACOSH_GRAD"); - break; - case Elemwise::Mode::ATANH_GRAD: - props_.emplace_back("mode", "ATANH_GRAD"); - break; - case Elemwise::Mode::PRELU: - props_.emplace_back("mode", "PRELU"); - break; - case Elemwise::Mode::CLIP: - props_.emplace_back("mode", "CLIP"); - break; - case Elemwise::Mode::PRELU_GRAD: - props_.emplace_back("mode", "PRELU_GRAD"); - break; - case Elemwise::Mode::SOFTPLUS: - props_.emplace_back("mode", "SOFTPLUS"); - break; - case Elemwise::Mode::SOFTPLUS_GRAD: - props_.emplace_back("mode", "SOFTPLUS_GRAD"); - break; - case Elemwise::Mode::RELU6: - props_.emplace_back("mode", "RELU6"); - break; - case Elemwise::Mode::RELU6_GRAD: - props_.emplace_back("mode", "RELU6_GRAD"); - break; - case Elemwise::Mode::HSIGMOID: - props_.emplace_back("mode", "HSIGMOID"); - break; - case Elemwise::Mode::HSIGMOID_GRAD: - props_.emplace_back("mode", "HSIGMOID_GRAD"); - break; - case Elemwise::Mode::LOGSIGMOID: - props_.emplace_back("mode", "LOGSIGMOID"); - break; - case Elemwise::Mode::SQRT: - props_.emplace_back("mode", "SQRT"); - break; - case Elemwise::Mode::SQUARE: - props_.emplace_back("mode", "SQUARE"); - break; - case Elemwise::Mode::SIGN: - props_.emplace_back("mode", "SIGN"); - break; - case Elemwise::Mode::SAFE_DIV: - props_.emplace_back("mode", "SAFE_DIV"); - break; case Elemwise::Mode::NEQ: props_.emplace_back("mode", "NEQ"); break; diff --git a/imperative/tablegen/generated/opdef.cpy.inl b/imperative/tablegen/generated/opdef.cpy.inl index 0cc0d4f6..68704332 100644 --- a/imperative/tablegen/generated/opdef.cpy.inl +++ b/imperative/tablegen/generated/opdef.cpy.inl @@ -6674,131 +6674,6 @@ void _init_py_Correlation(py::module m) { mgb_assert(PyOp(OpDef)::ctype2pytype.emplace(Correlation::typeinfo(), &py_type).second); } -PyOpDefBegin(Cumprod) // { - static PyGetSetDef py_getsetters[]; - static PyMethodDef tp_methods[]; - - static PyObject* getstate(PyObject* self, PyObject*) { - auto& opdef = reinterpret_cast(self)->inst(); - static_cast(opdef); - std::unordered_map state { - - {"axis", serialization::dump(opdef.axis)}, - {"exclusive", serialization::dump(opdef.exclusive)}, - {"reverse", serialization::dump(opdef.reverse)} - }; - return py::cast(state).release().ptr(); - } - static PyObject* setstate(PyObject* self, PyObject* args) { - PyObject* dict = PyTuple_GetItem(args, 0); - if (!dict) return NULL; - auto state = py::cast>(dict); - auto& opdef = reinterpret_cast(self)->inst(); - static_cast(opdef); - - { - auto&& iter = state.find("axis"); - if (iter != state.end()) { - opdef.axis = serialization::load(iter->second); - } - } - - { - auto&& iter = state.find("exclusive"); - if (iter != state.end()) { - opdef.exclusive = serialization::load(iter->second); - } - } - - { - auto&& iter = state.find("reverse"); - if (iter != state.end()) { - opdef.reverse = serialization::load(iter->second); - } - } - Py_RETURN_NONE; - } - static int py_init(PyObject *self, PyObject *args, PyObject *kwds); -// }; -PyOpDefEnd(Cumprod) - -int PyOp(Cumprod)::py_init(PyObject 
*self, PyObject *args, PyObject *kwds) { - static const char* kwlist[] = {"axis", "exclusive", "reverse", "scope", NULL}; - PyObject *axis = NULL, *exclusive = NULL, *reverse = NULL, *scope = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOOO", const_cast(kwlist), &axis, &exclusive, &reverse, &scope)) - return -1; - - if (axis) { - try { - // TODO: remove this guard which is used for pybind11 implicit conversion - py::detail::loader_life_support guard{}; - reinterpret_cast(self)->inst().axis = - py::cast(py::handle(axis)); - } CATCH_ALL(-1) - } - - if (exclusive) { - try { - // TODO: remove this guard which is used for pybind11 implicit conversion - py::detail::loader_life_support guard{}; - reinterpret_cast(self)->inst().exclusive = - py::cast(py::handle(exclusive)); - } CATCH_ALL(-1) - } - - if (reverse) { - try { - // TODO: remove this guard which is used for pybind11 implicit conversion - py::detail::loader_life_support guard{}; - reinterpret_cast(self)->inst().reverse = - py::cast(py::handle(reverse)); - } CATCH_ALL(-1) - } - - if (scope) { - try { - reinterpret_cast(self)->op - ->set_scope(py::cast(py::handle(scope))); - } CATCH_ALL(-1) - } - - return 0; -} - -PyGetSetDef PyOp(Cumprod)::py_getsetters[] = { - {const_cast("axis"), py_get_generic(Cumprod, axis), py_set_generic(Cumprod, axis), const_cast("axis"), NULL}, - {const_cast("exclusive"), py_get_generic(Cumprod, exclusive), py_set_generic(Cumprod, exclusive), const_cast("exclusive"), NULL}, - {const_cast("reverse"), py_get_generic(Cumprod, reverse), py_set_generic(Cumprod, reverse), const_cast("reverse"), NULL}, - {NULL} /* Sentinel */ -}; - - PyMethodDef PyOp(Cumprod)::tp_methods[] = { - {const_cast("__getstate__"), PyOp(Cumprod)::getstate, METH_NOARGS, "Cumprod getstate"}, - {const_cast("__setstate__"), PyOp(Cumprod)::setstate, METH_VARARGS, "Cumprod setstate"}, - {NULL} /* Sentinel */ - }; - -void _init_py_Cumprod(py::module m) { - using py_op = PyOp(Cumprod); - auto& py_type = PyOpType(Cumprod); - py_type = {PyVarObject_HEAD_INIT(NULL, 0)}; - py_type.tp_name = "megengine.core._imperative_rt.ops.Cumprod"; - py_type.tp_basicsize = sizeof(PyOp(Cumprod)); - py_type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; - py_type.tp_doc = "Cumprod"; - py_type.tp_base = &PyOpType(OpDef); - py_type.tp_dealloc = py_dealloc_generic; - py_type.tp_new = py_new_generic; - py_type.tp_init = py_op::py_init; - py_type.tp_methods = py_op::tp_methods; - py_type.tp_getset = py_op::py_getsetters; - mgb_assert(PyType_Ready(&py_type) >= 0); - - PyType_Modified(&py_type); - m.add_object("Cumprod", reinterpret_cast(&py_type)); - mgb_assert(PyOp(OpDef)::ctype2pytype.emplace(Cumprod::typeinfo(), &py_type).second); -} - PyOpDefBegin(Cumsum) // { static PyGetSetDef py_getsetters[]; static PyMethodDef tp_methods[]; @@ -8135,16 +8010,16 @@ void _init_py_Dropout(py::module m) { template<> struct EnumTrait { static constexpr const char *name = "Elemwise.Mode"; - static constexpr std::underlying_type_t max = 87 - 1; + static constexpr std::underlying_type_t max = 64 - 1; }; template<> PyTypeObject* EnumWrapper::type = nullptr; template<> const char* -EnumWrapper::members[] = {"RELU", "ABS", "ACOS", "ASIN", "CEIL", "COS", "EXP", "EXPM1", "FLOOR", "LOG", "LOG1P", "NEGATE", "SIGMOID", "SIN", "TANH", "ABS_GRAD", "ADD", "FLOOR_DIV", "MAX", "MIN", "MOD", "MUL", "POW", "SIGMOID_GRAD", "SUB", "SWITCH_GT0", "TANH_GRAD", "TRUE_DIV", "LOG_SUM_EXP", "LT", "LEQ", "EQ", "SHL", "SHR", "COND_LEQ_MOV", "FUSE_MUL_ADD3", "FUSE_MUL_ADD4", "FUSE_ADD_RELU", 
"FUSE_ADD_SIGMOID", "FUSE_ADD_TANH", "FAST_TANH", "FAST_TANH_GRAD", "ROUND", "RMULH", "ATAN2", "ERF", "ERFINV", "ERFC", "ERFCINV", "H_SWISH", "H_SWISH_GRAD", "FUSE_ADD_H_SWISH", "NOT", "AND", "OR", "XOR", "SILU", "SILU_GRAD", "GELU", "GELU_GRAD", "COND_LT_MOV", "SINH", "COSH", "ASINH", "ACOSH", "ATANH", "TAN", "ASINH_GRAD", "ACOSH_GRAD", "ATANH_GRAD", "PRELU", "CLIP", "PRELU_GRAD", "SOFTPLUS", "SOFTPLUS_GRAD", "RELU6", "RELU6_GRAD", "HSIGMOID", "HSIGMOID_GRAD", "LOGSIGMOID", "SQRT", "SQUARE", "SIGN", "SAFE_DIV", "NEQ", "ISNAN", "ISINF"}; +EnumWrapper::members[] = {"RELU", "ABS", "ACOS", "ASIN", "CEIL", "COS", "EXP", "EXPM1", "FLOOR", "LOG", "LOG1P", "NEGATE", "SIGMOID", "SIN", "TANH", "ABS_GRAD", "ADD", "FLOOR_DIV", "MAX", "MIN", "MOD", "MUL", "POW", "SIGMOID_GRAD", "SUB", "SWITCH_GT0", "TANH_GRAD", "TRUE_DIV", "LOG_SUM_EXP", "LT", "LEQ", "EQ", "SHL", "SHR", "COND_LEQ_MOV", "FUSE_MUL_ADD3", "FUSE_MUL_ADD4", "FUSE_ADD_RELU", "FUSE_ADD_SIGMOID", "FUSE_ADD_TANH", "FAST_TANH", "FAST_TANH_GRAD", "ROUND", "RMULH", "ATAN2", "ERF", "ERFINV", "ERFC", "ERFCINV", "H_SWISH", "H_SWISH_GRAD", "FUSE_ADD_H_SWISH", "NOT", "AND", "OR", "XOR", "SILU", "SILU_GRAD", "GELU", "GELU_GRAD", "COND_LT_MOV", "NEQ", "ISNAN", "ISINF"}; template<> std::unordered_map -EnumWrapper::mem2value = {{normalize_enum("RELU"), Elemwise::Mode::RELU}, {normalize_enum("ABS"), Elemwise::Mode::ABS}, {normalize_enum("ACOS"), Elemwise::Mode::ACOS}, {normalize_enum("ASIN"), Elemwise::Mode::ASIN}, {normalize_enum("CEIL"), Elemwise::Mode::CEIL}, {normalize_enum("COS"), Elemwise::Mode::COS}, {normalize_enum("EXP"), Elemwise::Mode::EXP}, {normalize_enum("EXPM1"), Elemwise::Mode::EXPM1}, {normalize_enum("FLOOR"), Elemwise::Mode::FLOOR}, {normalize_enum("LOG"), Elemwise::Mode::LOG}, {normalize_enum("LOG1P"), Elemwise::Mode::LOG1P}, {normalize_enum("NEGATE"), Elemwise::Mode::NEGATE}, {normalize_enum("SIGMOID"), Elemwise::Mode::SIGMOID}, {normalize_enum("SIN"), Elemwise::Mode::SIN}, {normalize_enum("TANH"), Elemwise::Mode::TANH}, {normalize_enum("ABS_GRAD"), Elemwise::Mode::ABS_GRAD}, {normalize_enum("ADD"), Elemwise::Mode::ADD}, {normalize_enum("FLOOR_DIV"), Elemwise::Mode::FLOOR_DIV}, {normalize_enum("MAX"), Elemwise::Mode::MAX}, {normalize_enum("MIN"), Elemwise::Mode::MIN}, {normalize_enum("MOD"), Elemwise::Mode::MOD}, {normalize_enum("MUL"), Elemwise::Mode::MUL}, {normalize_enum("POW"), Elemwise::Mode::POW}, {normalize_enum("SIGMOID_GRAD"), Elemwise::Mode::SIGMOID_GRAD}, {normalize_enum("SUB"), Elemwise::Mode::SUB}, {normalize_enum("SWITCH_GT0"), Elemwise::Mode::SWITCH_GT0}, {normalize_enum("TANH_GRAD"), Elemwise::Mode::TANH_GRAD}, {normalize_enum("TRUE_DIV"), Elemwise::Mode::TRUE_DIV}, {normalize_enum("LOG_SUM_EXP"), Elemwise::Mode::LOG_SUM_EXP}, {normalize_enum("LT"), Elemwise::Mode::LT}, {normalize_enum("LEQ"), Elemwise::Mode::LEQ}, {normalize_enum("EQ"), Elemwise::Mode::EQ}, {normalize_enum("SHL"), Elemwise::Mode::SHL}, {normalize_enum("SHR"), Elemwise::Mode::SHR}, {normalize_enum("COND_LEQ_MOV"), Elemwise::Mode::COND_LEQ_MOV}, {normalize_enum("FUSE_MUL_ADD3"), Elemwise::Mode::FUSE_MUL_ADD3}, {normalize_enum("FUSE_MUL_ADD4"), Elemwise::Mode::FUSE_MUL_ADD4}, {normalize_enum("FUSE_ADD_RELU"), Elemwise::Mode::FUSE_ADD_RELU}, {normalize_enum("FUSE_ADD_SIGMOID"), Elemwise::Mode::FUSE_ADD_SIGMOID}, {normalize_enum("FUSE_ADD_TANH"), Elemwise::Mode::FUSE_ADD_TANH}, {normalize_enum("FAST_TANH"), Elemwise::Mode::FAST_TANH}, {normalize_enum("FAST_TANH_GRAD"), Elemwise::Mode::FAST_TANH_GRAD}, {normalize_enum("ROUND"), Elemwise::Mode::ROUND}, 
{normalize_enum("RMULH"), Elemwise::Mode::RMULH}, {normalize_enum("ATAN2"), Elemwise::Mode::ATAN2}, {normalize_enum("ERF"), Elemwise::Mode::ERF}, {normalize_enum("ERFINV"), Elemwise::Mode::ERFINV}, {normalize_enum("ERFC"), Elemwise::Mode::ERFC}, {normalize_enum("ERFCINV"), Elemwise::Mode::ERFCINV}, {normalize_enum("H_SWISH"), Elemwise::Mode::H_SWISH}, {normalize_enum("H_SWISH_GRAD"), Elemwise::Mode::H_SWISH_GRAD}, {normalize_enum("FUSE_ADD_H_SWISH"), Elemwise::Mode::FUSE_ADD_H_SWISH}, {normalize_enum("NOT"), Elemwise::Mode::NOT}, {normalize_enum("AND"), Elemwise::Mode::AND}, {normalize_enum("OR"), Elemwise::Mode::OR}, {normalize_enum("XOR"), Elemwise::Mode::XOR}, {normalize_enum("SILU"), Elemwise::Mode::SILU}, {normalize_enum("SILU_GRAD"), Elemwise::Mode::SILU_GRAD}, {normalize_enum("GELU"), Elemwise::Mode::GELU}, {normalize_enum("GELU_GRAD"), Elemwise::Mode::GELU_GRAD}, {normalize_enum("COND_LT_MOV"), Elemwise::Mode::COND_LT_MOV}, {normalize_enum("SINH"), Elemwise::Mode::SINH}, {normalize_enum("COSH"), Elemwise::Mode::COSH}, {normalize_enum("ASINH"), Elemwise::Mode::ASINH}, {normalize_enum("ACOSH"), Elemwise::Mode::ACOSH}, {normalize_enum("ATANH"), Elemwise::Mode::ATANH}, {normalize_enum("TAN"), Elemwise::Mode::TAN}, {normalize_enum("ASINH_GRAD"), Elemwise::Mode::ASINH_GRAD}, {normalize_enum("ACOSH_GRAD"), Elemwise::Mode::ACOSH_GRAD}, {normalize_enum("ATANH_GRAD"), Elemwise::Mode::ATANH_GRAD}, {normalize_enum("PRELU"), Elemwise::Mode::PRELU}, {normalize_enum("CLIP"), Elemwise::Mode::CLIP}, {normalize_enum("PRELU_GRAD"), Elemwise::Mode::PRELU_GRAD}, {normalize_enum("SOFTPLUS"), Elemwise::Mode::SOFTPLUS}, {normalize_enum("SOFTPLUS_GRAD"), Elemwise::Mode::SOFTPLUS_GRAD}, {normalize_enum("RELU6"), Elemwise::Mode::RELU6}, {normalize_enum("RELU6_GRAD"), Elemwise::Mode::RELU6_GRAD}, {normalize_enum("HSIGMOID"), Elemwise::Mode::HSIGMOID}, {normalize_enum("HSIGMOID_GRAD"), Elemwise::Mode::HSIGMOID_GRAD}, {normalize_enum("LOGSIGMOID"), Elemwise::Mode::LOGSIGMOID}, {normalize_enum("SQRT"), Elemwise::Mode::SQRT}, {normalize_enum("SQUARE"), Elemwise::Mode::SQUARE}, {normalize_enum("SIGN"), Elemwise::Mode::SIGN}, {normalize_enum("SAFE_DIV"), Elemwise::Mode::SAFE_DIV}, {normalize_enum("NEQ"), Elemwise::Mode::NEQ}, {normalize_enum("ISNAN"), Elemwise::Mode::ISNAN}, {normalize_enum("ISINF"), Elemwise::Mode::ISINF}}; -template<> PyObject* EnumWrapper::pyobj_insts[87] = {nullptr}; +EnumWrapper::mem2value = {{normalize_enum("RELU"), Elemwise::Mode::RELU}, {normalize_enum("ABS"), Elemwise::Mode::ABS}, {normalize_enum("ACOS"), Elemwise::Mode::ACOS}, {normalize_enum("ASIN"), Elemwise::Mode::ASIN}, {normalize_enum("CEIL"), Elemwise::Mode::CEIL}, {normalize_enum("COS"), Elemwise::Mode::COS}, {normalize_enum("EXP"), Elemwise::Mode::EXP}, {normalize_enum("EXPM1"), Elemwise::Mode::EXPM1}, {normalize_enum("FLOOR"), Elemwise::Mode::FLOOR}, {normalize_enum("LOG"), Elemwise::Mode::LOG}, {normalize_enum("LOG1P"), Elemwise::Mode::LOG1P}, {normalize_enum("NEGATE"), Elemwise::Mode::NEGATE}, {normalize_enum("SIGMOID"), Elemwise::Mode::SIGMOID}, {normalize_enum("SIN"), Elemwise::Mode::SIN}, {normalize_enum("TANH"), Elemwise::Mode::TANH}, {normalize_enum("ABS_GRAD"), Elemwise::Mode::ABS_GRAD}, {normalize_enum("ADD"), Elemwise::Mode::ADD}, {normalize_enum("FLOOR_DIV"), Elemwise::Mode::FLOOR_DIV}, {normalize_enum("MAX"), Elemwise::Mode::MAX}, {normalize_enum("MIN"), Elemwise::Mode::MIN}, {normalize_enum("MOD"), Elemwise::Mode::MOD}, {normalize_enum("MUL"), Elemwise::Mode::MUL}, {normalize_enum("POW"), Elemwise::Mode::POW}, 
{normalize_enum("SIGMOID_GRAD"), Elemwise::Mode::SIGMOID_GRAD}, {normalize_enum("SUB"), Elemwise::Mode::SUB}, {normalize_enum("SWITCH_GT0"), Elemwise::Mode::SWITCH_GT0}, {normalize_enum("TANH_GRAD"), Elemwise::Mode::TANH_GRAD}, {normalize_enum("TRUE_DIV"), Elemwise::Mode::TRUE_DIV}, {normalize_enum("LOG_SUM_EXP"), Elemwise::Mode::LOG_SUM_EXP}, {normalize_enum("LT"), Elemwise::Mode::LT}, {normalize_enum("LEQ"), Elemwise::Mode::LEQ}, {normalize_enum("EQ"), Elemwise::Mode::EQ}, {normalize_enum("SHL"), Elemwise::Mode::SHL}, {normalize_enum("SHR"), Elemwise::Mode::SHR}, {normalize_enum("COND_LEQ_MOV"), Elemwise::Mode::COND_LEQ_MOV}, {normalize_enum("FUSE_MUL_ADD3"), Elemwise::Mode::FUSE_MUL_ADD3}, {normalize_enum("FUSE_MUL_ADD4"), Elemwise::Mode::FUSE_MUL_ADD4}, {normalize_enum("FUSE_ADD_RELU"), Elemwise::Mode::FUSE_ADD_RELU}, {normalize_enum("FUSE_ADD_SIGMOID"), Elemwise::Mode::FUSE_ADD_SIGMOID}, {normalize_enum("FUSE_ADD_TANH"), Elemwise::Mode::FUSE_ADD_TANH}, {normalize_enum("FAST_TANH"), Elemwise::Mode::FAST_TANH}, {normalize_enum("FAST_TANH_GRAD"), Elemwise::Mode::FAST_TANH_GRAD}, {normalize_enum("ROUND"), Elemwise::Mode::ROUND}, {normalize_enum("RMULH"), Elemwise::Mode::RMULH}, {normalize_enum("ATAN2"), Elemwise::Mode::ATAN2}, {normalize_enum("ERF"), Elemwise::Mode::ERF}, {normalize_enum("ERFINV"), Elemwise::Mode::ERFINV}, {normalize_enum("ERFC"), Elemwise::Mode::ERFC}, {normalize_enum("ERFCINV"), Elemwise::Mode::ERFCINV}, {normalize_enum("H_SWISH"), Elemwise::Mode::H_SWISH}, {normalize_enum("H_SWISH_GRAD"), Elemwise::Mode::H_SWISH_GRAD}, {normalize_enum("FUSE_ADD_H_SWISH"), Elemwise::Mode::FUSE_ADD_H_SWISH}, {normalize_enum("NOT"), Elemwise::Mode::NOT}, {normalize_enum("AND"), Elemwise::Mode::AND}, {normalize_enum("OR"), Elemwise::Mode::OR}, {normalize_enum("XOR"), Elemwise::Mode::XOR}, {normalize_enum("SILU"), Elemwise::Mode::SILU}, {normalize_enum("SILU_GRAD"), Elemwise::Mode::SILU_GRAD}, {normalize_enum("GELU"), Elemwise::Mode::GELU}, {normalize_enum("GELU_GRAD"), Elemwise::Mode::GELU_GRAD}, {normalize_enum("COND_LT_MOV"), Elemwise::Mode::COND_LT_MOV}, {normalize_enum("NEQ"), Elemwise::Mode::NEQ}, {normalize_enum("ISNAN"), Elemwise::Mode::ISNAN}, {normalize_enum("ISINF"), Elemwise::Mode::ISINF}}; +template<> PyObject* EnumWrapper::pyobj_insts[64] = {nullptr}; void _init_py_Elemwise_Mode(PyTypeObject& py_type) { auto& e_type = EnumWrapper::type; @@ -8499,134 +8374,19 @@ void _init_py_Elemwise_Mode(PyTypeObject& py_type) { EnumWrapper::pyobj_insts[60] = inst; }{ PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::SINH; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "SINH", inst) >= 0); - EnumWrapper::pyobj_insts[61] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::COSH; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "COSH", inst) >= 0); - EnumWrapper::pyobj_insts[62] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::ASINH; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "ASINH", inst) >= 0); - EnumWrapper::pyobj_insts[63] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::ACOSH; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "ACOSH", inst) >= 0); - EnumWrapper::pyobj_insts[64] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::ATANH; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, 
"ATANH", inst) >= 0); - EnumWrapper::pyobj_insts[65] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::TAN; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "TAN", inst) >= 0); - EnumWrapper::pyobj_insts[66] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::ASINH_GRAD; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "ASINH_GRAD", inst) >= 0); - EnumWrapper::pyobj_insts[67] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::ACOSH_GRAD; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "ACOSH_GRAD", inst) >= 0); - EnumWrapper::pyobj_insts[68] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::ATANH_GRAD; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "ATANH_GRAD", inst) >= 0); - EnumWrapper::pyobj_insts[69] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::PRELU; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "PRELU", inst) >= 0); - EnumWrapper::pyobj_insts[70] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::CLIP; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "CLIP", inst) >= 0); - EnumWrapper::pyobj_insts[71] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::PRELU_GRAD; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "PRELU_GRAD", inst) >= 0); - EnumWrapper::pyobj_insts[72] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::SOFTPLUS; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "SOFTPLUS", inst) >= 0); - EnumWrapper::pyobj_insts[73] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::SOFTPLUS_GRAD; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "SOFTPLUS_GRAD", inst) >= 0); - EnumWrapper::pyobj_insts[74] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::RELU6; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "RELU6", inst) >= 0); - EnumWrapper::pyobj_insts[75] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::RELU6_GRAD; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "RELU6_GRAD", inst) >= 0); - EnumWrapper::pyobj_insts[76] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::HSIGMOID; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "HSIGMOID", inst) >= 0); - EnumWrapper::pyobj_insts[77] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::HSIGMOID_GRAD; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "HSIGMOID_GRAD", inst) >= 0); - EnumWrapper::pyobj_insts[78] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::LOGSIGMOID; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "LOGSIGMOID", inst) >= 0); - EnumWrapper::pyobj_insts[79] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::SQRT; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "SQRT", inst) >= 0); - EnumWrapper::pyobj_insts[80] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); 
- reinterpret_cast*>(inst)->value = Elemwise::Mode::SQUARE; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "SQUARE", inst) >= 0); - EnumWrapper::pyobj_insts[81] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::SIGN; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "SIGN", inst) >= 0); - EnumWrapper::pyobj_insts[82] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); - reinterpret_cast*>(inst)->value = Elemwise::Mode::SAFE_DIV; - mgb_assert(PyDict_SetItemString(e_type->tp_dict, "SAFE_DIV", inst) >= 0); - EnumWrapper::pyobj_insts[83] = inst; -}{ - PyObject* inst = e_type->tp_alloc(e_type, 0); reinterpret_cast*>(inst)->value = Elemwise::Mode::NEQ; mgb_assert(PyDict_SetItemString(e_type->tp_dict, "NEQ", inst) >= 0); - EnumWrapper::pyobj_insts[84] = inst; + EnumWrapper::pyobj_insts[61] = inst; }{ PyObject* inst = e_type->tp_alloc(e_type, 0); reinterpret_cast*>(inst)->value = Elemwise::Mode::ISNAN; mgb_assert(PyDict_SetItemString(e_type->tp_dict, "ISNAN", inst) >= 0); - EnumWrapper::pyobj_insts[85] = inst; + EnumWrapper::pyobj_insts[62] = inst; }{ PyObject* inst = e_type->tp_alloc(e_type, 0); reinterpret_cast*>(inst)->value = Elemwise::Mode::ISINF; mgb_assert(PyDict_SetItemString(e_type->tp_dict, "ISINF", inst) >= 0); - EnumWrapper::pyobj_insts[86] = inst; + EnumWrapper::pyobj_insts[63] = inst; } Py_INCREF(e_type); mgb_assert(PyDict_SetItemString( @@ -18456,7 +18216,6 @@ void _init_py_WarpPerspective(py::module m) { _init_py_ConvolutionBackwardData(m); \ _init_py_Copy(m); \ _init_py_Correlation(m); \ - _init_py_Cumprod(m); \ _init_py_Cumsum(m); \ _init_py_CvtColor(m); \ _init_py_DeformableConv(m); \ diff --git a/imperative/tablegen/generated/opdef.h.inl b/imperative/tablegen/generated/opdef.h.inl index 8545893c..cd51a572 100644 --- a/imperative/tablegen/generated/opdef.h.inl +++ b/imperative/tablegen/generated/opdef.h.inl @@ -567,21 +567,6 @@ public: } }; -class Cumprod : public OpDefImplBase { - MGB_DYN_TYPE_OBJ_FINAL_DECL; - -public: - int32_t axis = 2147483647; - bool exclusive = true; - bool reverse = false; - Cumprod() = default; - Cumprod(int32_t axis_, bool exclusive_, bool reverse_, std::string scope_ = {}): axis(axis_), exclusive(exclusive_), reverse(reverse_) { set_scope(scope_); } - Cumprod(::megdnn::param::Cumprod packed_param_0): axis(packed_param_0.axis), exclusive(packed_param_0.exclusive), reverse(packed_param_0.reverse) {} - ::megdnn::param::Cumprod param() const { - return {axis, exclusive, reverse}; - } -}; - class Cumsum : public OpDefImplBase { MGB_DYN_TYPE_OBJ_FINAL_DECL; @@ -795,29 +780,6 @@ case Elemwise::Mode::SILU_GRAD: return "SILU_GRAD"; case Elemwise::Mode::GELU: return "GELU"; case Elemwise::Mode::GELU_GRAD: return "GELU_GRAD"; case Elemwise::Mode::COND_LT_MOV: return "COND_LT_MOV"; -case Elemwise::Mode::SINH: return "SINH"; -case Elemwise::Mode::COSH: return "COSH"; -case Elemwise::Mode::ASINH: return "ASINH"; -case Elemwise::Mode::ACOSH: return "ACOSH"; -case Elemwise::Mode::ATANH: return "ATANH"; -case Elemwise::Mode::TAN: return "TAN"; -case Elemwise::Mode::ASINH_GRAD: return "ASINH_GRAD"; -case Elemwise::Mode::ACOSH_GRAD: return "ACOSH_GRAD"; -case Elemwise::Mode::ATANH_GRAD: return "ATANH_GRAD"; -case Elemwise::Mode::PRELU: return "PRELU"; -case Elemwise::Mode::CLIP: return "CLIP"; -case Elemwise::Mode::PRELU_GRAD: return "PRELU_GRAD"; -case Elemwise::Mode::SOFTPLUS: return "SOFTPLUS"; -case Elemwise::Mode::SOFTPLUS_GRAD: return "SOFTPLUS_GRAD"; -case Elemwise::Mode::RELU6: return 
"RELU6"; -case Elemwise::Mode::RELU6_GRAD: return "RELU6_GRAD"; -case Elemwise::Mode::HSIGMOID: return "HSIGMOID"; -case Elemwise::Mode::HSIGMOID_GRAD: return "HSIGMOID_GRAD"; -case Elemwise::Mode::LOGSIGMOID: return "LOGSIGMOID"; -case Elemwise::Mode::SQRT: return "SQRT"; -case Elemwise::Mode::SQUARE: return "SQUARE"; -case Elemwise::Mode::SIGN: return "SIGN"; -case Elemwise::Mode::SAFE_DIV: return "SAFE_DIV"; case Elemwise::Mode::NEQ: return "NEQ"; case Elemwise::Mode::ISNAN: return "ISNAN"; case Elemwise::Mode::ISINF: return "ISINF"; diff --git a/imperative/tablegen/generated/opdef.py.inl b/imperative/tablegen/generated/opdef.py.inl index b594b261..631639c7 100644 --- a/imperative/tablegen/generated/opdef.py.inl +++ b/imperative/tablegen/generated/opdef.py.inl @@ -678,14 +678,6 @@ CorrelationInst .def_readwrite("pad_size", &Correlation::pad_size) .def_readwrite("is_multiply", &Correlation::is_multiply); -py::class_, OpDef> CumprodInst(m, "Cumprod"); - -CumprodInst - .def(py::init(), py::arg("axis") = 2147483647, py::arg("exclusive") = true, py::arg("reverse") = false, py::arg("scope") = {}) - .def_readwrite("axis", &Cumprod::axis) - .def_readwrite("exclusive", &Cumprod::exclusive) - .def_readwrite("reverse", &Cumprod::reverse); - py::class_, OpDef> CumsumInst(m, "Cumsum"); CumsumInst @@ -901,29 +893,6 @@ py::enum_(ElemwiseInst, "Mode") .value("GELU", Elemwise::Mode::GELU) .value("GELU_GRAD", Elemwise::Mode::GELU_GRAD) .value("COND_LT_MOV", Elemwise::Mode::COND_LT_MOV) - .value("SINH", Elemwise::Mode::SINH) - .value("COSH", Elemwise::Mode::COSH) - .value("ASINH", Elemwise::Mode::ASINH) - .value("ACOSH", Elemwise::Mode::ACOSH) - .value("ATANH", Elemwise::Mode::ATANH) - .value("TAN", Elemwise::Mode::TAN) - .value("ASINH_GRAD", Elemwise::Mode::ASINH_GRAD) - .value("ACOSH_GRAD", Elemwise::Mode::ACOSH_GRAD) - .value("ATANH_GRAD", Elemwise::Mode::ATANH_GRAD) - .value("PRELU", Elemwise::Mode::PRELU) - .value("CLIP", Elemwise::Mode::CLIP) - .value("PRELU_GRAD", Elemwise::Mode::PRELU_GRAD) - .value("SOFTPLUS", Elemwise::Mode::SOFTPLUS) - .value("SOFTPLUS_GRAD", Elemwise::Mode::SOFTPLUS_GRAD) - .value("RELU6", Elemwise::Mode::RELU6) - .value("RELU6_GRAD", Elemwise::Mode::RELU6_GRAD) - .value("HSIGMOID", Elemwise::Mode::HSIGMOID) - .value("HSIGMOID_GRAD", Elemwise::Mode::HSIGMOID_GRAD) - .value("LOGSIGMOID", Elemwise::Mode::LOGSIGMOID) - .value("SQRT", Elemwise::Mode::SQRT) - .value("SQUARE", Elemwise::Mode::SQUARE) - .value("SIGN", Elemwise::Mode::SIGN) - .value("SAFE_DIV", Elemwise::Mode::SAFE_DIV) .value("NEQ", Elemwise::Mode::NEQ) .value("ISNAN", Elemwise::Mode::ISNAN) .value("ISINF", Elemwise::Mode::ISINF) @@ -990,29 +959,6 @@ py::enum_(ElemwiseInst, "Mode") if (str == "GELU") return Elemwise::Mode::GELU; if (str == "GELU_GRAD") return Elemwise::Mode::GELU_GRAD; if (str == "COND_LT_MOV") return Elemwise::Mode::COND_LT_MOV; - if (str == "SINH") return Elemwise::Mode::SINH; - if (str == "COSH") return Elemwise::Mode::COSH; - if (str == "ASINH") return Elemwise::Mode::ASINH; - if (str == "ACOSH") return Elemwise::Mode::ACOSH; - if (str == "ATANH") return Elemwise::Mode::ATANH; - if (str == "TAN") return Elemwise::Mode::TAN; - if (str == "ASINH_GRAD") return Elemwise::Mode::ASINH_GRAD; - if (str == "ACOSH_GRAD") return Elemwise::Mode::ACOSH_GRAD; - if (str == "ATANH_GRAD") return Elemwise::Mode::ATANH_GRAD; - if (str == "PRELU") return Elemwise::Mode::PRELU; - if (str == "CLIP") return Elemwise::Mode::CLIP; - if (str == "PRELU_GRAD") return Elemwise::Mode::PRELU_GRAD; - if (str == "SOFTPLUS") 
return Elemwise::Mode::SOFTPLUS; - if (str == "SOFTPLUS_GRAD") return Elemwise::Mode::SOFTPLUS_GRAD; - if (str == "RELU6") return Elemwise::Mode::RELU6; - if (str == "RELU6_GRAD") return Elemwise::Mode::RELU6_GRAD; - if (str == "HSIGMOID") return Elemwise::Mode::HSIGMOID; - if (str == "HSIGMOID_GRAD") return Elemwise::Mode::HSIGMOID_GRAD; - if (str == "LOGSIGMOID") return Elemwise::Mode::LOGSIGMOID; - if (str == "SQRT") return Elemwise::Mode::SQRT; - if (str == "SQUARE") return Elemwise::Mode::SQUARE; - if (str == "SIGN") return Elemwise::Mode::SIGN; - if (str == "SAFE_DIV") return Elemwise::Mode::SAFE_DIV; if (str == "NEQ") return Elemwise::Mode::NEQ; if (str == "ISNAN") return Elemwise::Mode::ISNAN; if (str == "ISINF") return Elemwise::Mode::ISINF; diff --git a/src/core/include/megbrain/ir/ops.td b/src/core/include/megbrain/ir/ops.td index 1853cdfc..d70cd20b 100644 --- a/src/core/include/megbrain/ir/ops.td +++ b/src/core/include/megbrain/ir/ops.td @@ -472,7 +472,6 @@ def ExternOpr: MgbHashableOp<"ExternOpr"> { } def Cumsum: MgbHashableOp<"Cumsum", [CumsumParam]>; -def Cumprod: MgbHashableOp<"Cumprod", [CumprodParam]>; def Split: MgbHashableOp<"Split", [EmptyParam]> { let extraArguments = (ins diff --git a/src/gopt/test/basic_arith.cpp b/src/gopt/test/basic_arith.cpp index 483cc7af..c61ce9eb 100644 --- a/src/gopt/test/basic_arith.cpp +++ b/src/gopt/test/basic_arith.cpp @@ -107,12 +107,6 @@ TEST(TestGoptBasicArithInplace, Absorbing) { ASSERT_EQ(y.as_immutable_scalar()->get_cast(), 0.f); } -auto gen_postive = [](HostTensorND& dest) { - HostTensorGenerator mask_generator{ - 2.f, 4.f}; - dest = *mask_generator(dest.shape(), dest.comp_node()); -}; - TEST(TestGoptBasicArithInplace, LogExpExpand) { // test log(exp(a) * (exp(b) / (exp(c) * d**2))) -> a + b - c - log(d**2) @@ -150,13 +144,9 @@ TEST(TestGoptBasicArithInplace, LogExpExpand) { opt.numdiff_eps_single_inp[3] = 1e-3; opt.numdiff_max_err_single_inp[3] = 1e-2; Checker{make_graph, fwd} - .set_input_generator(0, gen_postive) - .set_input_generator(1, gen_postive) - .set_input_generator(2, gen_postive) - .set_input_generator(3, gen_postive) - .run(ms({32, 1}, {32, 1}), opt) - .run(ms({2, 32}, {2, 32}), opt) - .run(ms({1, 32}, {1, 32}), opt); + .run(ms({2, 3}, {2, 3}), opt) + .run(ms({1, 3}, {2, 3}), opt) + .run(ms({3, 2}, {1}), opt); } TEST(TestGoptBasicArithInplace, LogSumExp) { diff --git a/src/jit/impl/ast_c.cpp b/src/jit/impl/ast_c.cpp index 0d4b47a5..eb1dc794 100644 --- a/src/jit/impl/ast_c.cpp +++ b/src/jit/impl/ast_c.cpp @@ -133,7 +133,7 @@ const ElemGeneratorMap& ast_c::elem_opr_generator() { 0.f}) / 6.f), }; - mgb_assert(map.size() + 42 == opr::Elemwise::Param::MODE_NR_MEMBER); + mgb_assert(map.size() + 41 == opr::Elemwise::Param::MODE_NR_MEMBER); // unimplemented modes: SHL, SHR, FAST_TANH, FAST_TANH_GRAD, ROUND, RMULH, // ERFINV, ERFCINV, NOT, AND, OR, XOR, NEQ, ISNAN, ISINF return map; diff --git a/src/opr/impl/basic_arith.cpp b/src/opr/impl/basic_arith.cpp index d70c7326..9924f747 100644 --- a/src/opr/impl/basic_arith.cpp +++ b/src/opr/impl/basic_arith.cpp @@ -580,8 +580,6 @@ MGB_IMPL_OPR_GRAD(Elemwise) { RET(EL2(ABS_GRAD, i0, og)); case Mode::ADD: RET(og); - case Mode::SAFE_DIV: - RET_INVALID(); case Mode::FLOOR_DIV: return nullptr; case Mode::MAX: diff --git a/src/opr/impl/misc.cpp b/src/opr/impl/misc.cpp index 166833f7..3b45f822 100644 --- a/src/opr/impl/misc.cpp +++ b/src/opr/impl/misc.cpp @@ -119,75 +119,6 @@ MGB_IMPL_OPR_GRAD(ArgsortForward) { MGB_DYN_TYPE_OBJ_FINAL_IMPL(ArgsortBackward); MEGDNN_OPR_INIT3(ArgsortBackward, 
"argsort_bwd", 2, false) -/* ================= Cumprod ================= */ - -MGB_DYN_TYPE_OBJ_FINAL_IMPL(Cumprod); - -Cumprod::Cumprod(VarNode* opr, const Param& param, const OperatorNodeConfig& config) - : Super{opr->owner_graph(), config, "Cumprod", {opr}} { - init_megdnn_opr(*this, param); - add_input({opr}, AddInputSortType::CUR_ADDED); -} - -#if MGB_ENABLE_GRAD -MGB_IMPL_OPR_GRAD(Cumprod) { - mgb_assert(out_grad[0] && !out_grad[1]); - auto x = SymbolVar{opr.input(0)}, y = SymbolVar{opr.output(0)}, - grad = SymbolVar{out_grad[0]}; - auto prod_param = opr.param(); - Cumsum::Param reversed_param; - reversed_param.axis = prod_param.axis; - reversed_param.exclusive = prod_param.exclusive; - reversed_param.reverse = !prod_param.reverse; - - auto w = y * grad; - return Elemwise::make( - {Cumsum::make(w, reversed_param), x}, Elemwise::Mode::SAFE_DIV) - .node(); -} -#endif - -SymbolVar Cumprod::make( - SymbolVar opr, const Param& param, const OperatorNodeConfig& config) { - return opr.insert_single_output_opr(opr.node(), param, config); -} - -void Cumprod::scn_do_execute() { - megdnn_opr()->exec( - input(0)->dev_tensor().as_megdnn(), output(0)->dev_tensor().as_megdnn(), - intl::get_megdnn_workspace_from_var(output().back())); -} - -void Cumprod::add_input_layout_constraint() { - input(0)->add_layout_constraint_contiguous(); -} - -void Cumprod::init_output_static_infer_desc() { - using namespace cg::static_infer; - auto infer_shape = [](TensorShape& dest, const InpVal& iv) { - auto ishp = iv.val.at(0).shape(); - dest = ishp; - return true; - }; - owner_graph()->static_infer_manager().register_shape_infer( - output(0), {SourceType::DEP, {{input(0), DepType::SHAPE}}, infer_shape}); - auto infer_workspace = [this](TensorShape& dest, const InpVal& iv) { - auto dtype = input(0)->dtype(); - auto ishp = iv.val.at(0).shape(); - TensorLayout ily(ishp, dtype); - Param real_param = param(); - if (real_param.axis < 0) - real_param.axis += ishp.ndim; - megdnn_opr()->param() = real_param; - dest.ndim = 1; - dest[0] = megdnn_opr()->get_workspace_in_bytes(ily, ily); - return true; - }; - owner_graph()->static_infer_manager().register_shape_infer( - output(1), - {SourceType::DEP, {{input(0), DepType::SHAPE}}, infer_workspace}); -} - /* ================= Cumsum ================= */ MGB_DYN_TYPE_OBJ_FINAL_IMPL(Cumsum); diff --git a/src/opr/impl/misc.oprdecl b/src/opr/impl/misc.oprdecl index 05ee72ec..0e7d5201 100644 --- a/src/opr/impl/misc.oprdecl +++ b/src/opr/impl/misc.oprdecl @@ -42,16 +42,6 @@ decl_opr('Argsort', 'performed. Two vars are returned: the sorted array, and the ' 'indices. ') -decl_opr('Cumprod', - inputs=['src'], params='Cumprod', - body=[ - 'if param.axis == (1<<31)-1:', - ' all_inputs[0] = all_inputs[0].flatten()', - ' param.axis = 0' - ], - desc='Return the cumulative product of the elements along a given axis.' - ' If axis is INT_MAX, compute on flattened input.', version=1) - decl_opr('Cumsum', inputs=['src'], params='Cumsum', body=[ diff --git a/src/opr/impl/misc.sereg.h b/src/opr/impl/misc.sereg.h index 1fb5523c..b06beecb 100644 --- a/src/opr/impl/misc.sereg.h +++ b/src/opr/impl/misc.sereg.h @@ -70,7 +70,6 @@ MGB_SEREG_OPR(TopK, 2); //! 
current cumsum version using CumsumV1 = opr::Cumsum; MGB_SEREG_OPR(CumsumV1, 1); -MGB_SEREG_OPR(Cumprod, 1); #if MGB_CUDA MGB_SEREG_OPR(NvOf, 1); diff --git a/src/opr/include/megbrain/opr/misc.h b/src/opr/include/megbrain/opr/misc.h index fa872ae6..b214b583 100644 --- a/src/opr/include/megbrain/opr/misc.h +++ b/src/opr/include/megbrain/opr/misc.h @@ -76,25 +76,6 @@ public: } }; -//! cumulative product along given axis -MGB_DEFINE_OPR_CLASS_WITH_EXPORT( - Cumprod, - cg::SingleCNOperatorNodeBaseT>) // { - void add_input_layout_constraint() override; - -public: - MGE_WIN_DECLSPEC_FUC Cumprod( - VarNode* src, const Param& param, const OperatorNodeConfig& config); - - // for serialization - MGE_WIN_DECLSPEC_FUC static SymbolVar make( - SymbolVar opr, const Param& param, const OperatorNodeConfig& config = {}); - -protected: - void scn_do_execute() override; - void init_output_static_infer_desc() override; -}; - //! cumulative sum along given axis MGB_DEFINE_OPR_CLASS_WITH_EXPORT( Cumsum, diff --git a/src/opr/test/basic_arith/elemwise.cpp b/src/opr/test/basic_arith/elemwise.cpp index 9ae14bca..89ae9ca3 100644 --- a/src/opr/test/basic_arith/elemwise.cpp +++ b/src/opr/test/basic_arith/elemwise.cpp @@ -350,9 +350,6 @@ template <> struct CheckerConfig : public NoGradCheckerConfig {}; template <> -struct CheckerConfig : public NoGradCheckerConfig {}; - -template <> struct CheckerConfig : public NoGradCheckerConfig { template static InputGenerator get_inp_gen(size_t) { @@ -696,13 +693,9 @@ struct CheckerConfig : public NoGradCheckerConfig { /* ======================= ternary config ======================= */ template <> struct CheckerConfig : public BinaryInputMinGap {}; - template <> struct CheckerConfig : public BinaryInputMinGap {}; - -template <> struct CheckerConfig : public NoGradCheckerConfig {}; - template <> struct CheckerConfig : public CheckerConfig { template @@ -893,10 +886,6 @@ void TestRunner::run() { } TensorShape shapes[] = {{1}, {23, 3}, {666}}; - if (Trait::ARITY == 4) { - checker.disable_graph_opt(); - shapes[0] = {32}; - } typename Checker::RunOptions opt; Config::update_opt(opt); Config::update_checker(checker); @@ -1045,13 +1034,13 @@ TEST(TestOprBasicArithElemwise, FuseMulAdd4Shapes) { }; Checker checker{make_graph, fwd}; - checker.run({TensorShape{1, 32}, {1, 32}, {1, 32}, {1, 32}}) - .run({TensorShape{1, 1, 1, 1, 1, 32}, - {1, 1, 1, 1, 1, 32}, - {1, 1, 1, 1, 1, 32}, - {1, 1, 1, 1, 1, 32}}); + checker.run({TensorShape{1, 2}, {2, 1}, {1, 2}, {2, 1}}) + .run({TensorShape{1, 2, 1, 2, 1, 2}, + {2, 1, 2, 1, 2, 1}, + {2, 1, 2, 1, 2, 1}, + {1, 2, 1, 2, 1, 2}}); ASSERT_FALSE(opr->fuse_badlayout_warn_printed()); - checker.run({TensorShape{1, 32}, {32, 1}, {32, 32}, {32, 32}}); + checker.run({TensorShape{1, 2}, {2, 1}, {2, 2}, {2, 2}}); ASSERT_TRUE(opr->fuse_badlayout_warn_printed()); } diff --git a/src/opr/test/basic_arith/elemwise_binary_trait_def.inl b/src/opr/test/basic_arith/elemwise_binary_trait_def.inl index 4d88c852..1ed742db 100644 --- a/src/opr/test/basic_arith/elemwise_binary_trait_def.inl +++ b/src/opr/test/basic_arith/elemwise_binary_trait_def.inl @@ -47,7 +47,6 @@ DEF_TRAIT(PRELU, (x > 0) ? x : (x* y)) #define _ALLOW_INT false DEF_TRAIT(POW, std::pow(x, y)) DEF_TRAIT(TRUE_DIV, x / y) -DEF_TRAIT(SAFE_DIV, y != 0 ? 
x / y : 0) DEF_TRAIT(LOG_SUM_EXP, do_log_sum_exp(x, y)) DEF_TRAIT(FUSE_ADD_SIGMOID, 1 / (1 + std::exp(-(x + y)))) DEF_TRAIT(FUSE_ADD_TANH, std::tanh(x + y)) diff --git a/src/opr/test/misc.cpp b/src/opr/test/misc.cpp index 47ec317a..e5964108 100644 --- a/src/opr/test/misc.cpp +++ b/src/opr/test/misc.cpp @@ -145,60 +145,6 @@ TEST(TestOprMisc, Argsort) { run(Order::DESCENDING); } -TEST(TestOprMisc, Cumprod) { - using Param = opr::Cumprod::Param; - auto run = [](const Param& param) { - using Checker = AutoOprChecker<1, 1>; - auto make_graph = - [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { - return {opr::Cumprod::make(inputs[0], param)}; - }; - auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) { - out[0].resize(inp[0]->shape()); - - auto pin = inp[0]->ptr(), pout = out[0].ptr(); - size_t A, B, C; - int real_axis = param.axis; - if (real_axis < 0) - real_axis += 3; - shape_abc(inp[0]->shape(), real_axis, A, B, C); - ptrdiff_t stride = C; - if (param.reverse) - stride = -stride; - for (size_t i = 0; i < A; ++i) { - for (size_t k = 0; k < C; ++k) { - auto pi = pin + i * B * C + k, po = pout + i * B * C + k; - if (param.reverse) { - pi += (B - 1) * C; - po += (B - 1) * C; - } - if (param.exclusive) { - *po = 1; - po += stride; - } - float prod = 1; - for (size_t j = 0; j < B - 1; ++j) { - prod *= pi[j * stride]; - po[j * stride] = prod; - } - if (!param.exclusive) { - po[(B - 1) * stride] = prod * pi[(B - 1) * stride]; - } - } - } - }; - Checker{make_graph, fwd} - .run({TensorShape{2, 3, 4}}) - .run({TensorShape{3, 1, 2}}) - .run({TensorShape{4, 2, 3}}); - }; - - // test negative axis - for (int32_t axis = -3; axis < 3; ++axis) - for (int mask = 0; mask < 4; ++mask) - run({axis, bool(mask >> 1), bool(mask & 1)}); -} - TEST(TestOprMisc, Cumsum) { using Param = opr::Cumsum::Param; auto run = [](const Param& param) { diff --git a/src/serialization/impl/schema.fbs b/src/serialization/impl/schema.fbs index 6f2d9733..a7ac763b 100644 --- a/src/serialization/impl/schema.fbs +++ b/src/serialization/impl/schema.fbs @@ -123,7 +123,6 @@ union OperatorParam { param.LSTM = 89, param.Softmax = 90, param.Diag = 91, - param.Cumprod = 92, } table Operator { diff --git a/test/src/autocheck.cpp b/test/src/autocheck.cpp index 1e502fc7..1de0194c 100644 --- a/test/src/autocheck.cpp +++ b/test/src/autocheck.cpp @@ -224,12 +224,6 @@ DEF_IMPL(void)::do_run(const ShapeInpArray& shapes, const RunOptions& opt) { m_inputs_generator[i](*m_inputs[i]); mgb_assert(m_inputs[i]->shape().eq_shape(shapes[i])); } - if (shapes.size() == 4u) { - m_extra_err_msg = ssprintf("%d,", *((int*)(m_inputs[0]->raw_ptr()) + 11)); - m_extra_err_msg += ssprintf("%d,", *((int*)(m_inputs[1]->raw_ptr()) + 11)); - m_extra_err_msg += ssprintf("%d,", *((int*)(m_inputs[2]->raw_ptr()) + 11)); - m_extra_err_msg += ssprintf("%d,", *((int*)(m_inputs[3]->raw_ptr()) + 11)); - } if (MGB_GETENV("MGB_AUTOCHECK_DUMP_INPUT")) { static size_t run_id; auto fname = output_file(ssprintf("autocheck-inp-%zu.bin", run_id++));
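For readers tracing what this revert removes: the deleted `TestOprMisc.Cumprod` case in `src/opr/test/misc.cpp` carried a scalar reference for Cumprod over an `(A, B, C)` split of the shape, exercising the `axis`, `exclusive` and `reverse` flags. The block below is a vectorized NumPy sketch of that same reference, assuming only the flag behaviour exercised by the removed test; the helper name is illustrative and is not part of MegEngine.

```python
import numpy as np

def cumprod_ref(x, axis=0, exclusive=False, reverse=False):
    """NumPy mirror of the reverted scalar reference used by AutoOprChecker."""
    x = np.asarray(x, dtype=np.float64)
    if reverse:
        x = np.flip(x, axis=axis)          # reverse: accumulate from the far end
    y = np.cumprod(x, axis=axis)
    if exclusive:
        # exclusive: output j holds the product of elements strictly before j,
        # so prepend a slab of ones and drop the last cumulative slab
        ones = np.ones_like(np.take(x, [0], axis=axis))
        y = np.concatenate([ones, y], axis=axis)
        y = np.take(y, np.arange(x.shape[axis]), axis=axis)
    return np.flip(y, axis=axis) if reverse else y

# e.g. cumprod_ref([2., 3., 4.], exclusive=True) -> [1., 2., 6.]
```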
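The revert also drops the `MGB_IMPL_OPR_GRAD(Cumprod)` rule in `src/opr/impl/misc.cpp`, which is why SAFE_DIV disappears alongside it: the backward pass was expressed as a reverse-direction Cumsum of `y * dy` divided by the input via SAFE_DIV (which, per the removed trait, returns 0 where the divisor is 0). A minimal NumPy sketch of that identity for the default non-exclusive, non-reversed case follows; the function names are illustrative, and the zero handling mirrors SAFE_DIV rather than a true subgradient.

```python
import numpy as np

def safe_div(a, b):
    # removed trait: SAFE_DIV(x, y) = y != 0 ? x / y : 0
    return np.where(b != 0, a / np.where(b != 0, b, 1), 0.0)

def cumprod_grad(x, dy, axis=0):
    # forward output y, then dL/dx = SAFE_DIV(reversed_cumsum(y * dy), x)
    y = np.cumprod(x, axis=axis)
    w = y * dy
    rev_cumsum = np.flip(np.cumsum(np.flip(w, axis=axis), axis=axis), axis=axis)
    return safe_div(rev_cumsum, x)

x = np.array([2., 3., 4.])
print(cumprod_grad(x, np.ones_like(x)))  # [16. 10.  6.], matches the analytic gradient
```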