
opr_param_defs.fbs 61 kB

// generated by gen_param_defs.py for 53ca6252b5b9568f67b9767fb4fd0d2ef6b717b28a861692e9105d5f796a9472
include "dtype.fbs";
namespace mgb.serialization.fbs.param;
enum ArgsortOrder : uint {
    ASCENDING = 0,
    DESCENDING = 1,
}
enum BNFwdMode : uint {
    /// Training phase.
    TRAINING = 0,
    /// Inference phase.
    INFERENCE = 1,
}
enum BNParamDim : uint {
    /// Dim of params (Sigma, Mu) is 1 x 1 x H x W
    DIM_11HW = 0,
    /// Dim of params (Sigma, Mu) is 1 x C x H x W
    DIM_1CHW = 1,
    /// Dim of params (Sigma, Mu) is 1 x C x 1 x 1
    DIM_1C11 = 2,
    /// Dim of params (Sigma, Mu) is 1 x 1 x 1 x C
    DIM_111C = 3,
}
enum CondTakeMode : uint {
    /// take if ``abs(data-val)<eps``
    EQ = 0,
    /// take if ``abs(data-val)>=eps``
    NEQ = 1,
    /// take if ``data<val``
    LT = 2,
    /// take if ``data<=val``
    LEQ = 3,
    /// take if ``data>val``
    GT = 4,
    /// take if ``data>=val``
    GEQ = 5,
}
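// Editor's sketch (not part of the generated schema): the CondTake mode
// semantics above, written out with numpy. The helper name `cond_take` and
// its defaults are assumptions for illustration only.
//
//     import numpy as np
//
//     def cond_take(data, val, mode="EQ", eps=1e-6):
//         masks = {
//             "EQ":  np.abs(data - val) < eps,
//             "NEQ": np.abs(data - val) >= eps,
//             "LT":  data < val,
//             "LEQ": data <= val,
//             "GT":  data > val,
//             "GEQ": data >= val,
//         }
//         m = masks[mode]
//         # the op returns the taken values together with their flat indices
//         return data[m], np.flatnonzero(m)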
enum Conv3DBiasNonlineMode : uint {
    IDENTITY = 0,
    RELU = 1,
    SIGMOID = 2,
}
enum ConvBiasV0NonlineMode : uint {
    IDENTITY = 0,
    RELU = 1,
    SIGMOID = 2,
    H_SWISH = 3,
}
enum ConvPoolingMethod : uint {
    WITH_TEXTURE_OBJ = 0,
    WITH_SHARED_MEM = 1,
}
enum ConvPoolingNonlineMode : uint {
    IDENTITY = 0,
    RELU = 1,
    SIGMOID = 2,
}
enum ConvPoolingPoolMode : uint {
    AVERAGE = 0,
    MAX_ = 1,
}
/// convolution data/filter/output format; see :class:`RelayoutFormat` for more
/// details
enum ConvolutionFormat : uint {
    NCHW = 0,
    NHWC = 1,
    NHWCD4 = 2,
    NCHW4 = 3,
    NCHW8 = 4,
    NCHW32 = 5,
    NCHW88 = 6,
    NCHW44 = 7,
    NCHW44_DOT = 8,
    /// NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is
    /// nchw32 layout
    NCHW4_NCHW32 = 9,
    /// NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is
    /// nchw4 layout
    NCHW32_NCHW4 = 10,
    /// NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw
    /// layout
    NCHW4_NCHW = 11,
    /// NHWC_NCHW means input tensors are nhwc layout, output tensor is nchw
    /// layout
    NHWC_NCHW = 12,
    /// NHWC_NCHW4_IC_SMALL means input tensors are nhwc (c < 4) layout, output
    /// tensor is nchw4 layout, padding c=4
    NHWC_NCHW4_IC_SMALL = 13,
    /// NCHW_NCHW4_IC_SMALL means input tensors are nchw (c < 4) layout, output
    /// tensor is nchw4 layout, padding c=4
    NCHW_NCHW4_IC_SMALL = 14,
    /// CHWN4 is currently only used on the Nvidia platform for fast
    /// implementation of convolution using CUDA/SASS. The channels are split
    /// into groups of 4 channels.
    CHWN4 = 15,
    /// NCHW64 is designed for convolution implementations utilizing TensorCore
    /// instructions for 4-bit integers on Nvidia platforms
    NCHW64 = 16,
    /// NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc
    /// layout
    NCHW4_NHWC = 17,
}
enum Convolution3DDataType : uint {
    /// input/output both float32/float16
    FLOAT = 0,
    /// input/output both float16, the internal compute is float32
    FLOAT_IO16xC32 = 1,
}
enum Convolution3DFormat : uint {
    NCDHW = 0,
    NDHWC = 1,
}
enum Convolution3DMode : uint {
    CROSS_CORRELATION = 0,
    CONVOLUTION = 1,
}
enum Convolution3DSparse : uint {
    /// dense convolution: filter shape should be [oc, ic, spatial...] if
    /// format is NCDHW, [oc, spatial..., ic] if format is NDHWC
    DENSE = 0,
    /// group convolution: filter shape should be [group, oc_per_group,
    /// ic_per_group, spatial...] if format is NCDHW, [group, oc_per_group,
    /// spatial..., ic_per_group] if format is NDHWC
    GROUP = 1,
}
enum ConvolutionV0DataType : uint {
    /// input/output both float32/float16
    FLOAT = 0,
    INT8x8x16 = 1,
    INT8x8x32 = 2,
    /// input/output both float16, the internal compute is float32
    FLOAT_IO16xC32 = 3,
    /// input QuantizedAsymm8, output QuantizedS32
    QUINT8x8x32 = 4,
    /// input int8, output specified by tensor DType
    INT8x8xX = 5,
    /// input QuantizedAsymm4, output QuantizedS32
    QUINT4x4x32 = 6,
}
/// convolution data/filter/output format; see :class:`RelayoutFormat` for more
/// details
enum ConvolutionV0Format : uint {
    NCHW = 0,
    NHWC = 1,
    NHWCD4 = 2,
    NCHW4 = 3,
    NCHW8 = 4,
    NCHW32 = 5,
    NCHW88 = 6,
    NCHW44 = 7,
    NCHW44_DOT = 8,
    /// NCHW layout with weights transformed by winograd
    NCHW_WINOGRAD = 9,
    /// NCHW88 layout with weights transformed by winograd
    NCHW88_WINOGRAD = 10,
    /// NCHW44 layout with weights transformed by winograd
    NCHW44_WINOGRAD = 11,
    /// NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is
    /// nchw32 layout
    NCHW4_NCHW32 = 12,
    /// NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is
    /// nchw4 layout
    NCHW32_NCHW4 = 13,
    /// NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw
    /// layout
    NCHW4_NCHW = 14,
    /// NHWC_NCHW means input tensors are nhwc layout, output tensor is nchw
    /// layout
    NHWC_NCHW = 15,
    /// NHWC_NCHW4_IC_SMALL means input tensors are nhwc (c < 4) layout, output
    /// tensor is nchw4 layout, padding c=4
    NHWC_NCHW4_IC_SMALL = 16,
    /// NCHW_NCHW4_IC_SMALL means input tensors are nchw (c < 4) layout, output
    /// tensor is nchw4 layout, padding c=4
    NCHW_NCHW4_IC_SMALL = 17,
    /// CHWN4 is currently only used on the Nvidia platform for fast
    /// implementation of convolution using CUDA/SASS. The channels are split
    /// into groups of 4 channels.
    CHWN4 = 18,
    /// NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc
    /// layout
    NCHW4_NHWC = 19,
}
enum ConvolutionV0Mode : uint {
    CROSS_CORRELATION = 0,
    CONVOLUTION = 1,
}
enum ConvolutionV0Sparse : uint {
    /// dense convolution: filter shape should be [oc, ic, spatial...] if
    /// format is NCHW, [oc, spatial..., ic] if format is NHWC
    DENSE = 0,
    /// group convolution: filter shape should be [group, oc_per_group,
    /// ic_per_group, spatial...] if format is NCHW, [group, oc_per_group,
    /// spatial..., ic_per_group] if format is NHWC
    GROUP = 1,
}
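// Editor's sketch (not part of the generated schema): the DENSE vs GROUP
// filter shapes documented above, checked with numpy for NCHW format; all
// sizes below are made-up examples.
//
//     import numpy as np
//
//     oc, ic, fh, fw, group = 8, 4, 3, 3, 2
//     dense_filter = np.zeros((oc, ic, fh, fw))  # DENSE: [oc, ic, fh, fw]
//     group_filter = np.zeros((group, oc // group, ic // group, fh, fw))
//     # GROUP: [group, oc_per_group, ic_per_group, fh, fw]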
/// Specifies special computation modes, e.g. different combinations of
/// intermediate result data types.
enum ConvolutionV1ComputeMode : uint {
    /// No special requirements on the precision of intermediate results.
    DEFAULT = 0,
    /// Use Float32 accumulator and intermediate result. Only supported when
    /// input and output are Float16.
    FLOAT32 = 1,
}
enum CvtColorMode : uint {
    RGB2GRAY = 0,
    RGB2YUV = 1,
    YUV2RGB = 2,
    GRAY2RGB = 3,
    RGBA2RGB = 4,
    RGBA2BGR = 5,
    RGBA2GRAY = 6,
    RGB2BGR = 7,
    BGR2GRAY = 8,
    BGR2RGB = 9,
    /// For historical reasons, referred to as YCC by opencv
    YUV2GRAY_NV21 = 10,
    YUV2RGB_NV21 = 11,
    YUV2BGR_NV21 = 12,
    YUV2GRAY_NV12 = 13,
    YUV2RGB_NV12 = 14,
    YUV2BGR_NV12 = 15,
    YUV2GRAY_YV12 = 16,
    YUV2RGB_YV12 = 17,
    YUV2BGR_YV12 = 18,
    YUV2GRAY_YU12 = 19,
    YUV2RGB_YU12 = 20,
    YUV2BGR_YU12 = 21,
    YCrCb2RGB = 22,
    YCrCb2BGR = 23,
    /// BT601 yuv format, referred to as YUV by opencv
    BT601_YUV2RGB_NV21 = 24,
    BT601_YUV2BGR_NV21 = 25,
    BT601_YUV2RGB_NV12 = 26,
    BT601_YUV2BGR_NV12 = 27,
    BT601_YUV2RGB_YV12 = 28,
    BT601_YUV2BGR_YV12 = 29,
    BT601_YUV2RGB_YU12 = 30,
    BT601_YUV2BGR_YU12 = 31,
}
enum DctChannelSelectV0FastImpl : uint {
    NONE = 0,
    FIX_32_MASK = 1,
}
enum ElemwiseMode : uint {
    /// unary: max(x, 0)
    RELU = 0,
    /// unary: abs(x)
    ABS = 1,
    /// unary: acos(x)
    ACOS = 2,
    /// unary: asin(x)
    ASIN = 3,
    /// unary: ceil(x)
    CEIL = 4,
    /// unary: cos(x)
    COS = 5,
    /// unary: exp(x)
    EXP = 6,
    /// unary: numerically stable exp(x)-1
    EXPM1 = 7,
    /// unary: floor(x)
    FLOOR = 8,
    /// unary: natural logarithm, log(x)
    LOG = 9,
    /// unary: numerically stable log(x+1)
    LOG1P = 10,
    /// unary: -x
    NEGATE = 11,
    /// unary: 1/(1+exp(-x))
    SIGMOID = 12,
    /// unary: sin(x)
    SIN = 13,
    /// unary: tanh(x)
    TANH = 14,
    /// binary: x > 0 ? y : -y
    ABS_GRAD = 15,
    /// binary: x + y
    ADD = 16,
    /// binary: floor(x / y)
    FLOOR_DIV = 17,
    /// binary: max(x, y)
    MAX_ = 18,
    /// binary: min(x, y)
    MIN_ = 19,
    /// binary: x % y or fmodf(x, y)
    MOD = 20,
    /// binary: x * y
    MUL = 21,
    /// binary: pow(x, y)
    POW = 22,
    /// binary: x * (1 - x) * y
    SIGMOID_GRAD = 23,
    /// binary: x - y
    SUB = 24,
    /// binary: (x > 0) * y
    SWITCH_GT0 = 25,
    /// binary: (1 - x * x) * y
    TANH_GRAD = 26,
    /// binary: x / y
    TRUE_DIV = 27,
    /// binary: numerically stable log(exp(x) + exp(y))
    LOG_SUM_EXP = 28,
    /// binary: x < y
    LT = 29,
    /// binary: x <= y
    LEQ = 30,
    /// binary: x == y
    EQ = 31,
    /// bitwise binary: x << y. Note that the result is undefined if y < 0 or
    /// y >= bitwidth. Logical shift is performed for unsigned integers, and
    /// arithmetic shift for signed ones.
    SHL = 32,
    /// bitwise binary: x >> y; see SHL mode for more details
    SHR = 33,
    /// ternary: x <= y ? z : 0
    COND_LEQ_MOV = 34,
    /// compute ``a * b + c`` where c must either have the same layout as a or
    /// b, or be a scalar
    FUSE_MUL_ADD3 = 35,
    /// compute ``a * A + b * B`` where a and b must have equal layout, and A
    /// and B must have equal layout. In the inputs, ``b`` and ``B`` can be
    /// swapped
    FUSE_MUL_ADD4 = 36,
    /// binary: max(x+y, 0)
    FUSE_ADD_RELU = 37,
    /// binary: 1/(1+exp(-(x+y)))
    FUSE_ADD_SIGMOID = 38,
    /// binary: tanh(x+y)
    FUSE_ADD_TANH = 39,
    /// unary: rational approximation of tanh(x)
    FAST_TANH = 40,
    /// binary: grad of the rational approximation of tanh(x)
    FAST_TANH_GRAD = 41,
    /// unary: round(x), the nearest integer value to x, rounding halfway cases
    /// away from zero. Float only.
    ROUND = 42,
    /// binary: rounded higher l bits of x * y, where l is the bit length of x.
    RMULH = 43,
    /// binary: atan2(y, x)
    ATAN2 = 44,
    /// unary: erf(x)
    ERF = 45,
    /// unary: inverse function of erf(x)
    ERFINV = 46,
    /// unary: erfc(x)
    ERFC = 47,
    /// unary: inverse function of erfc(x)
    ERFCINV = 48,
    /// unary: x * clip(x + 3, 0, 6) / 6
    H_SWISH = 49,
    /// binary: x < -3 ? 0 : (x > 3 ? y : (2 * x + 3) / 6 * y)
    H_SWISH_GRAD = 50,
    /// binary: hswish(x+y)
    FUSE_ADD_H_SWISH = 51,
    /// unary: !x
    NOT = 52,
    /// binary: x && y
    AND = 53,
    /// binary: x || y
    OR = 54,
    /// binary: x ^ y
    XOR = 55,
    /// unary: x / (1 + exp(-x))
    SILU = 56,
    /// binary: grad(x / (1 + exp(-x)))
    SILU_GRAD = 57,
    /// unary: x Phi(x)
    GELU = 58,
    /// binary: grad(x Phi(x))
    GELU_GRAD = 59,
}
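// Editor's sketch (not part of the generated schema): numpy references for a
// few of the ElemwiseMode formulas above, to make them concrete. The function
// names are made up for illustration.
//
//     import numpy as np
//
//     def h_swish(x):               # H_SWISH: x * clip(x + 3, 0, 6) / 6
//         return x * np.clip(x + 3, 0, 6) / 6
//
//     def fuse_add_relu(x, y):      # FUSE_ADD_RELU: max(x + y, 0)
//         return np.maximum(x + y, 0)
//
//     def log_sum_exp(x, y):        # LOG_SUM_EXP, numerically stable
//         m = np.maximum(x, y)
//         return m + np.log(np.exp(x - m) + np.exp(y - m))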
enum ElemwiseMultiTypeMode : uint {
    /// compute ``a * b + c`` requiring that ``a`` be int16 and ``b`` and ``c``
    /// int32, and the result is int32. This mode is optimized for the
    /// channel-broadcast case, i.e. ``a`` has shape (A, B, C) and ``b`` and
    /// ``c`` have shape (1, C, 1)
    FUSE_MUL_ADD3_INT16x32x32x32 = 0,
    /// compute ``a * b + c`` where the input ``a`` is of an integer type,
    /// ``b`` and ``c`` are both ``float32``, and the result is ``int8``. This
    /// is currently only optimized for ``(1, x)`` broadcast for ``b`` and
    /// ``c``. Computation is carried out in floating point and results are
    /// rounded towards zero with saturated cast to int.
    FUSE_MUL_ADD3_IXxF32xF32xI8 = 1,
    /// Compute ``a >> b``, round the result according to the lower ``b`` bits
    /// of ``a`` and make a saturating conversion to int8; ``a`` should be an
    /// integer tensor and ``b`` an int8 scalar.
    ROUND_SHR_SATURATE_IXxI8xI8 = 2,
    /// Fused operation of an int16 elemwise add, an int16 rounding multiply
    /// high and an int16 to int8 rounding right shift with saturation.
    FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT16x16x16x8 = 3,
    /// Fused operation of an int32 elemwise add, an int32 rounding multiply
    /// high and an int32 to int8 rounding right shift with saturation.
    FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT32x32x32x8 = 4,
    /// Compute ``a >> b``, round the result according to the lower ``b`` bits
    /// of ``a`` and make a saturating conversion to int16; ``a`` should be an
    /// integer tensor and ``b`` an int8 scalar.
    ROUND_SHR_SATURATE_IXxI8xI16 = 5,
    /// Fused elemwise add of two quantized int8 tensors with specified output
    /// quantized dtype
    QADD = 6,
    /// Fused elemwise add of two quantized int8 tensors followed by ReLU and
    /// typecvt to specified dtype
    QFUSE_ADD_RELU = 7,
    /// Fused elemwise multiply of two quantized int8 tensors with specified
    /// output quantized dtype
    QMUL = 8,
    /// Fused elemwise min of two quantized int8 tensors with specified output
    /// quantized dtype
    QMIN = 9,
    /// quantized: max(x, y), with specified output quantized dtype
    QMAX = 10,
    /// quantized: x - y
    QSUB = 11,
    /// quantized: x / y
    QTRUE_DIV = 12,
    /// quantized: sigmoid(x + y)
    QFUSE_ADD_SIGMOID = 13,
    /// quantized: tanh(x + y)
    QFUSE_ADD_TANH = 14,
    /// quantized: x > 0 ? x : 0
    QRELU = 15,
    /// quantized: x > 0 ? x : -x
    QABS = 16,
    /// quantized: sigmoid(x)
    QSIGMOID = 17,
    /// quantized: exp(x)
    QEXP = 18,
    /// quantized: tanh(x)
    QTANH = 19,
    /// quantized: x * y + z
    QFUSE_MUL_ADD3 = 20,
    /// quantized: fast_tanh(x)
    QFAST_TANH = 21,
    /// quantized: -x
    QNEGATE = 22,
    /// quantized: acos(x)
    QACOS = 23,
    /// quantized: asin(x)
    QASIN = 24,
    /// quantized: ceil(x)
    QCEIL = 25,
    /// quantized: cos(x)
    QCOS = 26,
    /// quantized: expm1(x)
    QEXPM1 = 27,
    /// quantized: floor(x)
    QFLOOR = 28,
    /// quantized: log(x)
    QLOG = 29,
    /// quantized: log1p(x)
    QLOG1P = 30,
    /// quantized: sin(x)
    QSIN = 31,
    /// quantized: round(x)
    QROUND = 32,
    /// quantized: erf(x)
    QERF = 33,
    /// quantized: erfinv(x)
    QERFINV = 34,
    /// quantized: erfc(x)
    QERFC = 35,
    /// quantized: erfcinv(x)
    QERFCINV = 36,
    /// quantized: abs_grad
    QABS_GRAD = 37,
    /// quantized floor_div
    QFLOOR_DIV = 38,
    /// quantized mod
    QMOD = 39,
    /// quantized sigmoid_grad
    QSIGMOID_GRAD = 40,
    /// quantized switch_gt0
    QSWITCH_GT0 = 41,
    /// quantized tanh_grad
    QTANH_GRAD = 42,
    /// quantized lt
    QLT = 43,
    /// quantized leq
    QLEQ = 44,
    /// quantized eq
    QEQ = 45,
    /// quantized pow
    QPOW = 46,
    /// quantized log_sum_exp
    QLOG_SUM_EXP = 47,
    /// quantized fast_tanh_grad
    QFAST_TANH_GRAD = 48,
    /// quantized atan2
    QATAN2 = 49,
    /// quantized cond_leq_mov
    QCOND_LEQ_MOV = 50,
    /// quantized h_swish
    QH_SWISH = 51,
    /// quantized h_swish(x+y)
    QFUSE_ADD_H_SWISH = 52,
    /// quantized h_swish_grad
    QH_SWISH_GRAD = 53,
    /// compute ``a * b + c`` requiring that ``a`` be int16 and ``b`` and ``c``
    /// float32, and the result is float32.
    FUSE_MUL_ADD3_INT16xF32xF32xF32 = 54,
    /// compute ``a * b`` requiring that ``a`` be int16 and ``b`` float32, and
    /// the result is float32.
    MUL_INT16xF32xF32 = 55,
    /// compute ``a * b + c`` requiring that ``a`` be uint8 and ``b`` and ``c``
    /// float32, and the result is float32.
    FUSE_MUL_ADD3_UINT8xF32xF32xF32 = 56,
}
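// Editor's sketch (not part of the generated schema): a plain-numpy reading
// of ROUND_SHR_SATURATE_IXxI8xI8 above. The exact tie-breaking used by the
// kernels may differ; this assumes round-half-up on the discarded bits.
//
//     import numpy as np
//
//     def round_shr_saturate_i8(a, b):
//         a = a.astype(np.int64)
//         bias = (1 << (b - 1)) if b > 0 else 0  # rounding term for lower b bits
//         out = (a + bias) >> b
//         return np.clip(out, -128, 127).astype(np.int8)  # saturate to int8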
enum MatrixMulFormat : uint {
    /// Normal matrix mul: (M, K) x (K, N) = (M, N)
    DEFAULT = 0,
    /// Split 4 from M and K, better for neon compute: (M/4, K/4, 4(k), 4(m)) x
    /// (K/4, N, 4(k)). If transposeA is set, the layout is (K/4, M/4, 4(k),
    /// 4(m)) x (K/4, N, 4(k))
    MK4 = 1,
    /// Split 8 from M and K, better for neon compute: (M/8, K/8, 8(k), 8(m)) x
    /// (K/8, N, 8(k)). If transposeA is set, the layout is (K/8, M/8, 8(k),
    /// 8(m)) x (K/8, N, 8(k))
    MK8 = 2,
    /// Split 4 from M and K, better for neon dotprod: (M/4, K/4, 4(m), 4(k)) x
    /// (K/4, N, 4(k)). If transposeA is set, the layout is (K/4, M/4, 4(m),
    /// 4(k)) x (K/4, N, 4(k))
    MK4_DOT = 3,
}
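// Editor's sketch (not part of the generated schema): how the MK4 packing of
// the A matrix described above can be produced with numpy; `pack_mk4` is a
// hypothetical helper, and M and K are assumed divisible by 4.
//
//     import numpy as np
//
//     def pack_mk4(a):                         # (M, K) -> (M/4, K/4, 4(k), 4(m))
//         M, K = a.shape
//         return (a.reshape(M // 4, 4, K // 4, 4)  # (M/4, 4(m), K/4, 4(k))
//                  .transpose(0, 2, 3, 1)          # (M/4, K/4, 4(k), 4(m))
//                  .copy())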
enum MatrixMulV0DataType : uint {
    /// input/output both float32/float16
    FLOAT = 0,
    INT8x8x16 = 1,
    INT8x8x32 = 2,
    /// input/output both float16, the internal compute is float32
    FLOAT_IO16xC32 = 3,
    /// input QuantizedAsymm8, output QuantizedS32
    QUINT8x8x32 = 4,
    /// input QuantizedAsymm4, output QuantizedS32
    QUINT4x4x32 = 5,
}
/// Specifies special computation modes, e.g. different combinations of
/// intermediate result data types.
enum MatrixMulV1ComputeMode : uint {
    /// No special requirements on the precision of intermediate results.
    DEFAULT = 0,
    /// Use Float32 accumulator and intermediate result. Only supported when
    /// input and output are Float16.
    FLOAT32 = 1,
}
enum PaddingPaddingMode : uint {
    /// aaaaaa|abcdefgh|hhhhhhh
    REPLICATE = 0,
    /// fedcba|abcdefgh|hgfedcb
    REFLECT = 1,
    /// iiiiii|abcdefgh|iiiiiii
    CONSTANT = 2,
}
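// Editor's sketch (not part of the generated schema): the three border
// diagrams above line up with numpy's padding modes. Note that REFLECT here
// repeats the edge element, i.e. numpy's "symmetric"; numpy's own "reflect"
// matches the REFLECT_101 convention used elsewhere in this file.
//
//     import numpy as np
//
//     x = np.array([1, 2, 3, 4])                        # "abcd"
//     np.pad(x, 2, mode="edge")                         # REPLICATE: aa|abcd|dd
//     np.pad(x, 2, mode="symmetric")                    # REFLECT:   ba|abcd|dc
//     np.pad(x, 2, mode="constant", constant_values=9)  # CONSTANT:  ii|abcd|ii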
enum PoolingV0Mode : uint {
    /// maximum value inside pooling window
    MAX_ = 0,
    /// arithmetic mean of all values inside pooling window. Padding values are
    /// taken into account and are viewed as zero
    AVERAGE = 1,
    /// arithmetic mean of all values inside pooling window. No padding is
    /// used.
    AVERAGE_COUNT_EXCLUDE_PADDING = 2,
}
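// Editor's sketch (not part of the generated schema): the difference between
// the two average modes above, for one window that overlaps two padded zeros:
//
//     import numpy as np
//
//     window = np.array([0, 0, 5, 7])  # two padding zeros + two valid values
//     window.sum() / 4                 # AVERAGE -> 3.0 (padding counted)
//     window[2:].sum() / 2             # AVERAGE_COUNT_EXCLUDE_PADDING -> 6.0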
enum RNNCellNonlineMode : uint {
    IDENTITY = 0,
    RELU = 1,
    TANH = 2,
}
enum ROIAlignV0Mode : uint {
    MAX_ = 0,
    AVERAGE = 1,
}
enum ROIPoolingMode : uint {
    /// maximum value inside pooling window; pooling result would be 0 if
    /// pooling window is empty
    MAX_ = 0,
    /// arithmetic mean of all values inside pooling window; pooling result
    /// would be 0 if pooling window is empty
    AVERAGE = 1,
}
enum ReduceDataType : uint {
    /// input/output are the same data type, and the internal computation type
    /// is chosen by the input/output dtypes and the reduction mode.
    /// Currently, ``DEFAULT`` mode means:
    ///
    /// +--------------------+-----------------------------------+-------------------+
    /// | Input/Output DType | Mode                              | Computation DType |
    /// +====================+===================================+===================+
    /// | FLOAT32            | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | FLOAT32           |
    /// +--------------------+-----------------------------------+-------------------+
    /// | FLOAT16            | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | FLOAT16           |
    /// +--------------------+-----------------------------------+-------------------+
    /// | INT32              | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | INT32             |
    /// +--------------------+-----------------------------------+-------------------+
    /// | INT8               | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | INT8              |
    /// +--------------------+-----------------------------------+-------------------+
    /// | QuantizedS8        | MIN/MAX                           | QuantizedS8       |
    /// +--------------------+-----------------------------------+-------------------+
    /// | QuantizedS8        | MEAN/SUM                          | QuantizedS32      |
    /// +--------------------+-----------------------------------+-------------------+
    /// | Quantized8Asymm    | MIN/MAX                           | Quantized8Asymm   |
    /// +--------------------+-----------------------------------+-------------------+
    /// | Quantized8Asymm    | MEAN/SUM                          | QuantizedS32      |
    /// +--------------------+-----------------------------------+-------------------+
    ///
    DEFAULT = 0,
    /// Deprecated. This was replaced by FLOAT_O16xC32, and the input's dtype
    /// is decided by the actual input tensor.
    FLOAT_IO16xC32 = 1,
    /// compute and output are both float32
    FLOAT_O32xC32 = 2,
    /// compute is float32, output is float16
    FLOAT_O16xC32 = 3,
    /// input quint8, compute and output are qint32
    QUINT_I8xO32 = 4,
    /// input qint8, compute and output are qint32
    QINT_I8xO32 = 5,
}
enum ReduceMode : uint {
    SUM = 0,
    /// sum of x * x for each element x
    SUM_SQR = 1,
    PRODUCT = 2,
    MIN_ = 3,
    MAX_ = 4,
    MEAN = 5,
}
enum ReduceV0Mode : uint {
    SUM = 0,
    /// sum of x * x for each element x
    SUM_SQR = 1,
    PRODUCT = 2,
    MIN_ = 3,
    MAX_ = 4,
}
enum ReduceV1DataType : uint {
    /// input/output are the same data type, and the internal computation type
    /// is chosen by the input/output dtypes and the reduction mode.
    /// Currently, ``DEFAULT`` mode means:
    ///
    /// +--------------------+-----------------------------------+-------------------+
    /// | Input/Output DType | Mode                              | Computation DType |
    /// +====================+===================================+===================+
    /// | FLOAT32            | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | FLOAT32           |
    /// +--------------------+-----------------------------------+-------------------+
    /// | FLOAT16            | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | FLOAT16           |
    /// +--------------------+-----------------------------------+-------------------+
    /// | INT32              | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | INT32             |
    /// +--------------------+-----------------------------------+-------------------+
    /// | INT8               | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | INT8              |
    /// +--------------------+-----------------------------------+-------------------+
    /// | QuantizedS8        | MIN/MAX                           | QuantizedS8       |
    /// +--------------------+-----------------------------------+-------------------+
    /// | QuantizedS8        | MEAN/SUM                          | QuantizedS32      |
    /// +--------------------+-----------------------------------+-------------------+
    /// | Quantized8Asymm    | MIN/MAX                           | Quantized8Asymm   |
    /// +--------------------+-----------------------------------+-------------------+
    /// | Quantized8Asymm    | MEAN/SUM                          | QuantizedS32      |
    /// +--------------------+-----------------------------------+-------------------+
    ///
    DEFAULT = 0,
    /// Deprecated. This was replaced by FLOAT_O16xC32, and the input's dtype
    /// is decided by the actual input tensor.
    FLOAT_IO16xC32 = 1,
    /// compute and output are both float32
    FLOAT_O32xC32 = 2,
    /// compute is float32, output is float16
    FLOAT_O16xC32 = 3,
    /// input quint8, compute and output are qint32
    QUINT_I8xO32 = 4,
    /// input qint8, compute and output are qint32
    QINT_I8xO32 = 5,
}
enum ReduceV1Mode : uint {
    SUM = 0,
    /// sum of x * x for each element x
    SUM_SQR = 1,
    PRODUCT = 2,
    MIN_ = 3,
    MAX_ = 4,
    MEAN = 5,
}
/// Relayout mode.
///
/// **Naming conventions**
///
/// 1. ``A_B`` means change from layout format ``A`` to ``B``.
/// 2. ``INTER_WEIGHT_xx`` means relayout the weight for faster processing by
///    :attr:`Convolution.Format.NHWCD4` convolutions.
/// 3. A suffix of ``I`` means ``Image2DPack4TensorFormat`` tensor format is
///    used for faster processing on GPUs.
///
/// **Layout definitions**
///
/// * ``NCHW`` layout: ``{N, C, H, W}``
/// * ``NHWC`` layout: ``{N, H, W, C}``
/// * ``NHWCD4`` layout: ``{N, H, (C + 3) / 4, W, 4}``
/// * ``NHWCD4I`` layout: with ``align_axis = 2``
/// * ``NCHW4`` layout: ``{N, C/4, H, W, 4}``
/// * ``NCHW88`` layout: ``{N, C/8, H, W, 8}``
/// * ``CHWN4`` layout: ``{C/4, H, W, N, 4}``
/// * ``NCHW64`` layout: ``{N, C/64, H, W, 64}``
///
/// **Float weight transformation definitions**
///
/// +---------------+---------------------------------+--------------------+--------------------------------------+------+
/// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                        | Axis |
/// +===============+=================================+====================+======================================+======+
/// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 4 == 0``    | ``{OC/4, FH, FW, IC, 4}``            | 3    |
/// +---------------+---------------------------------+--------------------+--------------------------------------+------+
/// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0``  | ``{GROUP, OCPG/4, FH, FW, ICPG, 4}`` | 4    |
/// |               |                                 | ``ICPG % 4 == 0``  |                                      |      |
/// +---------------+---------------------------------+--------------------+--------------------------------------+------+
/// | CHAN          | ``{GROUP, 1, 1, FH, FW}``       | ``GROUP % 4 == 0`` | ``{GROUP/4, 1, FH, FW, 4}``          | 1    |
/// +---------------+---------------------------------+--------------------+--------------------------------------+------+
///
/// **Float weight transformation nchw88 definitions**
///
/// +---------------+---------------------------------+--------------------+----------------------------------------+
/// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                          |
/// +===============+=================================+====================+========================================+
/// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 8 == 0``    | ``{OC/8, IC/8, FH, FW, 8(IC), 8(OC)}`` |
/// |               |                                 | ``IC % 8 == 0``    |                                        |
/// +---------------+---------------------------------+--------------------+----------------------------------------+
/// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 8 == 0``  | ``{GROUP, OCPG/8, ICPG/8, FH, FW,      |
/// |               |                                 | ``ICPG % 8 == 0``  | 8(ICPG), 8(OCPG)}``                    |
/// +---------------+---------------------------------+--------------------+----------------------------------------+
/// | CHAN          | ``{GROUP, 1, 1, FH, FW}``       | ``GROUP % 8 == 0`` | ``{GROUP/8, 1, FH, FW, 8}``            |
/// +---------------+---------------------------------+--------------------+----------------------------------------+
///
/// **Int8(DOT) weight transformation definitions**
///
/// +---------------+---------------------------------+--------------------+-------------------------------------------+------+
/// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                             | Axis |
/// +===============+=================================+====================+===========================================+======+
/// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 4 == 0``    | ``{OC/4, FH, FW, IC/4, 4, 4}``            | 3    |
/// +---------------+---------------------------------+--------------------+-------------------------------------------+------+
/// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0``  | ``{GROUP, OCPG/4, FH, FW, ICPG/4, 4, 4}`` | 4    |
/// |               |                                 | ``ICPG % 4 == 0``  |                                           |      |
/// +---------------+---------------------------------+--------------------+-------------------------------------------+------+
///
/// Note: the Axis column means the corresponding ``align_axis`` for the image
/// format when the ``I`` suffix is present.
///
/// Note: NCHW_NCHW4_WEIGHT will automatically pad oc and ic; you should remove
/// the padded oc in a later opr by setting the group and oc params with
/// NCHW4_NCHW.
///
enum RelayoutFormatV0Mode : uint {
    NHWC_NHWCD4 = 0,
    NHWCD4_NHWC = 1,
    NHWC_NHWCD4I = 2,
    NCHW_NHWCD4 = 3,
    NCHW_NHWCD4I = 4,
    NHWCD4I_NCHW = 5,
    NHWCD4_NCHW = 6,
    INTER_WEIGHT_DENSE = 7,
    INTER_WEIGHT_DENSEI = 8,
    INTER_WEIGHT_GROUP = 9,
    INTER_WEIGHT_GROUPI = 10,
    INTER_WEIGHT_CHAN = 11,
    INTER_WEIGHT_CHANI = 12,
    INTER_WEIGHT_DENSEI_DOT = 13,
    INTER_WEIGHT_GROUPI_DOT = 14,
    NCHW4_CHWN4 = 15,
    CHWN4_NCHW4 = 16,
    NCHW_NCHW88_CONV_DENSE_WEIGHT = 17,
    NCHW_NCHW88_CONV_CHAN_WEIGHT = 18,
    NCHW_NCHW88_CONV_GROUP_WEIGHT = 19,
    NCHW_NCHW88 = 20,
    NCHW88_NCHW = 21,
    NCHW_NCHW4_IC_SMALL = 22,
    NCHW_NCHW4_IC_SMALL_CONV_DENSE_WEIGHT = 23,
    NCHW_NCHW4 = 24,
    NCHW4_NCHW = 25,
    NCHW_NCHW4_WEIGHT = 26,
    NCHW_NCHW64 = 27,
    NCHW64_NCHW = 28,
    NCHW_NHWC = 29,
    NHWC_NCHW = 30,
    NHWCD4I_NHWC = 31,
}
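// Editor's sketch (not part of the generated schema): the NCHW -> NHWCD4
// relayout implied by the layout definitions above, in numpy; the helper name
// is made up, and C is padded up to a multiple of 4 as described.
//
//     import numpy as np
//
//     def nchw_to_nhwcd4(x):                   # x: (N, C, H, W)
//         n, c, h, w = x.shape
//         cb = (c + 3) // 4                    # number of 4-channel blocks
//         padded = np.zeros((n, cb * 4, h, w), x.dtype)
//         padded[:, :c] = x
//         # (N, CB, 4, H, W) -> (N, H, CB, W, 4)
//         return padded.reshape(n, cb, 4, h, w).transpose(0, 3, 1, 4, 2).copy()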
enum SeparableConvBorderMode : uint {
    BORDER_REPLICATE = 0,
    BORDER_REFLECT = 1,
    BORDER_REFLECT_101 = 2,
    BORDER_WRAP = 3,
    BORDER_CONSTANT = 4,
    BORDER_TRANSPARENT = 5,
    BORDER_ISOLATED = 6,
}
enum SeparableConv3DBorderMode : uint {
    BORDER_REPLICATE = 0,
    BORDER_REFLECT = 1,
    BORDER_REFLECT_101 = 2,
    BORDER_WRAP = 3,
    BORDER_CONSTANT = 4,
    BORDER_TRANSPARENT = 5,
    BORDER_ISOLATED = 6,
}
enum SpatialTfGridGeneratorMode : uint {
    AFFINE = 0,
}
enum SpatialTfSamplerMode : uint {
    BILINEAR = 0,
}
enum TopKMode : uint {
    /// only the value of the k'th element would be computed
    KTH_ONLY = 0,
    /// all the top-k values and corresponding indices would be computed; no
    /// order is guaranteed
    VALUE_IDX_NOSORT = 1,
    /// all the top-k values and corresponding indices sorted
    VALUE_IDX_SORTED = 2,
}
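// Editor's sketch (not part of the generated schema): the three TopK modes
// above, written with numpy for a 1-d input, assuming the smallest-k
// convention; the kernel's ordering convention may differ.
//
//     import numpy as np
//
//     def topk(x, k, mode="VALUE_IDX_SORTED"):
//         if mode == "KTH_ONLY":
//             return np.partition(x, k - 1)[k - 1]  # only the k-th value
//         idx = np.argpartition(x, k - 1)[:k]       # VALUE_IDX_NOSORT: no order
//         if mode == "VALUE_IDX_SORTED":
//             idx = idx[np.argsort(x[idx])]         # sort values ascending
//         return x[idx], idx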
enum WarpPerspectiveV1BorderMode : uint {
    /// aaaaaa|abcdefgh|hhhhhhh
    REPLICATE = 0,
    /// fedcba|abcdefgh|hgfedcb
    REFLECT = 1,
    /// gfedcb|abcdefgh|gfedcba
    REFLECT_101 = 2,
    /// cdefgh|abcdefgh|abcdefg
    WRAP = 3,
    /// iiiiii|abcdefgh|iiiiiii
    CONSTANT = 4,
    TRANSPARENT = 5,
    ISOLATED = 6,
}
enum WarpPerspectiveV1InterpolationMode : uint {
    NEAREST = 0,
    LINEAR = 1,
    AREA = 2,
    CUBIC = 3,
    LANCZOS4 = 4,
}
table Empty {
}
table Axis {
    axis:int = 0;
}
table ConvolutionV0 {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    data_type:ConvolutionV0DataType = FLOAT;
    sparse:ConvolutionV0Sparse = DENSE;
    /// convolution data/filter/output format; see :class:`RelayoutFormat` for
    /// more details
    format:ConvolutionV0Format = NCHW;
}
table ConvolutionV1 {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    /// Specifies special computation modes, e.g. different combinations of
    /// intermediate result data types.
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
table Convolution {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    /// convolution data/filter/output format; see :class:`RelayoutFormat` for
    /// more details
    format:ConvolutionFormat = NCHW;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
table MaskPropagate {
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// kernel height
    kernel_h:uint = 1;
    /// kernel width
    kernel_w:uint = 1;
    /// dilate height
    dilate_h:uint = 1;
    /// dilate width
    dilate_w:uint = 1;
}
table ConvPooling {
    method:ConvPoolingMethod = WITH_TEXTURE_OBJ;
    convMode:ConvolutionV0Mode = CROSS_CORRELATION;
    poolMode:ConvPoolingPoolMode = AVERAGE;
    nonlineMode:ConvPoolingNonlineMode = IDENTITY;
    pool_shape_h:uint = 1;
    pool_shape_w:uint = 1;
    pool_stride_h:uint = 1;
    pool_stride_w:uint = 1;
    pool_pad_h:uint = 0;
    pool_pad_w:uint = 0;
    conv_stride_h:uint = 1;
    conv_stride_w:uint = 1;
    conv_pad_h:uint = 0;
    conv_pad_w:uint = 0;
}
/// legacy conv_bias
table ConvBiasV0 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
}
/// active(conv(x, w) + bias)
table ConvBiasV1 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    data_type:ConvolutionV0DataType = FLOAT;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
}
/// active(conv(x, w) + bias)
table ConvBiasV2 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
/// active(conv(x, w) + bias)
table ConvBiasV3 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    /// detailed meaning: \see winograd in conv bias
    output_block_size:uint = 0;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
/// active(conv(x, w) + bias)
table ConvBias {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionFormat = NCHW;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
table SeparableConv {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    borderMode:SeparableConvBorderMode = BORDER_REPLICATE;
    is_symm_kernel:bool = true;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
    ksize_h:uint = 3;
    ksize_w:uint = 3;
    anchor_h:uint = 1;
    anchor_w:uint = 1;
}
table Images2Neibs {
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
    dilate_h:uint = 1;
    dilate_w:uint = 1;
    window_h:uint = 3;
    window_w:uint = 3;
}
table SlidingWindowTranspose {
    out_h:uint = 0;
    out_w:uint = 0;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
    dilate_h:uint = 1;
    dilate_w:uint = 1;
    window_h:uint = 3;
    window_w:uint = 3;
}
table PoolingV0 {
    mode:PoolingV0Mode = MAX_;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 2;
    stride_w:uint = 2;
    window_h:uint = 2;
    window_w:uint = 2;
    format:ConvolutionV0Format = NCHW;
}
table Pooling {
    mode:PoolingV0Mode = MAX_;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 2;
    stride_w:uint = 2;
    window_h:uint = 2;
    window_w:uint = 2;
    format:ConvolutionFormat = NCHW;
}
table Softmax {
    axis:int = -1;
}
table AdaptivePoolingV0 {
    mode:PoolingV0Mode = MAX_;
    format:ConvolutionV0Format = NCHW;
}
table AdaptivePooling {
    mode:PoolingV0Mode = MAX_;
    format:ConvolutionFormat = NCHW;
}
/// see ImageNet Classification with Deep Convolutional Neural Networks for the
/// meaning of the fields
table LRN {
    /// must be odd
    n:uint = 5;
    k:float = 2.;
    alpha:float = 1e-4;
    beta:float = 0.75;
}
table BN {
    param_dim:BNParamDim = DIM_11HW;
    fwd_mode:BNFwdMode = TRAINING;
    epsilon:double = 1e-4;
    avg_factor:double = 1.;
    scale:float = 1.;
    bias:float = 0.;
}
table ROIPooling {
    mode:ROIPoolingMode = MAX_;
    scale:float = 1.;
}
table WarpPerspectiveV1 {
    imode:WarpPerspectiveV1InterpolationMode = LINEAR;
    bmode:WarpPerspectiveV1BorderMode = REPLICATE;
    format:ConvolutionV0Format = NCHW;
    /// used for CONSTANT bmode
    border_val:float = .0;
}
table WarpPerspective {
    imode:WarpPerspectiveV1InterpolationMode = LINEAR;
    bmode:WarpPerspectiveV1BorderMode = REPLICATE;
    format:ConvolutionFormat = NCHW;
    /// used for CONSTANT bmode
    border_val:float = .0;
}
table SpatialTfGridGenerator {
    mode:SpatialTfGridGeneratorMode = AFFINE;
}
table SpatialTfSampler {
    mode:SpatialTfSamplerMode = BILINEAR;
}
table AddUpdate {
    alpha:float = 1.;
    beta:float = 1.;
    bias:float = 0.;
}
table Elemwise {
    mode:ElemwiseMode = RELU;
}
table ElemwiseMultiType {
    mode:ElemwiseMultiTypeMode = FUSE_MUL_ADD3_INT16x32x32x32;
}
/// power with constant exponent
table PowC {
    exp:float = 0;
}
/// 2d discrete cosine transform
table DctChannelSelectV0 {
    format:ConvolutionV0Format = NCHW;
    fastImpl:DctChannelSelectV0FastImpl = NONE;
    dct_block_size:int = 8;
}
/// 2d discrete cosine transform
table DctChannelSelect {
    format:ConvolutionFormat = NCHW;
    fastImpl:DctChannelSelectV0FastImpl = NONE;
    dct_block_size:int = 8;
}
table MatrixMulV0 {
    transposeA:bool = false;
    transposeB:bool = false;
    data_type:MatrixMulV0DataType = FLOAT;
}
table MatrixMulV1 {
    transposeA:bool = false;
    transposeB:bool = false;
    /// Specifies special computation modes, e.g. different combinations of
    /// intermediate result data types.
    compute_mode:MatrixMulV1ComputeMode = DEFAULT;
}
table MatrixMul {
    transposeA:bool = false;
    transposeB:bool = false;
    compute_mode:MatrixMulV1ComputeMode = DEFAULT;
    format:MatrixMulFormat = DEFAULT;
}
table SVD {
    /// Whether to compute the full-sized u and v or only the leading min(m, n)
    /// singular vectors. Ignored if compute_uv is false.
    full_matrices:bool = false;
    /// Whether the left (u) and right (v) singular vectors will be computed
    /// and outputted.
    compute_uv:bool = true;
}
/// legacy reduce
table ReduceV0 {
    mode:ReduceV0Mode = SUM;
    /// axis along which reduction is performed; if -1 is given, reduce to
    /// given target shape (only used in megbrain)
    axis:int = -1;
}
/// reduce along given axis
table ReduceV1 {
    mode:ReduceV1Mode = SUM;
    /// axis along which reduction is performed; if -1 is given, reduce to
    /// given target shape (only used in megbrain)
    axis:int = -1;
    data_type:ReduceV1DataType = DEFAULT;
}
/// reduce along given axis
table Reduce {
    mode:ReduceMode = SUM;
    /// axis along which reduction is performed; if INT_MAX is given, reduce to
    /// given target shape (only used in megbrain)
    axis:int = 2147483647;
    data_type:ReduceDataType = DEFAULT;
}
/// calculate accumulated sum along given axis
table CumsumV0 {
    /// axis along which cumsum is performed
    axis:int = -1;
    /// whether the cumsum is exclusive, i.e. the current element is not taken
    /// into account
    exclusive:bool = true;
    /// whether the cumsum is forward or backward
    reverse:bool = false;
}
/// calculate accumulated sum along given axis
table Cumsum {
    /// axis along which cumsum is performed, default with INT_MAX
    axis:int = 2147483647;
    /// whether the cumsum is exclusive, i.e. the current element is not taken
    /// into account
    exclusive:bool = true;
    /// whether the cumsum is forward or backward
    reverse:bool = false;
}
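// Editor's sketch (not part of the generated schema): the exclusive/reverse
// flags of Cumsum above, under the standard reading of "exclusive" (element i
// receives the sum of the elements before i):
//
//     import numpy as np
//
//     def cumsum(x, exclusive=True, reverse=False):
//         if reverse:                      # accumulate from the last element
//             return cumsum(x[::-1], exclusive)[::-1]
//         s = np.cumsum(x)
//         if exclusive:                    # drop the current element's term
//             s = np.concatenate(([0], s[:-1]))
//         return s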
table CondTake {
    mode:CondTakeMode = EQ;
    /// the value to be compared with; note that for integer data, val is also
    /// converted to int
    val:float = 0;
    /// used for float equality comparison
    eps:float = 1e-06;
}
table Argsort {
    order:ArgsortOrder = ASCENDING;
}
table IndexingRemap {
    /// Whether no two dst elements map to the same src element. Enabling this
    /// option can accelerate the gradient operator since atomic adding
    /// operations could be avoided.
    is_non_overlapping:bool = false;
}
table Sleep {
    /// time to sleep in seconds
    time:float = 0;
}
table Linspace {
    /// Whether stop is included in the generated tensor
    endpoint:bool = true;
}
table LinspaceFull {
    /// The first val.
    start:double = 0;
    /// The last val.
    stop:double = 1;
    /// Whether stop is included in the generated tensor
    endpoint:bool = true;
}
table Eye {
    /// Index of the diagonal: 0 (the default) refers to the main diagonal, a
    /// positive value refers to an upper diagonal, and a negative value to a
    /// lower diagonal.
    k:int = 0;
    /// data type of output value
    dtype:DTypeEnum = Float32;
}
table Diag {
    /// Index of the diagonal: 0 (the default) refers to the main diagonal, a
    /// positive value refers to an upper diagonal, and a negative value to a
    /// lower diagonal.
    k:int = 0;
}
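// Editor's sketch (not part of the generated schema): the diagonal index k
// used by Eye and Diag above follows the same convention as numpy:
//
//     import numpy as np
//
//     np.eye(3, k=1)               # ones on the first upper diagonal
//     np.eye(3, k=-1)              # ones on the first lower diagonal
//     np.diag(np.arange(3), k=1)   # embed a vector on an upper diagonal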
  1287. table UniformRNGV0 {
  1288. seed:ulong = 0;
  1289. }
  1290. table UniformRNG {
  1291. seed:ulong = 0;
  1292. /// The dtype of output Tensor. Only support Float32.
  1293. dtype:DTypeEnum = Float32;
  1294. }
  1295. table GaussianRNGV0 {
  1296. seed:ulong = 0;
  1297. mean:float = 0;
  1298. std:float = 1;
  1299. }
  1300. table GaussianRNG {
  1301. seed:ulong = 0;
  1302. mean:float = 0;
  1303. std:float = 1;
  1304. /// The dtype of output Tensor. Only support Float32.
  1305. dtype:DTypeEnum = Float32;
  1306. }
  1307. table GammaRNG {
  1308. seed:ulong = 0;
  1309. }
  1310. table BetaRNG {
  1311. seed:ulong = 0;
  1312. }
  1313. table PoissonRNG {
  1314. seed:ulong = 0;
  1315. }
  1316. table PermutationRNG {
  1317. seed:ulong = 0;
  1318. /// The dtype of output Tensor. Int32, Int16 and Float32 are supported.
  1319. dtype:DTypeEnum = Int32;
  1320. }
  1321. table ShuffleRNG {
  1322. seed:ulong = 0;
  1323. }
  1324. table Flip {
  1325. vertical:bool = false;
  1326. horizontal:bool = false;
  1327. }
  1328. table Rotate {
  1329. clockwise:bool = true;
  1330. }
  1331. table ROICopy {
  1332. row_from:uint = 0;
  1333. row_to:uint = 0;
  1334. col_from:uint = 0;
  1335. col_to:uint = 0;
  1336. }
  1337. table CvtColor {
  1338. mode:CvtColorMode = RGB2GRAY;
  1339. }
  1340. table WarpAffineV0 {
  1341. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1342. border_mode:WarpPerspectiveV1BorderMode = REPLICATE;
  1343. /// used for CONSTANT bmode
  1344. border_val:float = .0;
  1345. }
  1346. table WarpAffineV1 {
  1347. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1348. border_mode:WarpPerspectiveV1BorderMode = REPLICATE;
  1349. /// used for CONSTANT bmode
  1350. border_val:float = .0;
  1351. format:ConvolutionV0Format = NHWC;
  1352. }
  1353. table WarpAffine {
  1354. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1355. border_mode:WarpPerspectiveV1BorderMode = REPLICATE;
  1356. /// used for CONSTANT bmode
  1357. border_val:float = .0;
  1358. format:ConvolutionFormat = NHWC;
  1359. }
  1360. table GaussianBlur {
  1361. border_mode:WarpPerspectiveV1BorderMode = REPLICATE;
  1362. kernel_height:uint = 0;
  1363. kernel_width:uint = 0;
  1364. sigma_x:float = 0.;
  1365. sigma_y:float = 0.;
  1366. }
  1367. table ResizeV0 {
  1368. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1369. }
  1370. table ResizeV1 {
  1371. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1372. format:ConvolutionV0Format = NHWC;
  1373. }
  1374. table Resize {
  1375. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1376. format:ConvolutionFormat = NHWC;
  1377. }
  1378. table RemapV0 {
  1379. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1380. border_type:WarpPerspectiveV1BorderMode = REPLICATE;
  1381. format:ConvolutionV0Format = NHWC;
  1382. scalar:float = 0.;
  1383. }
  1384. table Remap {
  1385. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1386. border_type:WarpPerspectiveV1BorderMode = REPLICATE;
  1387. format:ConvolutionFormat = NHWC;
  1388. scalar:float = 0.;
  1389. }
  1390. table Convolution3D {
  1391. mode:Convolution3DMode = CROSS_CORRELATION;
  1392. /// padding on one side on the first dimension
  1393. pad_d:uint = 0;
  1394. /// padding on one side on the second dimension
  1395. pad_h:uint = 0;
  1396. /// padding on one side on the third dimension
  1397. pad_w:uint = 0;
  1398. /// kernel stride on the first dimension
  1399. stride_d:uint = 1;
  1400. /// kernel stride on the second dimension
  1401. stride_h:uint = 1;
  1402. /// kernel stride on the third dimension
  1403. stride_w:uint = 1;
  1404. /// dilation (i.e. size of each zero-padded kernel block) on the first
  1405. /// dimension
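    /// (for dilation ``d`` and kernel size ``k``, the effective kernel extent
    /// along a dimension is ``d * (k - 1) + 1``)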
    dilate_d:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the third
    /// dimension
    dilate_w:uint = 1;
    sparse:Convolution3DSparse = DENSE;
    data_type:Convolution3DDataType = FLOAT;
    format:Convolution3DFormat = NCDHW;
}
table Conv3DBias {
    nonlineMode:Conv3DBiasNonlineMode = IDENTITY;
    mode:Convolution3DMode = CROSS_CORRELATION;
    pad_d:uint = 0;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_d:uint = 1;
    stride_h:uint = 1;
    stride_w:uint = 0;
}
table SeparableConv3D {
    mode:Convolution3DMode = CROSS_CORRELATION;
    borderMode:SeparableConv3DBorderMode = BORDER_REPLICATE;
    is_symm_kernel:bool = true;
    pad_d:uint = 0;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_d:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
    ksize_d:uint = 0;
    ksize_h:uint = 3;
    ksize_w:uint = 3;
    anchor_d:uint = 0;
    anchor_h:uint = 1;
    anchor_w:uint = 1;
}
table TopK {
    mode:TopKMode = KTH_ONLY;
}
/// Change the tensor layout format
table RelayoutFormatV0 {
    /// Relayout mode.
    ///
    /// **Naming conventions**
    ///
    /// 1. ``A_B`` means change from layout format ``A`` to ``B``.
    /// 2. ``INTER_WEIGHT_xx`` means relayouting the weight for faster processing
    ///    by :attr:`Convolution.Format.NHWCD4` convolutions.
    /// 3. A suffix of ``I`` means the ``Image2DPack4TensorFormat`` tensor format
    ///    is used for faster processing on GPUs.
    ///
    /// **Layout definitions**
    ///
    /// * ``NCHW`` layout: ``{N, C, H, W}``
    /// * ``NHWC`` layout: ``{N, H, W, C}``
    /// * ``NHWCD4`` layout: ``{N, H, (C + 3) / 4, W, 4}``
    /// * ``NHWCD4I`` layout: with ``align_axis = 2``
    /// * ``NCHW4`` layout: ``{N, C/4, H, W, 4}``
    /// * ``NCHW88`` layout: ``{N, C/8, H, W, 8}``
    /// * ``CHWN4`` layout: ``{C/4, H, W, N, 4}``
    /// * ``NCHW64`` layout: ``{N, C/64, H, W, 64}``
    ///
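    /// For example, per the layout definitions above, relayouting a
    /// ``{2, 8, 16, 16}`` ``NCHW`` tensor to ``NCHW4`` yields ``{2, 2, 16, 16, 4}``.
    ///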
    /// **Float weight transformation definitions**
    ///
    /// +---------------+---------------------------------+--------------------+--------------------------------------+------+
    /// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                        | Axis |
    /// +===============+=================================+====================+======================================+======+
    /// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 4 == 0``    | ``{OC/4, FH, FW, IC, 4}``            | 3    |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+------+
    /// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0``  | ``{GROUP, OCPG/4, FH, FW, ICPG, 4}`` | 4    |
    /// |               |                                 | ``ICPG % 4 == 0``  |                                      |      |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+------+
    /// | CHAN          | ``{GROUP, 1, 1, FH, FW}``       | ``GROUP % 4 == 0`` | ``{GROUP/4, 1, FH, FW, 4}``          | 1    |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+------+
    ///
    /// **Float weight transformation nchw88 definitions**
    ///
    /// +---------------+---------------------------------+--------------------+--------------------------------------+
    /// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                        |
    /// +===============+=================================+====================+======================================+
    /// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 8 == 0``    |``{OC/8, IC/8, FH, FW, 8(IC), 8(OC)}``|
    /// |               |                                 | ``IC % 8 == 0``    |                                      |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+
    /// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 8 == 0``  | ``{GROUP, OCPG/8, ICPG/8, FH, FW,    |
    /// |               |                                 | ``ICPG % 8 == 0``  | 8(ICPG), 8(OCPG)}``                  |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+
    /// | CHAN          | ``{GROUP, 1, 1, FH, FW}``       | ``GROUP % 8 == 0`` | ``{GROUP/8, 1, FH, FW, 8}``          |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+
    ///
    /// **Int8(DOT) weight transformation definitions**
    ///
    /// +---------------+---------------------------------+--------------------+------------------------------------------+------+
    /// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                            | Axis |
    /// +===============+=================================+====================+==========================================+======+
    /// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 4 == 0``    | ``{OC/4, FH, FW, IC/4, 4, 4}``           | 3    |
    /// +---------------+---------------------------------+--------------------+------------------------------------------+------+
    /// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0``  | ``{GROUP, OCPG/4, FH, FW, ICPG/4, 4, 4}``| 4    |
    /// |               |                                 | ``ICPG % 4 == 0``  |                                          |      |
    /// +---------------+---------------------------------+--------------------+------------------------------------------+------+
    ///
    /// Note: the axis column gives the corresponding ``align_axis`` for the image
    /// format when the ``I`` suffix is present.
    ///
    /// Note: ``NCHW_NCHW4_WEIGHT`` automatically pads ``oc`` and ``ic``; remove the
    /// padded ``oc`` in a later opr by setting the ``group`` and ``oc`` params with
    /// ``NCHW4_NCHW``.
    ///
    mode:RelayoutFormatV0Mode = NHWC_NHWCD4;
}
/// Change the tensor layout format
table RelayoutFormat {
    mode:RelayoutFormatV0Mode = NHWC_NHWCD4;
    oc:uint = 0;
    group:uint = 1;
}
table SeparableFilterV0 {
    format:ConvolutionV0Format = NCHW;
    borderMode:WarpPerspectiveV1BorderMode = REPLICATE;
    is_symm_kernel:bool = true;
    ksize_h:uint = 3;
    ksize_w:uint = 3;
    anchor_h:uint = 1;
    anchor_w:uint = 1;
}
table SeparableFilter {
    format:ConvolutionFormat = NCHW;
    borderMode:WarpPerspectiveV1BorderMode = REPLICATE;
    is_symm_kernel:bool = true;
    ksize_h:uint = 3;
    ksize_w:uint = 3;
    anchor_h:uint = 1;
    anchor_w:uint = 1;
}
/// Local share convolution
table LocalShareV0 {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    /// spatial groups on the first dimension
    spatial_groups_h:uint = 1;
    /// spatial groups on the second dimension
    spatial_groups_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    computeMode:ConvolutionV1ComputeMode = DEFAULT;
}
/// Local share convolution
table LocalShare {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    /// spatial groups on the first dimension
    spatial_groups_h:uint = 1;
    /// spatial groups on the second dimension
    spatial_groups_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionFormat = NCHW;
    computeMode:ConvolutionV1ComputeMode = DEFAULT;
}
table ROIAlignV0 {
    mode:ROIAlignV0Mode = MAX_;
    format:ConvolutionV0Format = NCHW;
    spatial_scale:float = 1.0;
    offset:float = 0.0;
    pooled_height:uint = 1;
    pooled_width:uint = 1;
    sample_height:uint = 2;
    sample_width:uint = 2;
}
table ROIAlign {
    mode:ROIAlignV0Mode = MAX_;
    format:ConvolutionFormat = NCHW;
    spatial_scale:float = 1.0;
    offset:float = 0.0;
    pooled_height:uint = 1;
    pooled_width:uint = 1;
    sample_height:uint = 2;
    sample_width:uint = 2;
}
table Correlation {
    format:ConvolutionV0Format = NCHW;
    kernel_size:uint = 1;
    max_displacement:uint = 1;
    stride1:uint = 1;
    stride2:uint = 1;
    pad_size:uint = 0;
    is_multiply:bool = true;
}
table DeformablePSROIPooling {
    no_trans:bool = true;
    spatial_scale:float = 1;
    trans_std:float = 1;
    /// height of pooling output
    pooled_h:uint = 1;
    /// width of pooling output
    pooled_w:uint = 1;
    /// size of each deformable part
    part_size:uint = 1;
    /// sample count of each bbox
    sample_per_part:uint = 1;
}
/// Batch convolution (unshared weights on the batch dimension)
table BatchConvBiasV0 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
/// Batch convolution (unshared weights on the batch dimension)
table BatchConvBias {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionFormat = NCHW;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
table FakeQuant {
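    /// lower bound of the quantized value range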
    qmin:int = -2147483648;
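    /// upper bound of the quantized value range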
    qmax:int = 2147483647;
}
table TQT {
    qmin:int = -2147483648;
    qmax:int = 2147483647;
}
table LSQ {
    qmin:int = -2147483648;
    qmax:int = 2147483647;
}
table Fill {
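    /// value used to fill every element of the output tensor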
    value:float = 0;
}
table CheckNonFinite {
    scale:float = 1.0;
}
table Padding {
    /// front offset in dim 0
    front_offset_dim0:uint = 0;
    /// front offset in dim 1
    front_offset_dim1:uint = 0;
    /// front offset in dim 2
    front_offset_dim2:uint = 0;
    /// front offset in dim 3
    front_offset_dim3:uint = 0;
    /// front offset in dim 4
    front_offset_dim4:uint = 0;
    /// front offset in dim 5
    front_offset_dim5:uint = 0;
    /// front offset in dim 6
    front_offset_dim6:uint = 0;
    /// back offset in dim 0
    back_offset_dim0:uint = 0;
    /// back offset in dim 1
    back_offset_dim1:uint = 0;
    /// back offset in dim 2
    back_offset_dim2:uint = 0;
    /// back offset in dim 3
    back_offset_dim3:uint = 0;
    /// back offset in dim 4
    back_offset_dim4:uint = 0;
    /// back offset in dim 5
    back_offset_dim5:uint = 0;
    /// back offset in dim 6
    back_offset_dim6:uint = 0;
    /// value filled into the padded regions when ``padding_mode`` is CONSTANT
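    /// (e.g., with ``front_offset_dim1 = 1`` and ``back_offset_dim1 = 2`` a
    /// ``{2, 3}`` input becomes ``{2, 6}``, the new elements taking this value
    /// under CONSTANT mode; illustrative example)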
    padding_val:float = 0;
    padding_mode:PaddingPaddingMode = CONSTANT;
}
table LayerNorm {
    affine:bool = true;
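    /// epsilon added to the variance for numerical stability, as in the usual
    /// normalization formula ``y = (x - mean) / sqrt(var + eps)``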
    eps:float = 1e-5;
    normalized_dim:ulong = 1;
    normalized_size:ulong = 1;
}
table GroupNorm {
    affine:bool = true;
    eps:float = 1e-5;
    group:uint = 1;
    format:ConvolutionFormat = NCHW;
}
table Dropout {
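    /// probability that each element is zeroed during training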
    drop_prob:float = 0;
    seed:ulong = 0;
}
table RNNCell {
    nonlineMode:RNNCellNonlineMode = IDENTITY;
}
table RNN {
    /// Number of recurrent layers
    num_layers:uint = 1;
    /// If true, becomes a bidirectional RNN
    bidirectional:bool = false;
    /// If true, the layer uses bias weights ``b_ih`` and ``b_hh``
    bias:bool = true;
    /// The number of features in the hidden state
    hidden_size:uint = 128;
    /// If nonzero, introduces a Dropout layer with this probability on the
    /// outputs of each RNN layer
    dropout:float = 0.0;
    nonlineMode:RNNCellNonlineMode = IDENTITY;
    fwd_mode:BNFwdMode = TRAINING;
}
table LSTM {
    /// Number of recurrent layers
    num_layers:uint = 1;
    /// If true, becomes a bidirectional LSTM
    bidirectional:bool = false;
    /// If true, the layer uses bias weights ``b_ih`` and ``b_hh``
    bias:bool = true;
    /// The number of features in the hidden state
    hidden_size:uint = 128;
    /// If nonzero, use an LSTM with projections of the corresponding size
    proj_size:uint = 0;
    /// If nonzero, introduces a Dropout layer with this probability on the
    /// outputs of each LSTM layer
    dropout:float = 0.0;
    fwd_mode:BNFwdMode = TRAINING;
}