You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

mgb_opr_param_defs.fbs 7.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. // generated by gen_param_defs.py for c23d51f3c4f33119fd74f58f04d112ccea8f64f1249ab372300975ab7e710e9a
  2. include "dtype.fbs";
  3. namespace mgb.serialization.fbs.param;
/// mode of collective communication
enum CollectiveCommMode : uint {
    /// reduce by sum to output computing node
    REDUCE_SUM = 0,
    /// copy input value to each output computing node
    BROADCAST = 1,
    /// each output comp node gets the concatenated value of all inputs
    ALL_GATHER = 2,
    /// reduce inputs by sum and each output gets one part of it
    REDUCE_SCATTER_SUM = 3,
    /// every output gets the sum of all inputs
    ALL_REDUCE_SUM = 4,
    /// every output gets the max of all inputs
    ALL_REDUCE_MAX = 5,
    /// every output gets the min of all inputs
    ALL_REDUCE_MIN = 6,
    /// every output gets the prod of all inputs
    ALL_REDUCE_PROD = 7,
    /// concat inputs to one node
    GATHER = 8,
    /// scatter input to each output computing node
    SCATTER = 9,
    /// scatter inputs and gather them on each computing node
    ALL_TO_ALL = 10,
}
/// mode for computing the gradient
enum CondExecMarkGradMode : uint {
    /// normal gradient mode: sum all the activated components
    SUM = 0,
    /// use :attr:`CondExecMerge.SUM_COND_OUT` mode so oprs that depend on the
    /// gradient opr would not be executed if the forward var is not used.
    SUM_COND_OUT = 1,
}
/// static inference option. **Note:** This is a workaround: since
/// currently static inference in MegBrain does not take conditional
/// execution into account, this option can be used to bypass static
/// inference errors. This is currently only used by automatically
/// generated gradient oprs.
enum CondExecMarkStaticInfer : uint {
    /// enable both shape and value inference
    SHAPE_VALUE = 0,
    /// only enable shape inference (disable value inference)
    SHAPE_ONLY = 1,
    /// disable both shape and value inference
    NONE = 2,
}
enum CondExecMergeMode : uint {
    /// copy the var whose mask is activated to the output, requiring that
    /// exactly one branch is active
    EXACT_ONE = 0,
    /// like :attr:`EXACT_ONE` with the requirement that all branches have the
    /// same shape, so shape inference can be easier
    EXACT_ONE_SAME_SHAPE = 1,
    /// sum all the active branches into output var; require all branches to
    /// have the same shape. Extra shape vars are needed in this mode, so the
    /// outputs can be initialized to zero when no input is active (and their
    /// shapes are probably unknown).
    SUM = 2,
    /// like :attr:`SUM` but also add an ExecutionMask to the readers of output
    /// vars, so they would be skipped if no branch is taken
    SUM_COND_OUT = 3,
}
/// how to compare predicate var with branch keys
enum CondExecPredMode : uint {
    /// The outputs correspond to branch keys, and the one which equals
    /// predicate would be activated. This behaves like a case-statement in many
    /// languages.
    CASE = 0,
    /// like :attr:`CASE`, but add an extra output that would be activated if no
    /// branch is matched
    CASE_FALLBACK = 1,
    /// One more output would be produced than the number of branch keys,
    /// representing the interval in which the predicate var fits in. The
    /// intervals are defined as :math:`(-\infty, k_0), [k_0, k_1), \ldots,
    /// [k_{n-2}, k_{n-1}), [k_{n-1}, \infty)`. The keys must be given in
    /// ascending order.
    PIECEWISE = 2,
}
enum CondExecPredLogicalMode : uint {
    /// logical or
    OR = 0,
    /// logical and
    AND = 1,
    /// exclusive-or
    XOR = 2,
    /// not or(inputs)
    NOR = 3,
    /// not and(inputs)
    NAND = 4,
    /// not xor(inputs)
    XNOR = 5,
}
/// NOTE: declared with ``bit_flags``, so each member denotes a bit position
/// and members may be combined into a bit mask.
enum ExecutionPolicyStrategy : uint (bit_flags) {
    /// use heuristic to choose the fastest algorithm
    HEURISTIC = 0,
    /// run possible algorithms on real device to find the best
    PROFILE = 1,
    /// when profile or heuristic algo selection is used, require the algos to
    /// be reproducible
    REPRODUCIBLE = 2,
    /// profile requires algos that are optimized to achieve fast-profile
    OPTIMIZED = 3,
}
enum ExecutionPolicyV0Strategy : uint {
    /// use heuristic to choose the fastest algorithm
    HEURISTIC = 0,
    /// use heuristic to choose the fastest algorithm, and the chosen algorithm
    /// is reproducible
    HEURISTIC_REPRODUCIBLE = 1,
    /// run possible algorithms on real device to find the best
    PROFILE = 2,
    /// the fastest of profile result that is also reproducible
    PROFILE_REPRODUCIBLE = 3,
    /// use profile result and heuristic to choose the fastest algorithm
    PROFILE_HEURISTIC = 4,
}
table DType {
    /// dtype selector; ``DTypeEnum`` is declared in the included ``dtype.fbs``
    dtype:DTypeEnum = Byte;
}
table PersistentOutputStorage {
    /// This is used for controlling memory sharing. Multiple
    /// ``PersistentOutputStorage`` oprs with the same ``share_key`` would share
    /// underlying tensor storage. Note that the value ``-1`` is treated
    /// specially: storage of oprs with this key would be private and would not
    /// be shared with any other opr.
    share_key:int = -1;
}
/// optional axis: axis == -1 means no axis
table OptionalAxis {
    axis:int = -1;
}
/// optional axis: axis == MAX_NDIM means no axis
table OptionalAxisV1 {
    /// default 7 is MAX_NDIM, i.e. "no axis" (per the table comment above)
    axis:int = 7;
}
table ExecutionPolicyV0 {
    strategy:ExecutionPolicyV0Strategy = HEURISTIC;
    /// workspace limit in bytes; defaults to UINT64_MAX (2^64 - 1), i.e.
    /// effectively no limit
    workspace_limit:ulong = 18446744073709551615;
}
/// specify how to select an algorithm for an operator
table ExecutionPolicy {
    /// numeric default 1: since ``ExecutionPolicyStrategy`` is a bit_flags
    /// enum, 1 is the bit mask for HEURISTIC (bit position 0)
    strategy:ExecutionPolicyStrategy = 1;
    /// workspace limit in bytes; defaults to UINT64_MAX (2^64 - 1), i.e.
    /// effectively no limit
    workspace_limit:ulong = 18446744073709551615;
}
table AssertEqual {
    /// max allowed error; error is defined as the minimal of absolute and
    /// relative error
    maxerr:float = 0.0001;
    /// whether to print maxerr to stdout during opr exec
    verbose:bool = false;
}
/// parameters for an FPGA convolution opr; field semantics are defined by the
/// opr implementation (not documented in this generated schema)
table FpgaConv {
    need_output_quantize:bool = false;
    need_output_threshold:bool = false;
    stride:int = 1;
    input_bit_width:int = 2;
    output_bit_width:int = 2;
    weight_bit_width:int = 2;
    thres0:int = 0;
    thres1:int = 1;
    unpool_size:uint = 4;
    direct_size:uint = 4;
}
/// collective communication between multiple computing nodes on localhost
table CollectiveComm {
    /// mode of collective communication
    mode:CollectiveCommMode = REDUCE_SUM;
}
/// HACK: The tag of this param def is actually used for another non-generated
/// param def SerializedDType, the sole purpose of this param def is to provide
/// a spare tag. Do not use.
table FakeSerializedDType {
}
/// evaluate a predicate and branch keys to setup ExecutionMask objects with
/// associated predicate proxy vars (PPVs)
table CondExecPred {
    /// how to compare predicate var with branch keys
    mode:CondExecPredMode = CASE;
    /// threshold for checking equality of float point values
    eps:float = 0.0001;
}
/// compute a logical function over a set of PPVs
table CondExecPredLogical {
    mode:CondExecPredLogicalMode = OR;
}
/// add ExecutionMask of the input PPV to this opr and readers of the outputs of
/// this opr
table CondExecMark {
    /// mode for computing the gradient
    grad_mode:CondExecMarkGradMode = SUM;
    /// static inference option. **Note:** This is a workaround: since
    /// currently static inference in MegBrain does not take conditional
    /// execution into account, this option can be used to bypass static
    /// inference errors. This is currently only used by automatically
    /// generated gradient oprs.
    static_infer:CondExecMarkStaticInfer = SHAPE_VALUE;
}
/// merge multiple conditional execution branches
table CondExecMerge {
    /// number of output vars (i.e. vars per branch)
    nr_output:uint = 1;
    mode:CondExecMergeMode = EXACT_ONE;
}
/// opr implementing the NVIDIA Optical Flow SDK
table NvOf {
    /// precision level passed to the SDK — exact semantics defined by the opr
    /// implementation; TODO confirm against the NvOf opr source
    precision:uint = 1;
}