// generated by gen_param_defs.py for c23d51f3c4f33119fd74f58f04d112ccea8f64f1249ab372300975ab7e710e9a
include "dtype.fbs";

namespace mgb.serialization.fbs.param;

/// mode of collective communication
enum CollectiveCommMode : uint {
    /// reduce by sum to output computing node
    REDUCE_SUM = 0,
    /// copy input value to each output computing node
    BROADCAST = 1,
    /// each output comp node gets the concatenated value of all inputs
    ALL_GATHER = 2,
    /// reduce inputs by sum and each output gets one part of it
    REDUCE_SCATTER_SUM = 3,
    /// every output gets the sum of all inputs
    ALL_REDUCE_SUM = 4,
    /// every output gets the max of all inputs
    ALL_REDUCE_MAX = 5,
    /// every output gets the min of all inputs
    ALL_REDUCE_MIN = 6,
    /// every output gets the prod of all inputs
    ALL_REDUCE_PROD = 7,
    /// concat inputs to one node
    GATHER = 8,
    /// scatter input to each output computing node
    SCATTER = 9,
    /// scatter inputs and gather them on each computing node
    ALL_TO_ALL = 10,
}

/// mode for computing the gradient
enum CondExecMarkGradMode : uint {
    /// normal gradient mode: sum all the activated components
    SUM = 0,
    /// use :attr:`CondExecMerge.SUM_COND_OUT` mode so oprs that depend on the
    /// gradient opr would not be executed if the forward var is not used.
    SUM_COND_OUT = 1,
}

/// static inference option. **Note:** This is a workaround: since
/// currently static inference in MegBrain does not take conditional
/// execution into account, this option can be used to bypass static
/// inference errors. This is currently only used by automatically
/// generated gradient oprs.
enum CondExecMarkStaticInfer : uint {
    /// enable both shape and value inference
    SHAPE_VALUE = 0,
    /// only enable shape inference (disable value inference)
    SHAPE_ONLY = 1,
    /// disable both shape and value inference
    NONE = 2,
}

enum CondExecMergeMode : uint {
    /// copy the var whose mask is activated to the output, requiring that
    /// exactly one branch is active
    EXACT_ONE = 0,
    /// like :attr:`EXACT_ONE` with the requirement that all branches have the
    /// same shape, so shape inference can be easier
    EXACT_ONE_SAME_SHAPE = 1,
    /// sum all the active branches into output var; require all branches to
    /// have the same shape. Extra shape vars are needed in this mode, so the
    /// outputs can be initialized to zero when no input is active (and their
    /// shapes are probably unknown).
    SUM = 2,
    /// like :attr:`SUM` but also add an ExecutionMask to the readers of output
    /// vars, so they would be skipped if no branch is taken
    SUM_COND_OUT = 3,
}

/// how to compare predicate var with branch keys
enum CondExecPredMode : uint {
    /// The outputs correspond to branch keys, and the one which equals
    /// predicate would be activated. This behaves like a case-statement in many
    /// languages.
    CASE = 0,
    /// like :attr:`CASE`, but add an extra output that would be activated if no
    /// branch is matched
    CASE_FALLBACK = 1,
    /// One more outputs would be produced than the number of branch keys,
    /// representing the interval in which the predicate var fits in. The
    /// intervals are defined as :math:`(-\\infty, k_0), [k_0, k_1), \\ldots,
    /// [k_{n-2}, k_{n-1}), [k_{n-1}, \\infty)`. The keys must be given in
    /// ascending order.
    PIECEWISE = 2,
}

enum CondExecPredLogicalMode : uint {
    /// logical or
    OR = 0,
    /// logical and
    AND = 1,
    /// exclusive-or
    XOR = 2,
    /// not or(inputs)
    NOR = 3,
    /// not and(inputs)
    NAND = 4,
    /// not xor(inputs)
    XNOR = 5,
}

enum ExecutionPolicyStrategy : uint (bit_flags) {
    /// use heuristic to choose the fastest algorithm
    HEURISTIC = 0,
    /// run possible algorithms on real device to find the best
    PROFILE = 1,
    /// during profiling or heuristic algo selection, require the algos to be
    /// reproducible
    REPRODUCIBLE = 2,
    /// profiling requires algos that are optimized to achieve fast profiling
    OPTIMIZED = 3,
}

enum ExecutionPolicyV0Strategy : uint {
    /// use heuristic to choose the fastest algorithm
    HEURISTIC = 0,
    /// use heuristic to choose the fastest algorithm, and the chosen algorithm
    /// is reproducible
    HEURISTIC_REPRODUCIBLE = 1,
    /// run possible algorithms on real device to find the best
    PROFILE = 2,
    /// the fastest of profile result that is also reproducible
    PROFILE_REPRODUCIBLE = 3,
    /// use profile result and heuristic to choose the fastest algorithm
    PROFILE_HEURISTIC = 4,
}

table DType {
    dtype:DTypeEnum = Byte;
}

table PersistentOutputStorage {
    /// This is used for controlling memory sharing. Multiple
    /// ``PersistentOutputStorage'' oprs with the same ``share_key'' would share
    /// underlying tensor storage. Note that the value ``-1'' is treated
    /// specially: storage of oprs with this key would be private and would not
    /// be shared with any other opr.
    share_key:int = -1;
}

/// optional axis: axis == -1 means no axis
table OptionalAxis {
    axis:int = -1;
}

/// optional axis: axis == MAX_NDIM means no axis
table OptionalAxisV1 {
    axis:int = 7;
}

table ExecutionPolicyV0 {
    strategy:ExecutionPolicyV0Strategy = HEURISTIC;
    /// workspace limit in bytes
    workspace_limit:ulong = 18446744073709551615;
}

/// specify how to select an algorithm for an operator
table ExecutionPolicy {
    strategy:ExecutionPolicyStrategy = 1;
    /// workspace limit in bytes
    workspace_limit:ulong = 18446744073709551615;
}

table AssertEqual {
    /// max allowed error; error is defined as the minimal of absolute and
    /// relative error
    maxerr:float = 0.0001;
    /// whether to print maxerr to stdout during opr exec
    verbose:bool = false;
}

table FpgaConv {
    need_output_quantize:bool = false;
    need_output_threshold:bool = false;
    stride:int = 1;
    input_bit_width:int = 2;
    output_bit_width:int = 2;
    weight_bit_width:int = 2;
    thres0:int = 0;
    thres1:int = 1;
    unpool_size:uint = 4;
    direct_size:uint = 4;
}

/// collective communication between multiple computing nodes on localhost
table CollectiveComm {
    /// mode of collective communication
    mode:CollectiveCommMode = REDUCE_SUM;
}

/// HACK: The tag of this param def is actually used for another non-generated
/// param def SerializedDType, the sole purpose of this param def is to provide
/// a spare tag. Do not use.
table FakeSerializedDType {
}

/// evaluate a predicate and branch keys to setup ExecutionMask objects with
/// associated predicate proxy vars (PPVs)
table CondExecPred {
    /// how to compare predicate var with branch keys
    mode:CondExecPredMode = CASE;
    /// threshold for checking equality of float point values
    eps:float = 0.0001;
}

/// compute a logical function over a set of PPVs
table CondExecPredLogical {
    mode:CondExecPredLogicalMode = OR;
}

/// add ExecutionMask of the input PPV to this opr and readers of the outputs of
/// this opr
table CondExecMark {
    /// mode for computing the gradient
    grad_mode:CondExecMarkGradMode = SUM;
    /// static inference option. **Note:** This is a workaround: since
    /// currently static inference in MegBrain does not take conditional
    /// execution into account, this option can be used to bypass static
    /// inference errors. This is currently only used by automatically
    /// generated gradient oprs.
    static_infer:CondExecMarkStaticInfer = SHAPE_VALUE;
}

/// merge multiple conditional execution branches
table CondExecMerge {
    /// number of output vars (i.e. vars per branch)
    nr_output:uint = 1;
    mode:CondExecMergeMode = EXACT_ONE;
}

/// opr implements NVIDIA Optical Flow SDK.
table NvOf {
    precision:uint = 1;
}