# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

import collections
import os

from . import mgb as _mgb

_default_device_type = "CUDA"


def set_device_map(logical_dev, physical_dev, device_type=None):
    """map from *logical_dev* to *physical_dev* for furture comp node
    loading

    example::

        set_device_map(0, 2, 'CPU') # cpu0 -> cpu2
        set_device_map('gpu3', 'gpu0') # gpu0 -> gpu0

    :param device_type: specify the device type if devices are given by
        integers; if devices are given by integers and ``device_type`` is not
        given, the default value ``'CUDA'`` would be used. Possible values are
        ``'CUDA'`` and ``'CPU'``.
    """

    if device_type is None:
        device_type = _default_device_type

    if device_type == "CUDA":
        xpu = "gpu"
    else:
        assert device_type == "CPU"
        xpu = "cpu"

    def rmxpu(v):
        if isinstance(v, str):
            assert v.startswith(xpu) or v.startswith("xpu"), (
                "bad comp node in set_device_map: "
                "device_type={} comp_node={}".format(device_type, v)
            )
            return v[3:]
        return v

    logical_dev, physical_dev = map(rmxpu, [logical_dev, physical_dev])
    _mgb.CompNode._set_device_map(device_type, int(logical_dev), int(physical_dev))


def set_default_device(physical_dev, device_type=None):
    """set physcal device for xpux

    when *device_type* is None and *physical_dev* starts with *gpu* or *cpu*,
    the default device type would be modified accordingly for future calls to
    :func:`set_device_map` when remapping device number.
    """
    global _default_device_type
    if (
        device_type is None
        and isinstance(physical_dev, str)
        and not physical_dev.isdigit()
        and not physical_dev.startswith("xpu")
    ):
        t = physical_dev[:3]
        if t == "gpu":
            _default_device_type = "CUDA"
        else:
            assert t == "cpu", "bad physical_dev: {}".format(physical_dev)
            _default_device_type = "CPU"
        set_default_device_type(_default_device_type)
        device_type = _default_device_type
    set_device_map(-1, physical_dev, device_type)


def set_default_device_type(device_type):
    """set device type for xpu"""
    global _default_device_type
    device_type = device_type.upper()
    _mgb.CompNode._set_unspec_device_type(device_type)
    _default_device_type = device_type


def set_fork_cuda_warning_flag(flag):
    """set warning to be printed at fork if cuda has been initialized

    :type flag: int
    :param flag: controls how the warning should be printed:

        * 0: disable warning
        * 1: print warning to log
        * 2: print warning to log and raise exception
    """
    _mgb._config.set_fork_cuda_warning_flag(int(flag))


def get_device_count(device_type="xpu", warn=True):
    """get number of devices installed on this system

    :param device_type: device type, one of 'xpu', 'gpu' or 'cpu'
    :type device_type: str
    """
    return _mgb.CompNode._get_device_count(device_type.upper(), warn)


def parse_locator(device_name: str) -> tuple:
    """get the tensor locator expression by device name.

    :param device_name: device name, like 'cpu0', 'gpu1' and 'xpux'
    :type device_name: str

    :return: (device_type, dev_num, stream_num)
    """
    return _mgb.CompNode._parse_locator(device_name)


def set_mem_reserve_size(size):
    """set memory reserve size:

        * If *size* is greater than 1, it is the absolute amount of memory to
          be reserved in MB;
        * If *size* is in the range (0, 1), it is the ratio of total memory;
        * If *size* is 0, memory reservation and pre-allocation would be
          disabled;
        * If *size* is -1, disable custom memory allocator and use cuda APIs
          directly.
    """
    _mgb._config.set_mem_reserve_size(float(size))


def set_comp_graph_option(comp_graph, name, val):
    """set computing graph option and return its old value
    :type comp_graph: :class:`.CompGraph`
    :param comp_graph: the computing graph whose option should be modified
    :type name: str
    :param name: option name
        Currently supported options are:

            * "no_profiling_on_shape_change": bool;
              When execution strategy is set to profiling, always use the
              initial profile result and do not re-run profiling even if input
              shape changes.
            * "seq_opt.enable_mem_plan_opt": bool
            * "seq_opt.enable_mem_reuse_alloc": bool
            * "seq_opt.enable_seq_comp_node_opt": bool
            * "force_dynamic_alloc": bool
            * "var_sanity_check_first_run": bool
            * "enable_sublinear_memory_opt": bool
            * "enable_memory_swap": bool; whether to enable memory swap; it
                usually performs worse than sublinear memory
            * "enable_var_mem_defragment": bool
            * "allocate_static_mem_after_graph_compile": bool
            * "enable_grad_var_static_reshape": bool:
               If set to ``True``, dynamically-shaped gradients whose original
               shape is statically inferrable would be reshaped, so static
               shape inference can continue
            * "async_exec_level": int

                 * ``0``: do not dispatch asynchronously
                 * ``1``: async dispatch if there are more than 1 cuda comp
                   nodes
                 * mask ``0b10``: async for comp nodes with unlimited queue
                   (e.g. CPU comp nodes)
                 * mask ``0b100``: async for even one comp node
            * "log_level": int

                 * ``0``: no log info for graph construction/compiling
                 * ``1``: static memory allocation status,
                   WorkspaceLimitGetter summary, and optimizer summary
                 * ``2``: optimizer details and duplicated operators tha are
                   removed
            * "graph_opt.jit": whether to enable JIT
            * "graph_opt.tensorrt": whether to enable fine-grained automatic
              replacement for TensorRT operators
            * "graph_opt.android_nn": whether to enable fine-grained automatic
              replacement for Android NN operators
            * "graph_opt_level": int

                 * ``0``: disable
                 * ``1``: level-1: inplace arith transformations during graph
                   construction
                 * ``2``: (default) level-2: level-1, plus global optimization
                   before graph compiling
                 * ``3``: also enable JIT
    :param val: new option value
    :return: old option value
    """
    if name == "log_static_mem_alloc":
        name = "log_level"
    if name == "enable_async_exec":
        name = "async_exec_level"
    return _mgb._config.set_comp_graph_option(comp_graph, name, int(val))


def comp_graph_is_eager(comp_graph):
    return _mgb._config.comp_graph_is_eager(comp_graph)


def add_extra_vardep(var, dep):
    """add *dep* as an extra dependency of *var*, so if *var* is required to
    compute the final output when compiling a comp graph, *dep* would also be
    included in the computing sequence. Note that the order computing of these
    two vars is not guaranteed.
    """
    assert isinstance(var, _mgb.SymbolVar) and isinstance(dep, _mgb.SymbolVar)
    assert var.owner_graph == dep.owner_graph
    return _mgb._config.add_extra_vardep(var, dep)


class _GraphPropertyBase:
    """helper class for implementing operator property setter context managers"""

    _cur_graph = None

    _graph2stack = None
    """class attribute that maintains mapping from graph to property stack;
    should be defined by child classes"""

    __prop_setup__ = None
    """overwritten by subclass to setup property"""

    __prop_clear__ = None
    """overwritten by subclass to clear property"""

    def __init__(self, comp_graph, prop):
        """:param comp_graph: computing graph, or None to not set this
        property"""
        if comp_graph is not None:
            assert isinstance(
                comp_graph, _mgb.CompGraph
            ), "invalid comp graph: {!r}".format(comp_graph)
        self._cur_graph = comp_graph
        self._graph2stack.setdefault(comp_graph, []).append(prop)

    def __setup(self, prop):
        self.__prop_setup__(self._cur_graph, prop)

    def __clear(self):
        self.__prop_clear__(self._cur_graph)

    def __enter__(self):
        if self._cur_graph is None:
            return

        stack = self._graph2stack[self._cur_graph]
        if len(stack) > 1:
            # clear nested property
            self.__clear()
        self.__setup(stack[-1])

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if self._cur_graph is None:
            return

        stack = self._graph2stack[self._cur_graph]
        self.__clear()
        stack.pop()
        if stack:
            # restore nested property
            self.__setup(stack[-1])
        else:
            del self._graph2stack[self._cur_graph]


class exc_opr_tracker_scope(_GraphPropertyBase):
    """context manager for associating an object with all operators created
    within this context; so when an exception is raised, information about the
    corresponding operator could be retrieved from
    :attr:`.MegBrainError.tracker`

    :param comp_graph: the computing graph where the operators should be tracked
    :type comp_graph: :class:`.CompGraph`
    :param tracker: an arbitrary python object to track the operators
    """

    _graph2stack = {}

    def __init__(self, comp_graph, tracker):
        assert (
            tracker is not None
        ), "bad args for exc_opr_tracker_scope: {!r} {!r}".format(comp_graph, tracker)
        super().__init__(comp_graph, tracker)

    __prop_setup__ = _mgb._config.begin_set_exc_opr_tracker
    __prop_clear__ = _mgb._config.end_set_exc_opr_tracker


class opr_priority_scope(_GraphPropertyBase):
    """context manager for setting priority for all operators created in this
    context

    :param comp_graph: the computing graph for which operator priority should
        be set
    :type comp_graph: :class:`.CompGraph`
    :param priority: operator priority. Smaller number means higher priority.
        Default value is 0. Grad operator would use negative priority by
        default.
    """

    _graph2stack = {}

    LOWEST_PRIORITY = 2 ** 31 - 1
    """lowest prority (i.e. max possible value)"""

    HIGHEST_PRIORITY = -LOWEST_PRIORITY
    """highest prority (i.e. min possible value)"""

    def __init__(self, comp_graph, priority):
        super().__init__(comp_graph, int(priority))

    __prop_setup__ = _mgb._config.begin_set_opr_priority
    __prop_clear__ = _mgb._config.end_set_opr_priority


OprTrackerResult = collections.namedtuple(
    "OprTrackerResult", ["msg", "tracker", "grad_tracker"]
)


def get_opr_tracker(cg, var_id):
    """get the tracking object associated with the owner operator of a var

    :param cg: the computing graph
    :param var_id: id of the var whose owner opr tracker should be found

    :return: if no var is found, ``None`` is returned; otherwise return an
        :class:`OprTrackerResult` object
    """
    assert isinstance(cg, _mgb.CompGraph)
    ret = _mgb._config.get_opr_tracker(cg, int(var_id))
    if ret is None:
        return
    return OprTrackerResult(*ret)


def set_opr_sublinear_memory_endpoint(var):
    """set the owner operator of a symvar to be endpoint of sublinear memory
    optimizer


    :type var: :class:`.SymbolVar`
    """
    _mgb._config.set_opr_sublinear_memory_endpoint(var)


def max_size_t():
    """get max value of size_t type on local architecture"""
    return _mgb.max_size_t()


def is_cuda_ctx_set():
    """return whether current thread has an active cuda driver context"""
    return _mgb._config.is_cuda_ctx_set()


def get_include_path():
    """get include path for building megbrain extensions"""
    return os.path.join(os.path.realpath(os.path.dirname(__file__)), "include")


def get_cuda_gencode(only_cap=False):
    """get -gencode options to be passed to nvcc for compiling on local
    machine

    :param only_cap: if True, return only a list of cuda compute capability
        strings (like ``['35', '52']`` )
    """
    ret = _mgb._config.get_cuda_gencode().split()
    if not only_cap:
        ret = " ".join(map("-gencode arch=compute_{0},code=sm_{0}".format, ret))
    return ret


def get_cuda_lib_path():
    """get the cuda root path by locating loaded libcudart.so
    """
    return _mgb._config.get_cuda_lib_path()


def get_cuda_include_path():
    """get the cuda include path by locating loaded libcudart.so, including
        libcudart.so's path, parent path and `parent path`/include
    """
    return _mgb._config.get_cuda_include_path()


def get_cuda_version():
    """get runtime cuda version
    """
    return _mgb._config.get_cuda_version()


def is_compiled_with_cuda():
    """whether cuda is enabled at compile time"""
    return _mgb._config.is_compiled_with_cuda()


def load_opr_library(path):
    """Load an external operator library. This essentially sets megbrain
    symbols as public and load the library.

    :param path: path to the shared object; if it is None, then only megbrain
    symbols are made public.
    """
    _mgb._config.load_opr_library(
        os.path.realpath(os.path.join(os.path.dirname(__file__), "_mgb.so")), path
    )


def dump_registered_oprs():
    """
    get all registered oprs, return dict(id, name)
    """
    return dict(_mgb._config.dump_registered_oprs())


def create_mm_server(server_addr, port):
    """
    create mm server with server address
    throw exception if server_addr is already used
    """
    return _mgb._config.create_mm_server(server_addr, port)


def group_barrier(server_addr, port, size, rank):
    """
    block until all ranks reach this barrier
    """
    return _mgb._config.group_barrier(server_addr, port, size, rank)