
function.py 6.3 kB

# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import copy
from abc import ABCMeta, abstractmethod
from typing import Iterable, Tuple, Union

import megengine._internal as mgb

from .tensor import Tensor


# Identity craniotome: it receives the original inputs followed by the forward
# outputs, copies the trailing ``nr_outputs`` inputs through unchanged, and
# replaces the autodiff gradient with a user-supplied ``grad_func``.
class _OverrideGradientCraniotome(mgb.craniotome.CraniotomeBase):
    __nr_inputs__ = None
    __nr_outputs__ = None
    __expand_single_outputs__ = False
    __allow_duplicate__ = False

    grad_func = None

    def setup(self, nr_inputs, nr_outputs, grad_func):
        self.__nr_inputs__ = nr_inputs + nr_outputs
        self.__nr_outputs__ = nr_outputs
        self.grad_func = grad_func

    def infer_shape(self, inp_shapes):
        return inp_shapes[-self.__nr_outputs__ :]

    def init_output_dtype(self, input_dtypes):
        return input_dtypes[-self.__nr_outputs__ :]

    def execute(self, inputs, outputs):
        for ivar, ovar in zip(inputs[-self.__nr_outputs__ :], outputs):
            ovar.set_value(ivar)

    def grad(self, wrt_idx, inputs, outputs, out_grad):
        # TODO: Make sure grad_values really have values in eager mode.
        # Porting to the new imperative engine would solve this, but if that
        # doesn't happen, EagerEvalManager should be changed.
        grads = self.grad_func(
            *(Tensor(x) if x is not None else None for x in out_grad)
        )
        # pylint: disable=literal-comparison
        if isinstance(grads, Tensor) or grads is None or grads is 0:
            grads = (grads,)
        assert (
            len(grads) == self.__nr_inputs__ - self.__nr_outputs__
        ), "Function.backward should return a tuple with len = {}, got {}".format(
            self.__nr_inputs__ - self.__nr_outputs__, len(grads)
        )
        # pylint: disable=literal-comparison
        return (
            list(x._symvar if x is not None and x is not 0 else 0 for x in grads)
            + [0] * self.__nr_outputs__
        )

    def get_serialize_params(self):
        raise NotImplementedError("Serialization of Function is not implemented")


class Function(metaclass=ABCMeta):
    """
    Defines a block of operations with customizable differentiation.

    The computation should be defined in the ``forward`` method, with gradient
    computation defined in the ``backward`` method.

    Each instance of ``Function`` should be used only once during forwarding.

    Examples:

    .. testcode::

        class Sigmoid(Function):
            def forward(self, x):
                y = 1 / (1 + F.exp(-x))
                self.save_for_backward(y)
                return y

            def backward(self, output_grads):
                (y,) = self.saved_tensors
                return output_grads * y * (1 - y)
    """

    _has_saved_state = False
    saved_tensors = None

    def __init__(self):
        self.saved_tensors = ()

    @abstractmethod
    def forward(self, *inputs: Iterable[Tensor]) -> Union[Tuple[Tensor], Tensor]:
        """
        Applies operations to ``inputs`` and returns the results. It must be
        overridden by all subclasses.

        Users can call :meth:`~.function.Function.save_for_backward` in this method to save tensors.

        :param inputs: input tensors.
        :return: a tuple of Tensor or a single Tensor.

        .. note::

            This method should return a tuple of Tensor or a single Tensor representing the output
            of the function.
        """
        raise NotImplementedError

    @abstractmethod
    def backward(
        self, *output_grads: Iterable[Union[Tensor, None]]
    ) -> Union[Tuple[Tensor], Tensor]:
        """
        Computes the gradient of the forward function. It must be overridden by all subclasses.

        :param output_grads: gradients of the outputs that are returned by
            :meth:`~.function.Function.forward`.

        .. note::

            When some output tensors are not related to the loss function, the corresponding
            values in ``output_grads`` would be ``None``.

        .. note::

            This method should return a tuple containing the gradients of all inputs, in the same
            order as the ``inputs`` argument of :meth:`~.function.Function.forward`. A ``Tensor``
            could be returned instead if there is only one input. If users want to stop the
            propagation of some gradients, the corresponding returned values should be set to
            ``None``.
        """
        raise NotImplementedError

    def save_for_backward(self, *tensors: Iterable[Tensor]):
        """
        Saves tensors needed for gradient computation. This method should be called only
        once in :meth:`~.function.Function.forward`; additional calls will replace the
        values saved previously.

        The saved tensors can be accessed through the ``saved_tensors`` attribute.
        """
        self.saved_tensors = tensors

    def __deepcopy__(self, memo):
        """
        Defines how the operator is deep-copied.
        """
        cls = self.__class__
        result = cls.__new__(cls)
        tmp = self.saved_tensors
        self.saved_tensors = None
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            setattr(result, k, copy.deepcopy(v, memo))
        setattr(result, "saved_tensors", tmp)
        self.saved_tensors = tmp
        return result

    def __call__(self, *inputs):
        assert (
            not self._has_saved_state
        ), "A Function instance should not be called multiple times"
        outputs = self.forward(*inputs)
        if isinstance(outputs, Tensor):
            outputs = (outputs,)
        self._has_saved_state = True
        sv = (x._symvar for x in inputs + outputs)
        outputs = _OverrideGradientCraniotome.make(
            *sv, nr_inputs=len(inputs), nr_outputs=len(outputs), grad_func=self.backward
        )
        outputs = tuple(map(Tensor, outputs))
        if len(outputs) == 1:
            outputs = outputs[0]
        return outputs
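
For context, here is a minimal sketch of how the ``Sigmoid`` example from the docstring would be used end to end. The import paths (``megengine.core.function`` for this module and ``megengine.tensor`` as the tensor constructor) are assumptions based on the file's location and may differ between MegEngine versions:

    import numpy as np

    import megengine.functional as F
    from megengine import tensor  # assumed public tensor constructor
    from megengine.core.function import Function  # assumed path to this module

    class Sigmoid(Function):
        def forward(self, x):
            y = 1 / (1 + F.exp(-x))
            self.save_for_backward(y)
            return y

        def backward(self, output_grads):
            (y,) = self.saved_tensors
            return output_grads * y * (1 - y)

    x = tensor(np.array([-1.0, 0.0, 1.0], dtype="float32"))
    y = Sigmoid()(x)  # each Function instance must be called exactly once
    print(y.numpy())

Calling the same ``Sigmoid`` instance a second time would trip the ``_has_saved_state`` assertion in ``__call__``, which is why a fresh instance is created for every forward pass.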

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has a GPU device and that its driver is properly installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
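
A quick way to confirm that the bundled CUDA environment actually sees a device is to query MegEngine at runtime. This is a hedged sketch: ``is_cuda_available`` and ``get_device_count`` are the helpers documented in recent MegEngine releases and may not exist under these names in older versions:

    import megengine as mge

    # Assumed helpers from recent MegEngine releases; names may differ in older versions.
    if mge.is_cuda_available():
        print("GPU count:", mge.get_device_count("gpu"))
    else:
        print("No usable GPU found; running on CPU")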