
loss.py 8.1 kB

# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np

from ..core.tensor.utils import make_shape_tuple
from ..tensor import Tensor
from .elemwise import abs, equal, exp, log, maximum, pow, relu
from .nn import indexing_one_hot, logsigmoid, logsumexp
from .tensor import where

__all__ = [
    "l1_loss",
    "square_loss",
    "cross_entropy",
    "binary_cross_entropy",
    "hinge_loss",
]

def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
    r"""Calculates the mean absolute error (MAE) between
    each element in the pred :math:`x` and label :math:`y`.

    The mean absolute error can be described as:

    .. math:: \ell(x, y) = mean\left( L \right)

    where

    .. math::

        L = \{l_1,\dots,l_N\}, \quad
        l_n = \left| x_n - y_n \right|,

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
    of :math:`N` elements each. :math:`N` is the batch size.

    :param pred: predicted result from model.
    :param label: ground truth to compare.
    :return: loss value.

    Examples:

    .. testcode::

        import numpy as np
        import megengine as mge
        import megengine.functional as F

        ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
        tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
        loss = F.nn.l1_loss(ipt, tgt)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [2.75]

    """
    diff = pred - label
    return abs(diff).mean()

def square_loss(pred: Tensor, label: Tensor) -> Tensor:
    r"""Calculates the mean squared error (squared L2 norm) between
    each element in the pred :math:`x` and label :math:`y`.

    The mean squared error can be described as:

    .. math:: \ell(x, y) = mean\left( L \right)

    where

    .. math::

        L = \{l_1,\dots,l_N\}, \quad
        l_n = \left( x_n - y_n \right)^2,

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
    of :math:`N` elements each. :math:`N` is the batch size.

    :param pred: predicted result from model.
    :param label: ground truth to compare.
    :return: loss value.

    Shape:
        - pred: :math:`(N, *)` where :math:`*` means any number of additional
          dimensions.
        - label: :math:`(N, *)`. Same shape as ``pred``.

    Examples:

    .. testcode::

        import numpy as np
        import megengine as mge
        import megengine.functional as F

        ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
        tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
        loss = F.nn.square_loss(ipt, tgt)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [9.75]

    """
    diff = pred - label
    return (diff ** 2).mean()

def cross_entropy(
    pred: Tensor,
    label: Tensor,
    axis: int = 1,
    with_logits: bool = True,
    label_smooth: float = 0,
) -> Tensor:
    r"""Computes the multi-class cross entropy loss (using logits by default).

    By default, prediction is assumed to be logits, whose softmax gives probabilities.
    This has better numerical stability than sequential calls to :func:`~.softmax`
    and :func:`~.cross_entropy`.

    When using label smoothing, the label distribution is as follows:

    .. math:: y^{LS}_{k} = y_{k}\left(1 - \alpha\right) + \alpha / K

    where :math:`y^{LS}` and :math:`y` are the new and the original label
    distribution respectively, :math:`k` is the index into the label distribution,
    :math:`\alpha` is ``label_smooth`` and :math:`K` is the number of classes.

    :param pred: input tensor representing the predicted logits (or probabilities
        if ``with_logits`` is False).
    :param label: input tensor representing the classification label.
    :param axis: axis along which softmax will be applied. Default: 1
    :param with_logits: whether to apply softmax first. Default: True
    :param label_smooth: label smoothing parameter that re-distributes the target
        distribution. Default: 0
    :return: loss value.

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        data_shape = (1, 2)
        label_shape = (1, )
        pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape))
        label = tensor(np.ones(label_shape, dtype=np.int32))
        loss = F.nn.cross_entropy(pred, label)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [0.6931]

    """
    n0 = pred.ndim
    n1 = label.ndim
    assert n0 == n1 + 1, (
        "target ndim must be one less than input ndim; input_ndim={} "
        "target_ndim={}".format(n0, n1)
    )

    num_classes = pred.shape[axis]
    no_label_smooth = (
        label_smooth is None
        or type(label_smooth) in (int, float)
        and label_smooth == 0
    )

    if not with_logits:
        if no_label_smooth:
            return -log(indexing_one_hot(pred, label, axis)).mean()
        pred = log(pred)
        return (
            label_smooth * pred.mean()
            - (1 - label_smooth) * indexing_one_hot(pred, label, axis).mean()
        )

    # Denominator of the softmax.
    down = logsumexp(pred, axis=axis, keepdims=True)

    up = indexing_one_hot(pred, label, axis)

    if not no_label_smooth:
        factor = label_smooth / num_classes
        up = up * (1 - label_smooth) + pred.sum(axis=axis, keepdims=True) * factor

    return (down - up).mean()
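
The label-smoothing branch can be sanity-checked with a short script. The following is a minimal sketch (not part of ``loss.py``; the input values are arbitrary) comparing the plain loss against a smoothed one. A non-zero ``label_smooth`` mixes the one-hot target with a uniform distribution over the classes, so the two values differ:

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    pred = tensor(np.array([[2.0, 0.5, 0.3]], dtype=np.float32))
    label = tensor(np.array([0], dtype=np.int32))

    plain = F.nn.cross_entropy(pred, label)                      # alpha = 0
    smooth = F.nn.cross_entropy(pred, label, label_smooth=0.1)   # alpha = 0.1
    print(plain.numpy(), smooth.numpy())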

def binary_cross_entropy(
    pred: Tensor, label: Tensor, with_logits: bool = True
) -> Tensor:
    r"""Computes the binary cross entropy loss (using logits by default).

    By default, prediction is assumed to be logits, whose sigmoid gives probabilities.

    :param pred: `(N, *)`, where `*` means any number of additional dimensions.
    :param label: `(N, *)`, same shape as the input.
    :param with_logits: whether to apply sigmoid first. Default: True
    :return: loss value.

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2))
        label = tensor(np.ones((1, 2), dtype=np.float32))
        loss = F.nn.binary_cross_entropy(pred, label)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [0.6931]

    """
    if not with_logits:
        return -(label * log(pred) + (1 - label) * log(1 - pred)).mean()
    # logsigmoid(pred) and logsigmoid(-pred) have a common sub-expression;
    # hopefully the backend will optimize this.
    return -(label * logsigmoid(pred) + (1 - label) * logsigmoid(-pred)).mean()
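
The ``logsigmoid`` formulation matters for numerical stability. A minimal sketch (not part of the original file; the logit magnitudes are chosen to force saturation) contrasts the logits path with manually applying sigmoid first. For large-magnitude logits the sigmoid saturates to exactly 1.0 in float32, so the ``with_logits=False`` path can lose precision or even produce non-finite values, while the logits path stays finite:

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    logits = tensor(np.array([[30.0, -30.0]], dtype=np.float32))
    label = tensor(np.ones((1, 2), dtype=np.float32))

    stable = F.nn.binary_cross_entropy(logits, label)  # logsigmoid path
    naive = F.nn.binary_cross_entropy(F.sigmoid(logits), label, with_logits=False)
    print(stable.numpy(), naive.numpy())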

def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor:
    r"""Calculates the hinge loss which is often used in SVMs.

    The hinge loss can be described as:

    .. math:: loss(x, y) = \frac{1}{N}\sum_i\sum_j(max(0, 1 - x_{ij}*y_{ij}))

    :param pred: input tensor representing the predicted probability, shape is `(N, C)`.
    :param label: input tensor representing the binary classification label, shape is `(N, C)`.
    :param norm: specifies the norm to calculate the loss, should be "L1" or "L2".
    :return: loss value.

    Examples:

    .. testcode::

        from megengine import tensor
        import megengine.functional as F

        pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32")
        label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32")
        loss = F.nn.hinge_loss(pred, label)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [1.5]

    """
    assert norm in ["L1", "L2"], "norm must be L1 or L2"
    # Labels are expected to be -1/1; the margin term is max(0, 1 - pred * label).
    loss = relu(1.0 - pred * label)
    if norm == "L1":
        return loss.sum(axis=1).mean()
    else:
        return (loss ** 2).sum(axis=1).mean()
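
The docstring example above uses the default ``norm="L1"``; passing ``norm="L2"`` selects the squared hinge instead. A small sketch with the same inputs as the docstring example (working the arithmetic by hand, the per-sample squared margin sums are 1.71 and 0.29, whose mean is 1.0):

    from megengine import tensor
    import megengine.functional as F

    pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32")
    label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32")

    # Squared hinge: sum of max(0, 1 - pred*label)**2 per sample, then batch mean.
    loss = F.nn.hinge_loss(pred, label, norm="L2")
    print(loss.numpy())  # [1.]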

The MegEngine package ships with the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine actually has a GPU and that the driver is properly installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
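
As a minimal sketch of how such a runtime check might look (assuming the top-level helpers ``is_cuda_available``, ``set_default_device`` and ``get_default_device`` from the ``megengine`` package):

    import megengine as mge

    # Pick a device depending on whether the bundled CUDA runtime sees a GPU.
    if mge.is_cuda_available():
        mge.set_default_device("gpu0")  # first GPU
    else:
        mge.set_default_device("cpu0")  # CPU fallback
    print(mge.get_default_device())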