
loss.py 8.9 kB

# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import megengine._internal as mgb

from ..core.tensor import Tensor
from .elemwise import abs, equal, log, maximum, power
from .nn import assert_equal, indexing_one_hot
from .utils import zero_grad


def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
    r"""
    Calculates the mean absolute error (MAE) between
    each element in the pred :math:`x` and label :math:`y`.

    The mean absolute error can be described as:

    .. math:: \ell(x, y) = mean\left(L\right)

    where

    .. math::

        L = \{l_1, \dots, l_N\}, \quad
        l_n = \left| x_n - y_n \right|,

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
    of :math:`N` elements each. :math:`N` is the batch size.

    :param pred: The predicted result from model.
    :param label: The ground truth to compare.

    Examples:

    .. testcode::

        import numpy as np
        import megengine as mge
        import megengine.functional as F

        ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
        tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
        loss = F.l1_loss(ipt, tgt)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [2.75]

    """
    diff = pred - label
    return abs(diff).mean()


def square_loss(pred: Tensor, label: Tensor) -> Tensor:
    r"""
    Calculates the mean squared error (squared L2 norm) between
    each element in the pred :math:`x` and label :math:`y`.

    The mean squared error can be described as:

    .. math:: \ell(x, y) = mean\left(L\right)

    where

    .. math::

        L = \{l_1, \dots, l_N\}, \quad
        l_n = \left( x_n - y_n \right)^2,

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
    of :math:`N` elements each. :math:`N` is the batch size.

    :param pred: The predicted result from model.
    :param label: The ground truth to compare.

    Shape:
        - pred: :math:`(N, *)` where :math:`*` means any number of additional
          dimensions
        - label: :math:`(N, *)`. Same shape as ``pred``
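
    Examples:

    A minimal usage sketch in the style of :func:`~.l1_loss` above; the
    values and the expected output are hand-computed (the mean of the squared
    differences ``[1, 25, 9, 4]`` is ``9.75``):

    .. testcode::

        import numpy as np
        import megengine as mge
        import megengine.functional as F

        ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
        tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
        loss = F.square_loss(ipt, tgt)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [9.75]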
  61. """
  62. diff = pred - label
  63. return (diff ** 2).mean()


def cross_entropy(
    inp: Tensor, target: Tensor, axis: int = 1, ignore_index: int = -1
) -> Tensor:
    r"""
    Returns the cross entropy loss in a classification problem.

    .. math:: \textrm{CrossEntropy}(x, y) = -\sum_{i} y_i \log(x_i)

    :param inp: The input tensor representing the predicted probability.
    :param target: The input tensor representing the classification label.
    :param axis: An axis along which cross_entropy will be applied. Default: 1
    :param ignore_index: Specifies a target value that is ignored and does not contribute to the input gradient. Default: -1

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        data_shape = (1, 2)
        label_shape = (1, )

        pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(data_shape))
        label = tensor(np.ones(label_shape, dtype=np.int32))
        loss = F.cross_entropy(pred, label)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [0.6931472]
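
    A second hand-computed sketch showing ``ignore_index``: entries whose
    label equals ``ignore_index`` are masked out of the loss, so only
    :math:`-\ln 0.5 \approx 0.6931472` from the first row remains (note that
    the default value ``-1`` disables masking entirely):

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        pred = tensor(np.array([[0.5, 0.5], [0.3, 0.7]], dtype=np.float32))
        label = tensor(np.array([1, 2], dtype=np.int32))
        loss = F.cross_entropy(pred, label, ignore_index=2)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [0.6931472]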
  88. """
  89. n0 = inp.ndim
  90. n1 = target.ndim
  91. assert n0 == n1 + 1, (
  92. "target ndim must be one less than input ndim; input_ndim={} "
  93. "target_ndim={}".format(n0, n1)
  94. )
  95. if ignore_index != -1:
  96. mask = 1 - equal(target, ignore_index)
  97. target = target * mask
  98. loss = -log(indexing_one_hot(inp, target, axis)) * mask
  99. return loss.sum() / maximum(mask.sum(), 1.0)
  100. else:
  101. return -log(indexing_one_hot(inp, target, axis)).mean()


def cross_entropy_with_softmax(
    pred: Tensor, label: Tensor, axis: int = 1, label_smooth: float = 0
) -> Tensor:
    r"""
    Returns loss after applying :func:`~.softmax` + :func:`~.cross_entropy`.

    It has better numerical stability compared with sequential calls to
    :func:`~.softmax` and :func:`~.cross_entropy`.

    When using label smoothing, the label distribution is as follows:

    .. math:: y^{LS}_{k} = y_{k}\left(1 - \alpha\right) + \alpha / K

    where :math:`y^{LS}` and :math:`y` are the new and the original label
    distribution respectively, :math:`k` is the index into the label
    distribution, :math:`\alpha` is ``label_smooth``, and :math:`K` is the
    number of classes.

    :param pred: The input tensor representing the predicted probability.
    :param label: The input tensor representing the classification label.
    :param axis: An axis along which softmax will be applied. Default: 1.
    :param label_smooth: A label-smoothing parameter that re-distributes the target distribution. Default: 0.
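
    Examples:

    A minimal usage sketch; the logits below are chosen so the softmax is
    uniform, and the expected output is hand-computed
    (:math:`\ln 2 \approx 0.6931472`):

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        pred = tensor(np.zeros((1, 2), dtype=np.float32))  # logits, not probabilities
        label = tensor(np.ones((1, ), dtype=np.int32))
        loss = F.cross_entropy_with_softmax(pred, label)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [0.6931472]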
  116. """
  117. n0 = pred.ndim
  118. n1 = label.ndim
  119. assert n0 == n1 + 1, (
  120. "target ndim must be one less than input ndim; input_ndim={} "
  121. "target_ndim={}".format(n0, n1)
  122. )
  123. num_classes = pred.shapeof(axis)
  124. # Denominator of the softmax
  125. offset = zero_grad(pred.max(axis=axis, keepdims=True))
  126. pred = pred - offset
  127. down = mgb.opr.elem.exp(pred).sum(axis=axis, keepdims=True)
  128. up = indexing_one_hot(pred, label, axis)
  129. if label_smooth != 0:
  130. factor = label_smooth / num_classes
  131. up = up * (1 - label_smooth) + pred.sum(axis=axis, keepdims=True) * factor
  132. return (log(down) - up).mean()


def triplet_margin_loss(
    anchor: Tensor, positive: Tensor, negative: Tensor, margin: float = 1.0, p: int = 2
) -> Tensor:
    r"""
    Creates a criterion that measures the triplet loss given the input tensors.

    .. math::

        L(a, p, n) = \max\left\{d\left(a_{i}, p_{i}\right) - d\left(a_{i}, n_{i}\right) + margin, 0\right\}, \quad
        d\left(x_{i}, y_{i}\right) = \left\| x_{i} - y_{i} \right\|_{p}

    :param anchor: The input tensor representing the anchor samples.
    :param positive: The input tensor representing the positive samples.
    :param negative: The input tensor representing the negative samples.
    :param margin: The margin value. Default: 1.0
    :param p: The norm degree for pairwise distance. Default: 2
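
    Examples:

    A minimal usage sketch; the 2D inputs are chosen so the distances are
    easy to check by hand (:math:`d(a, p) = 5`, :math:`d(a, n) = 1`, hence
    the loss is :math:`\max\{5 - 1 + 1, 0\} = 5`):

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        anchor = tensor(np.array([[0., 0.]], dtype=np.float32))
        positive = tensor(np.array([[3., 4.]], dtype=np.float32))
        negative = tensor(np.array([[0., 1.]], dtype=np.float32))
        loss = F.triplet_margin_loss(anchor, positive, negative)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [5.]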
  146. """
  147. s0 = anchor.shapeof()
  148. s1 = positive.shapeof()
  149. s2 = negative.shapeof()
  150. assert_equal(s0, s1)
  151. assert_equal(s1, s2)
  152. n0 = anchor.ndim
  153. n1 = positive.ndim
  154. n2 = negative.ndim
  155. assert n0 == 2 and n1 == 2 and n2 == 2, (
  156. "anchor ndim, positive ndim, and negative ndim must be 2; "
  157. "anchor_ndim={} positive_ndim={} negative_ndim={}".format(n0, n1, n2)
  158. )
  159. assert p > 0, "a margin with a value greater than 0; p={}".format(p)
  160. diff0 = abs(anchor - positive)
  161. diff1 = abs(anchor - negative)
  162. d1 = power(power(diff0, p).sum(axis=1, keepdims=True), 1 / p)
  163. d2 = power(power(diff1, p).sum(axis=1, keepdims=True), 1 / p)
  164. loss = maximum(d1 - d2 + margin, 0)
  165. return loss.mean()


def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor:
    r"""
    Function that measures the Binary Cross Entropy between the target and the prediction.

    :param pred: :math:`(N, *)`, where :math:`*` means any number of additional dimensions.
    :param label: :math:`(N, *)`, same shape as the input.
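
    Examples:

    A minimal usage sketch; with every predicted probability at 0.5, both
    terms reduce to :math:`\log 0.5`, so the expected output is hand-computed
    as :math:`-\ln 0.5 \approx 0.6931472`:

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        pred = tensor(np.array([[0.5, 0.5]], dtype=np.float32))
        label = tensor(np.array([[1., 0.]], dtype=np.float32))
        loss = F.binary_cross_entropy(pred, label)
        print(loss.numpy())

    Outputs:

    .. testoutput::

        [0.6931472]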
  170. """
  171. s0 = pred.shapeof()
  172. s1 = label.shapeof()
  173. assert_equal(s0, s1)
  174. return -1.0 * (label * log(pred) + (1.0 - label) * log(1 - pred)).mean()


def nll_loss(
    pred: Tensor, label: Tensor, axis: int = 1, ignore_index: int = -1
) -> Tensor:
    r"""
    The negative log likelihood loss.

    :param pred: The predicted result from model.
    :param label: The ground truth to compare.
    :param axis: An axis along which nll_loss will be applied. Default: 1
    :param ignore_index: Specifies a target value that is ignored and does not contribute to the input gradient. Default: -1

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor
        import megengine.functional as F

        data_shape = (2, 2)
        label_shape = (2, )

        data = tensor(
            np.array([[1, 0.5], [0.3, 1.2]], dtype=np.float32).reshape(data_shape),
        )
        label = tensor(
            np.ones(label_shape, dtype=np.int32)
        )

        pred = F.log(F.softmax(data))
        loss1 = F.nll_loss(pred, label)
        loss2 = F.cross_entropy_with_softmax(data, label)
        print(loss1.numpy(), loss2.numpy())

    Outputs:

    .. testoutput::

        [0.6576154] [0.6576154]

    """
    n0 = pred.ndim
    n1 = label.ndim
    assert n0 == n1 + 1, (
        "target ndim must be one less than input ndim; input_ndim={} "
        "target_ndim={}".format(n0, n1)
    )

    mask = 1.0 - equal(label, ignore_index)
    label = label * mask

    loss = indexing_one_hot(pred, label, axis) * mask

    return -1.0 * loss.sum() / maximum(mask.sum(), 1.0)

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has a GPU device with the driver installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.