
test_op.py 5.7 kB

# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np
import pytest

import megengine as mge
import megengine.functional as F
from megengine.core.tensor import dtype
from megengine.distributed.helper import get_device_count_by_fork
from megengine.functional.elemwise import _elemwise_multi_type, _elwise
from megengine.quantization import QuantMode, create_qparams

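# Quantize a float tensor to the qint8 dtype built from `scale`.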
def quant(x, scale):
    x_dtype = dtype.qint8(scale)
    return x.astype(x_dtype)

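# Emulate int8 quantization in float: scale down, round, clamp to [-128, 127], scale back.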
def fake_quant(x, scale):
    x = x / scale
    x = F.round(x)
    x = F.clip(x, -128, 127)
    x = x * scale
    return x

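# Each elemwise mode is checked by comparing the int8 kernel output (rescaled
# by the output scale) against the fake-quantized float result.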
@pytest.mark.parametrize("kind", ["abs", "sin", "sub", "mul", "fuse_add_tanh"])
def test_elemwise(kind):
    x1 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x1_scale = np.float32(np.random.rand() + 1)
    x1 = fake_quant(x1, x1_scale)
    x1.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", x1_scale))
    x1_int8 = quant(x1, x1_scale)

    x2 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x2_scale = np.float32(np.random.rand() + 1)
    x2 = fake_quant(x2, x2_scale)
    x2.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", x2_scale))
    x2_int8 = quant(x2, x2_scale)

    output_scale = np.float32(np.random.rand() + 1)
    output_dtype = dtype.qint8(output_scale)

    quantized_kind = "q" + kind
    if kind in ("abs", "sin"):
        desired_out = fake_quant(_elwise(x1, mode=kind), output_scale)
        actual_out = (
            _elemwise_multi_type(
                x1_int8, mode=quantized_kind, dtype=output_dtype
            ).numpy()
            * output_scale
        )
    else:
        desired_out = fake_quant(_elwise(x1, x2, mode=kind), output_scale)
        actual_out = (
            _elemwise_multi_type(
                x1_int8, x2_int8, mode=quantized_kind, dtype=output_dtype
            ).numpy()
            * output_scale
        )
    np.testing.assert_allclose(actual_out, desired_out.numpy())

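# Compare the fused int8 conv_bias_activation against a float conv2d reference,
# accepting up to one quantization step of error (atol = output scale).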
@pytest.mark.skipif(
    get_device_count_by_fork("gpu") > 0, reason="cuda does not support nchw int8"
)
def test_conv_bias():
    inp_scale = np.float32(np.random.rand() + 1)
    w_scale = np.float32(np.random.rand() + 1)
    outp_scale = np.float32(np.random.rand() + 1)
    inp_dtype = dtype.qint8(inp_scale)
    w_dtype = dtype.qint8(w_scale)
    b_dtype = dtype.qint32(inp_scale * w_scale)
    out_dtype = dtype.qint8(outp_scale)

    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
        nonlinear_mode="identity",
    ):
        inp_v = np.random.normal(size=(N, IC, IH, IW))
        w_v = np.random.normal(size=(OC, IC, KH, KW))
        b_v = np.random.normal(size=(1, OC, 1, 1))
        inp_scale = dtype.get_scale(inp_dtype)
        w_scale = dtype.get_scale(w_dtype)
        b_scale = dtype.get_scale(b_dtype)

        inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
        wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
        bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

        inp_int8 = mge.tensor(inpv, dtype=inp_dtype)
        w_int8 = mge.Parameter(wv, dtype=w_dtype)
        b_int32 = mge.Parameter(bv, dtype=b_dtype)

        inp_fp32 = inp_int8.astype("float32")
        w_fp32 = w_int8.astype("float32")
        b_fp32 = b_int32.astype("float32")

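        # NCHW -> NCHW4: split the channel axis into groups of 4 and move each
        # group of 4 channels to the innermost axis.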
        def convert_to_nchw4(var):
            var = F.reshape(
                var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3])
            )
            var = F.transpose(var, (0, 1, 3, 4, 2))
            return var

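        # Float reference path: plain conv2d with optional bias and ReLU.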
        def run_conv2d(inp, w, b):
            O = F.conv2d(
                inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
            )
            if nonlinear_mode == "relu":
                return F.relu(O)
            else:
                return O

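        # Quantized path under test: fused conv + bias + activation on int8 data.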
        def run_conv_bias(inp, w, b, format="NCHW"):
            b = b if has_bias else mge.Parameter(np.zeros_like(b.numpy()))
            if format == "NCHW4":
                inp = convert_to_nchw4(inp)
                w = convert_to_nchw4(w)
                b = convert_to_nchw4(b)
            return F.quantized.conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                dtype=out_dtype,
                nonlinear_mode=nonlinear_mode,
            )

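        # The CUDA int8 kernel expects the NCHW4 layout; the CPU path uses plain NCHW.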
        format = "NCHW4" if mge.is_cuda_available() else "NCHW"
        expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
        expected = expected.astype(out_dtype).astype("float32")
        result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype(
            "float32"
        )
        if format == "NCHW4":
            result = F.transpose(result, (0, 1, 4, 2, 3))
        expected = F.flatten(expected)
        result = F.flatten(result)
        np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)

    run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1, False)
    run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1, False)
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False)
    run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1)
    run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1)
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2)
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu")
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu")

The MegEngine package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU installed along with a working driver. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
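As a quick sanity check, here is a minimal sketch of probing the runtime before dispatching device-dependent work; it relies only on mge.is_cuda_available(), the same call test_op.py uses above to pick its tensor layout (the printed messages are illustrative, not library output):

import megengine as mge

# With the bundled CUDA runtime, availability depends only on the local
# GPU hardware and driver, not on which package variant was installed.
if mge.is_cuda_available():
    print("GPU detected: int8 conv tests run in the NCHW4 layout")
else:
    print("CPU only: tests fall back to the NCHW layout")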