You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

test_op.py 5.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. import numpy as np
  2. import pytest
  3. import megengine as mge
  4. import megengine.functional as F
  5. from megengine.core.tensor import dtype
  6. from megengine.distributed.helper import get_device_count_by_fork
  7. from megengine.functional.elemwise import _elemwise_multi_type, _elwise
  8. from megengine.quantization import QuantMode, create_qparams
  9. def quant(x, scale):
  10. x_dtype = dtype.qint8(scale)
  11. return x.astype(x_dtype)
  12. def fake_quant(x, scale):
  13. x = x / scale
  14. x = F.round(x)
  15. x = F.clip(x, -128, 127)
  16. x = x * scale
  17. return x
  18. @pytest.mark.parametrize("kind", ["abs", "sin", "sub", "mul", "fuse_add_tanh"])
  19. def test_elemwise(kind):
  20. x1 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
  21. x1_scale = np.float32(np.random.rand() + 1)
  22. x1 = fake_quant(x1, x1_scale)
  23. x1.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", x1_scale))
  24. x1_int8 = quant(x1, x1_scale)
  25. x2 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
  26. x2_scale = np.float32(np.random.rand() + 1)
  27. x2 = fake_quant(x2, x2_scale)
  28. x2.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", x2_scale))
  29. x2_int8 = quant(x2, x2_scale)
  30. output_scale = np.float32(np.random.rand() + 1)
  31. output_dtype = dtype.qint8(output_scale)
  32. quantized_kind = "q" + kind
  33. if kind in ("abs", "sin"):
  34. desired_out = fake_quant(_elwise(x1, mode=kind), output_scale)
  35. actual_out = (
  36. _elemwise_multi_type(
  37. x1_int8, mode=quantized_kind, dtype=output_dtype
  38. ).numpy()
  39. * output_scale
  40. )
  41. else:
  42. desired_out = fake_quant(_elwise(x1, x2, mode=kind), output_scale)
  43. actual_out = (
  44. _elemwise_multi_type(
  45. x1_int8, x2_int8, mode=quantized_kind, dtype=output_dtype
  46. ).numpy()
  47. * output_scale
  48. )
  49. np.testing.assert_allclose(actual_out, desired_out.numpy())
@pytest.mark.skipif(
    get_device_count_by_fork("gpu") > 0, reason="cuda does not support nchw int8"
)
def test_conv_bias():
    """Check quantized conv_bias_activation against an fp32 conv2d reference.

    Builds random qint8 input/weight and qint32 bias tensors, runs both the
    float reference path and the quantized fused path, and compares the
    results to within one output quantization step (atol=outp_scale).
    """
    # Random per-tensor scales for input, weight and output; bias scale is
    # the product inp_scale * w_scale, as required by int8 conv arithmetic.
    inp_scale = np.float32(np.random.rand() + 1)
    w_scale = np.float32(np.random.rand() + 1)
    outp_scale = np.float32(np.random.rand() + 1)
    inp_dtype = dtype.qint8(inp_scale)
    w_dtype = dtype.qint8(w_scale)
    b_dtype = dtype.qint32(inp_scale * w_scale)
    out_dtype = dtype.qint8(outp_scale)

    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
        nonlinear_mode="identity",
    ):
        # One conv configuration: batch N, channels IC->OC, spatial IH x IW,
        # kernel KH x KW, padding PH/PW, stride SH/SW.
        inp_v = np.random.normal(size=(N, IC, IH, IW))
        w_v = np.random.normal(size=(OC, IC, KH, KW))
        b_v = np.random.normal(size=(1, OC, 1, 1))
        # NOTE: these locals shadow the enclosing inp_scale/w_scale; the
        # values are re-read from the dtypes built above.
        inp_scale = dtype.get_scale(inp_dtype)
        w_scale = dtype.get_scale(w_dtype)
        b_scale = dtype.get_scale(b_dtype)

        # Quantize the random float data into the target integer dtypes.
        inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
        wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
        bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

        inp_int8 = mge.tensor(inpv, dtype=inp_dtype)
        w_int8 = mge.Parameter(wv, dtype=w_dtype)
        b_int32 = mge.Parameter(bv, dtype=b_dtype)

        # Float copies of the already-quantized values, so both paths see
        # exactly the same (dequantized) operands.
        inp_fp32 = inp_int8.astype("float32")
        w_fp32 = w_int8.astype("float32")
        b_fp32 = b_int32.astype("float32")

        def convert_to_nchw4(var):
            # NCHW -> NCHW4: split channels into groups of 4 and move the
            # group-of-4 axis to the innermost position.
            var = F.reshape(
                var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3])
            )
            var = F.transpose(var, (0, 1, 3, 4, 2))
            return var

        def run_conv2d(inp, w, b):
            # Float reference: plain conv2d, optionally followed by relu.
            O = F.conv2d(
                inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
            )
            if nonlinear_mode == "relu":
                return F.relu(O)
            else:
                return O

        def run_conv_bias(inp, w, b, format="NCHW"):
            # Quantized fused path.  When bias is disabled, a zero bias is
            # passed instead of None (NOTE(review): the zero Parameter is
            # built without an explicit dtype — presumably acceptable to
            # conv_bias_activation; confirm against the op's signature).
            b = b if has_bias else mge.Parameter(np.zeros_like(b.numpy()))
            if format == "NCHW4":
                inp = convert_to_nchw4(inp)
                w = convert_to_nchw4(w)
                b = convert_to_nchw4(b)
            return F.quantized.conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                dtype=out_dtype,
                nonlinear_mode=nonlinear_mode,
            )

        # NOTE(review): the skipif above already skips when a GPU is present,
        # so the NCHW4 branch looks unreachable here — confirm intent.
        format = "NCHW4" if mge.is_cuda_available() else "NCHW"

        # Reference result, rounded through the output quantization grid so
        # both paths have identical precision.
        expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
        expected = expected.astype(out_dtype).astype("float32")
        result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype(
            "float32"
        )
        if format == "NCHW4":
            # Undo the NCHW4 layout before comparing.
            result = F.transpose(result, (0, 1, 4, 2, 3))
        expected = F.flatten(expected)
        result = F.flatten(result)
        # One output quantization step of tolerance.
        np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)

    # Without bias.
    run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1, False)
    run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1, False)
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False)
    # With bias.
    run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1)
    run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1)
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2)
    # With relu activation, with and without bias.
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu")
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu")

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台