You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_tracing.py 8.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. import io
  10. from tempfile import mkstemp
  11. import numpy as np
  12. import pytest
  13. import megengine.core.tensor.megbrain_graph as G
  14. from megengine import cgtools, tensor
  15. from megengine.core._trace_option import set_tensor_shape
  16. from megengine.core.ops import builtin as ops
  17. from megengine.core.tensor.core import apply
  18. from megengine.core.tensor.raw_tensor import as_raw_tensor
  19. from megengine.functional import exp, log
  20. from megengine.jit import exclude_from_trace, trace
  def load_and_inference(file, inp_data):
      """Load a dumped graph from ``file``, feed it ``inp_data`` and execute it.

      Each ``Host2DeviceCopy`` dependency of the loaded outputs is replaced by a
      fresh ``G.InputNode`` and each output is wrapped in a ``G.OutputNode``, so
      values can be pushed in and read back after ``execute()``.

      :param file: forwarded unchanged to ``G.load_graph``.
      :param inp_data: input values, paired positionally (via ``zip``) with the
          ``Host2DeviceCopy`` vars returned by ``cgtools.get_dep_vars``.
      :return: list holding one ``numpy()`` result per graph output.
      """
      # Only the output vars are needed; the other two returned items are unused.
      _, _, out_list = G.load_graph(file)
      h2d_vars = cgtools.get_dep_vars(out_list, "Host2DeviceCopy")

      replace_dict = {}
      inp_node_list = []
      for var in h2d_vars:
          # Build each input node from the var it replaces. The previous code
          # always used the first var's dtype/graph, which is wrong as soon as
          # the inputs do not all share one dtype.
          inp_node = G.InputNode(device="xpux", dtype=var.dtype, graph=var.graph)
          replace_dict[var] = inp_node.outputs[0]
          inp_node_list.append(inp_node)

      new_out = cgtools.replace_vars(out_list, replace_dict)
      out_node_list = [G.OutputNode(var) for var in new_out]
      new_out_list = [node.outputs[0] for node in out_node_list]

      # Compile on the graph that owns the rewritten outputs.
      func = new_out_list[0].graph.compile(new_out_list)

      for node, value in zip(inp_node_list, inp_data):
          node.set_value(as_raw_tensor(value)._dev_tensor())
      func.execute()

      return [node.get_value().numpy() for node in out_node_list]
  def test_trace():
      """A traced negate must agree with ``f.__wrapped__`` on repeated calls.

      Runs once with ``symbolic=False`` and once with ``symbolic=True``.
      """
      for symbolic in (False, True):

          @trace(symbolic=symbolic)
          def f(x):
              negate = ops.Elemwise(mode="negate")
              (negated,) = apply(negate, x)
              return negated

          data = as_raw_tensor([1]).numpy()
          # Reference value computed through the wrapped (undecorated) callable.
          expected = f.__wrapped__(as_raw_tensor(data)).numpy()

          for _ in range(3):
              actual = f(as_raw_tensor(data)).numpy()
              np.testing.assert_equal(actual, expected)
  def test_exclude_from_trace():
      """Check tracing when part of the body runs under ``exclude_from_trace``.

      The excluded section applies an extra negate only on odd iterations; the
      traced result must still equal what ``f.__wrapped__`` returns each time.
      """
      for symbolic in (False, True):

          @trace(symbolic=symbolic)
          def f(x):
              negate = ops.Elemwise(mode="negate")
              (out,) = apply(negate, x)
              with exclude_from_trace():
                  # ``step`` is the loop counter of the enclosing test body.
                  if step % 2:
                      (out,) = apply(negate, out)
              (out,) = apply(negate, out)
              return out

          data = as_raw_tensor([1]).numpy()

          for step in range(3):
              expected = f.__wrapped__(as_raw_tensor(data)).numpy()
              actual = f(as_raw_tensor(data)).numpy()
              np.testing.assert_equal(actual, expected)
  def test_print_in_trace():
      """Reading an intermediate value with ``numpy()`` inside a traced function.

      The intermediate value is stored in a variable of the enclosing scope. It
      is compared between a call through ``f.__wrapped__`` and a call through
      the traced ``f``.
      """
      for symbolic in (False,):  # cannot read value in symbolic mode

          @trace(symbolic=symbolic)
          def f(x):
              nonlocal captured
              negate = ops.Elemwise(mode="negate")
              (out,) = apply(negate, x)
              captured = out.numpy()
              (out,) = apply(negate, out)
              return out

          captured = None
          data = as_raw_tensor([1]).numpy()

          for _ in range(3):
              expected = f.__wrapped__(as_raw_tensor(data)).numpy()
              # Save what the wrapped call captured, then reset before the traced call.
              captured_by_wrapped = captured
              captured = None
              actual = f(as_raw_tensor(data)).numpy()
              np.testing.assert_equal(actual, expected)
              np.testing.assert_equal(captured_by_wrapped, captured)
  def test_dump():
      """Dump a traced two-input add and run the dumped graph again.

      Checks the operator count and the input/output names reported by
      ``f.dump`` and that ``load_and_inference`` reproduces the reference result.
      """

      @trace(symbolic=True, capture_as_const=True)
      def f(a, b):
          add = ops.Elemwise(mode="add")
          (total,) = apply(add, a, b)
          return total

      lhs = as_raw_tensor([2]).numpy()
      rhs = as_raw_tensor([4]).numpy()
      expected = f.__wrapped__(as_raw_tensor(lhs), as_raw_tensor(rhs)).numpy()

      for _ in range(3):
          actual = f(as_raw_tensor(lhs), as_raw_tensor(rhs)).numpy()
          np.testing.assert_equal(actual, expected)

      buffer = io.BytesIO()
      dump_info = f.dump(buffer)
      assert dump_info.nr_opr == 3
      np.testing.assert_equal(dump_info.inputs, ["h2d[0]", "h2d[2]"])
      np.testing.assert_equal(dump_info.outputs, ["ADD(h2d[0],h2d[2])[4]"])

      # Rewind so the dumped bytes can be read back from the start.
      buffer.seek(0)
      (first_output, *_) = load_and_inference(buffer, [lhs, rhs])
      np.testing.assert_equal(first_output, expected)
  def test_capture_dump():
      """Dump a traced multiply that uses a tensor from the enclosing scope.

      The dumped graph is reloaded through ``load_and_inference`` with only
      ``x`` supplied and must give the reference result.
      """
      a = as_raw_tensor([2])

      @trace(symbolic=True, capture_as_const=True)
      def f(x):
          multiply = ops.Elemwise(mode="mul")
          (product,) = apply(multiply, x, a)
          return product

      data = as_raw_tensor([3]).numpy()
      expected = f.__wrapped__(as_raw_tensor(data)).numpy()

      for _ in range(3):
          actual = f(as_raw_tensor(data)).numpy()
          np.testing.assert_equal(actual, expected)

      buffer = io.BytesIO()
      f.dump(buffer)
      # Rewind so the dumped bytes can be read back from the start.
      buffer.seek(0)
      (first_output, *_) = load_and_inference(buffer, [data])
      np.testing.assert_equal(first_output, expected)
  def test_dump_volatile():
      """Dump without ``optimize_for_inference`` and inspect the captured operand.

      After reloading, the second input of the output's owner operator must be
      of operator type ``"SharedDeviceTensor"``.
      """
      p = as_raw_tensor([2])

      @trace(symbolic=True, capture_as_const=True)
      def f(x):
          multiply = ops.Elemwise(mode="mul")
          (product,) = apply(multiply, x, p)
          return product

      data = as_raw_tensor([3]).numpy()
      expected = f.__wrapped__(as_raw_tensor(data)).numpy()

      for _ in range(3):
          actual = f(as_raw_tensor(data)).numpy()
          np.testing.assert_equal(actual, expected)

      buffer = io.BytesIO()
      f.dump(buffer, optimize_for_inference=False)
      buffer.seek(0)

      _, _, outputs = G.load_graph(buffer)
      (out,) = outputs
      second_operand = cgtools.get_owner_opr_inputs(out)[1]
      operand_type = cgtools.get_owner_opr_type(second_operand)
      assert operand_type == "SharedDeviceTensor"
  def test_trace_profiler():
      """With ``profiling=True`` the trace must expose a ``"profiler"`` entry.

      Runs once with ``symbolic=False`` and once with ``symbolic=True``.
      """
      for symbolic in (False, True):

          @trace(symbolic=symbolic, profiling=True)
          def f(x):
              negate = ops.Elemwise(mode="negate")
              (negated,) = apply(negate, x)
              return negated

          data = as_raw_tensor([1]).numpy()
          # The wrapped callable is still invoked once; its result is not checked.
          f.__wrapped__(as_raw_tensor(data)).numpy()

          f(as_raw_tensor(data))
          f(as_raw_tensor(data))  # XXX: has to run twice

          profile = f.get_profile()
          assert profile.get("profiler")
  @pytest.mark.skip(reason="could not disable opt_level")
  def test_goptions_log_exp():
      """Compare dumped operator counts of ``log(exp(x))`` at opt_level 0 and 1.

      The graph dumped at ``opt_level=0`` is expected to contain exactly two
      more operators than the one dumped at ``opt_level=1``.
      """
      import os

      @trace(symbolic=True, opt_level=0, capture_as_const=True)
      def f(x):
          return log(exp(x))

      @trace(symbolic=True, opt_level=1, capture_as_const=True)
      def g(x):
          return log(exp(x))

      f(tensor(1.0))

      # mkstemp() returns an open descriptor plus a path; the caller owns both.
      # Close the descriptor right away (only the path is used) and delete the
      # file at the end, so the test leaves nothing behind.
      fd, out = mkstemp()
      os.close(fd)
      try:
          f.dump(out, optimize_for_inference=False)
          *_, outputs = G.load_graph(out)
          oprs_1 = cgtools.get_oprs_seq(outputs)

          g(tensor(1.0))
          g.dump(out, optimize_for_inference=False)
          *_, outputs = G.load_graph(out)
          oprs_2 = cgtools.get_oprs_seq(outputs)

          assert len(oprs_1) - len(oprs_2) == 2
      finally:
          os.remove(out)
  @pytest.mark.skip(reason="could not disable opt_level")
  def test_goptions_log_sum_exp():
      """Compare dumped operator counts of ``log(exp(x) + exp(y))`` at opt_level 0 and 1.

      The graph dumped at ``opt_level=0`` is expected to contain exactly two
      more operators than the one dumped at ``opt_level=1``.
      """
      import os

      @trace(symbolic=True, opt_level=0, capture_as_const=True)
      def f(x, y):
          return log(exp(x) + exp(y))

      @trace(symbolic=True, opt_level=1, capture_as_const=True)
      def g(x, y):
          return log(exp(x) + exp(y))

      f(tensor(1.0), tensor(2.0))

      # mkstemp() returns an open descriptor plus a path; the caller owns both.
      # Close the descriptor right away (only the path is used) and delete the
      # file at the end, so the test leaves nothing behind.
      fd, out = mkstemp()
      os.close(fd)
      try:
          f.dump(out, optimize_for_inference=False)
          *_, outputs = G.load_graph(out)
          oprs_1 = cgtools.get_oprs_seq(outputs)

          g(tensor(1.0), tensor(2.0))
          g.dump(out, optimize_for_inference=False)
          *_, outputs = G.load_graph(out)
          oprs_2 = cgtools.get_oprs_seq(outputs)

          assert len(oprs_1) - len(oprs_2) == 2
      finally:
          os.remove(out)
  def test_optimize_for_inference():
      """Dump ``exp(x)`` with ``enable_io16xc32=True`` and check the input dtype.

      After reloading, the first input of the first output's owner operator
      must have dtype ``np.float16``.
      """
      import os

      @trace(symbolic=True, capture_as_const=True)
      def f(x):
          return exp(x)

      # mkstemp() returns an open descriptor plus a path; the caller owns both.
      # Close the descriptor right away (only the path is used) and delete the
      # file at the end, so the test leaves nothing behind.
      fd, out = mkstemp()
      os.close(fd)
      try:
          f(tensor(5.0))
          f.dump(out, enable_io16xc32=True)

          res = G.load_graph(out)
          computing_input = res.output_vars_list[0].owner.inputs[0]
          assert computing_input.dtype == np.float16
      finally:
          os.remove(out)
  def test_trace_cvt_bool():
      """A traced comparison on ``x.shape[0]`` must evaluate to ``False``.

      NOTE(review): ``set_tensor_shape(True)`` is not reverted here; confirm
      whether that setting leaks into other tests.
      """
      set_tensor_shape(True)
      sample = tensor([0], dtype=np.int32)

      @trace(symbolic=True)
      def f(x):
          leading_dim = x.shape[0]
          return leading_dim == 0

      for _ in range(3):
          flag = f(sample).numpy()[0]
          np.testing.assert_equal(flag, False)
  def test_trace_reshape():
      """Call a traced reshape with inputs whose leading dimension varies.

      The test makes no assertions; it only requires that the three calls
      complete, once with ``symbolic=False`` and once with ``symbolic=True``.
      """
      for symbolic in (False, True):
          set_tensor_shape(True)
          # Create all inputs up front, in order of leading size 2, 4, 8.
          samples = [tensor(np.random.randn(lead, 10, 10)) for lead in (2, 4, 8)]

          @trace(symbolic=symbolic, capture_as_const=True)
          def f(x):
              return x.reshape(x.shape[0], 100)

          for sample in samples:
              f(sample)

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台