You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_tracing.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. import io
  10. from tempfile import mkstemp
  11. import numpy as np
  12. import pytest
  13. import megengine.core.tensor.megbrain_graph as G
  14. import megengine.functional as F
  15. from megengine import cgtools, tensor
  16. from megengine.core._trace_option import set_symbolic_shape
  17. from megengine.core.ops import builtin as ops
  18. from megengine.core.ops.builtin import Elemwise
  19. from megengine.core.tensor.core import apply
  20. from megengine.core.tensor.raw_tensor import as_raw_tensor
  21. from megengine.functional import exp, log
  22. from megengine.jit import exclude_from_trace, trace
  def test_trace():
      """Check that a traced NEGATE op matches the result of ``f.__wrapped__``.

      The comparison is repeated three times for each of ``symbolic=False``
      and ``symbolic=True``.
      """
      for symbolic in (False, True):

          @trace(symbolic=symbolic)
          def f(x):
              (y,) = apply(ops.Elemwise(Elemwise.Mode.NEGATE), x)
              return y

          data = as_raw_tensor([1]).numpy()
          # Reference value obtained by calling ``__wrapped__`` directly.
          expected = f.__wrapped__(as_raw_tensor(data)).numpy()

          for _ in range(3):
              traced_result = f(as_raw_tensor(data)).numpy()
              np.testing.assert_equal(traced_result, expected)
  def test_exclude_from_trace():
      """Check a traced function whose body has an ``exclude_from_trace`` region.

      Inside the excluded region an extra NEGATE is applied on odd iterations
      only; each iteration compares the traced result with the result of
      calling ``f.__wrapped__`` on the same input.
      """
      for symbolic in (False, True):

          @trace(symbolic=symbolic)
          def f(x):
              neg = ops.Elemwise(Elemwise.Mode.NEGATE)
              (x,) = apply(neg, x)
              with exclude_from_trace():
                  # ``step`` is the counter of the loop below, read at call time.
                  if step % 2 != 0:
                      (x,) = apply(neg, x)
              (x,) = apply(neg, x)
              return x

          data = as_raw_tensor([1]).numpy()

          for step in range(3):
              expected = f.__wrapped__(as_raw_tensor(data)).numpy()
              traced_result = f(as_raw_tensor(data)).numpy()
              np.testing.assert_equal(traced_result, expected)
  def test_print_in_trace():
      """Check that an intermediate value can be read via ``numpy()`` in a trace.

      The traced function stores an intermediate value in ``captured``. Each
      iteration compares the value stored by the ``f.__wrapped__`` call with
      the value stored by the traced call, and also compares the two return
      values.
      """
      for symbolic in (False,):  # cannot read value in symbolic mode

          @trace(symbolic=symbolic)
          def f(x):
              nonlocal captured
              neg = ops.Elemwise(Elemwise.Mode.NEGATE)
              (x,) = apply(neg, x)
              captured = x.numpy()
              (x,) = apply(neg, x)
              return x

          captured = None
          data = as_raw_tensor([1]).numpy()

          for _ in range(3):
              expected = f.__wrapped__(as_raw_tensor(data)).numpy()
              reference_capture = captured
              # Reset so a stale value cannot satisfy the comparison below.
              captured = None
              np.testing.assert_equal(f(as_raw_tensor(data)).numpy(), expected)
              np.testing.assert_equal(reference_capture, captured)
  def test_dump():
      """Trace an ADD of two inputs, dump it, and run inference on the dump.

      Verifies the operator count and the input/output names reported by
      ``dump``, then checks that loading the dumped graph and running it on
      the same inputs reproduces the reference result.
      """

      @trace(symbolic=True, capture_as_const=True)
      def f(a, b):
          (y,) = apply(ops.Elemwise(Elemwise.Mode.ADD), a, b)
          return y

      lhs = as_raw_tensor([2]).numpy()
      rhs = as_raw_tensor([4]).numpy()
      expected = f.__wrapped__(as_raw_tensor(lhs), as_raw_tensor(rhs)).numpy()

      for _ in range(3):
          traced_result = f(as_raw_tensor(lhs), as_raw_tensor(rhs)).numpy()
          np.testing.assert_equal(traced_result, expected)

      buffer = io.BytesIO()
      info = f.dump(buffer)
      assert info.nr_opr == 3
      np.testing.assert_equal(info.inputs, ["h2d[0]", "h2d[2]"])
      np.testing.assert_equal(info.outputs, ["ADD(h2d[0],h2d[2])[4]"])

      # Rewind so the dumped graph can be read back from the start.
      buffer.seek(0)
      loaded_result = cgtools.load_and_inference(buffer, [lhs, rhs])
      np.testing.assert_equal(loaded_result[0], expected)
  def test_capture_dump():
      """Dump a traced MUL whose second operand is a tensor from the closure.

      After dumping, the graph is loaded and run with only ``x`` as input,
      and its output is compared with the reference result.
      """
      factor = as_raw_tensor([2])

      @trace(symbolic=True, capture_as_const=True)
      def f(x):
          (y,) = apply(ops.Elemwise(Elemwise.Mode.MUL), x, factor)
          return y

      data = as_raw_tensor([3]).numpy()
      expected = f.__wrapped__(as_raw_tensor(data)).numpy()

      for _ in range(3):
          np.testing.assert_equal(f(as_raw_tensor(data)).numpy(), expected)

      buffer = io.BytesIO()
      f.dump(buffer)
      # Rewind so the dumped graph can be read back from the start.
      buffer.seek(0)
      loaded_result = cgtools.load_and_inference(buffer, [data])
      np.testing.assert_equal(loaded_result[0], expected)
  def test_dump_volatile():
      """Dump a traced MUL without inference optimization and inspect the graph.

      Asserts that, in the loaded graph, the second input of the operator
      owning the single output has type ``"ImmutableTensor"``.
      """
      p = as_raw_tensor([2])

      @trace(symbolic=True, capture_as_const=True)
      def f(x):
          (y,) = apply(ops.Elemwise(Elemwise.Mode.MUL), x, p)
          return y

      data = as_raw_tensor([3]).numpy()
      expected = f.__wrapped__(as_raw_tensor(data)).numpy()

      for _ in range(3):
          np.testing.assert_equal(f(as_raw_tensor(data)).numpy(), expected)

      buffer = io.BytesIO()
      f.dump(buffer, optimize_for_inference=False)
      buffer.seek(0)

      _graph, _, output_vars = G.load_graph(buffer)
      (out_var,) = output_vars
      second_input = cgtools.get_owner_opr_inputs(out_var)[1]
      assert cgtools.get_owner_opr_type(second_input) == "ImmutableTensor"
  def test_trace_profiler():
      """Check that a trace created with ``profiling=True`` yields a profile.

      For each tracing mode the traced function is called twice, after which
      ``get_profile()`` must return a mapping with a truthy ``"profiler"``
      entry.
      """
      for symbolic in (False, True):

          @trace(symbolic=symbolic, profiling=True)
          def f(x):
              (y,) = apply(ops.Elemwise(Elemwise.Mode.NEGATE), x)
              return y

          data = as_raw_tensor([1]).numpy()
          # Result is not compared; the call is kept from the original test.
          y = f.__wrapped__(as_raw_tensor(data)).numpy()

          # XXX: has to run twice
          for _ in range(2):
              f(as_raw_tensor(data))

          profile = f.get_profile()
          assert profile.get("profiler")
  @pytest.mark.skip(reason="could not disable opt_level")
  def test_goptions_log_exp():
      """Compare dumped operator counts of ``log(exp(x))`` at opt_level 0 and 1.

      The same expression is traced with ``opt_level=0`` and ``opt_level=1``,
      each trace is dumped to a temporary file and loaded back, and the
      operator sequence of the opt_level-0 graph is expected to be exactly two
      operators longer. The test is currently skipped (see the skip reason).
      """
      import os

      @trace(symbolic=True, opt_level=0, capture_as_const=True)
      def f(x):
          return log(exp(x))

      @trace(symbolic=True, opt_level=1, capture_as_const=True)
      def g(x):
          return log(exp(x))

      fd, out = mkstemp()
      # mkstemp hands back an open OS-level descriptor; only the path is used,
      # so close it right away instead of leaking it.
      os.close(fd)
      try:
          f(tensor(1.0))
          f.dump(out, optimize_for_inference=False)
          *_, outputs = G.load_graph(out)
          oprs_1 = cgtools.get_oprs_seq(outputs)

          g(tensor(1.0))
          g.dump(out, optimize_for_inference=False)
          *_, outputs = G.load_graph(out)
          oprs_2 = cgtools.get_oprs_seq(outputs)
      finally:
          # mkstemp never deletes the file itself; clean up after the test.
          os.remove(out)

      assert len(oprs_1) - len(oprs_2) == 2
  @pytest.mark.skip(reason="could not disable opt_level")
  def test_goptions_log_sum_exp():
      """Compare dumped operator counts of ``log(exp(x) + exp(y))``.

      The same expression is traced with ``opt_level=0`` and ``opt_level=1``,
      each trace is dumped to a temporary file and loaded back, and the
      operator sequence of the opt_level-0 graph is expected to be exactly two
      operators longer. The test is currently skipped (see the skip reason).
      """
      import os

      @trace(symbolic=True, opt_level=0, capture_as_const=True)
      def f(x, y):
          return log(exp(x) + exp(y))

      @trace(symbolic=True, opt_level=1, capture_as_const=True)
      def g(x, y):
          return log(exp(x) + exp(y))

      fd, out = mkstemp()
      # mkstemp hands back an open OS-level descriptor; only the path is used,
      # so close it right away instead of leaking it.
      os.close(fd)
      try:
          f(tensor(1.0), tensor(2.0))
          f.dump(out, optimize_for_inference=False)
          *_, outputs = G.load_graph(out)
          oprs_1 = cgtools.get_oprs_seq(outputs)

          g(tensor(1.0), tensor(2.0))
          g.dump(out, optimize_for_inference=False)
          *_, outputs = G.load_graph(out)
          oprs_2 = cgtools.get_oprs_seq(outputs)
      finally:
          # mkstemp never deletes the file itself; clean up after the test.
          os.remove(out)

      assert len(oprs_1) - len(oprs_2) == 2
  def test_optimize_for_inference():
      """Dump a traced ``exp`` with ``enable_io16xc32=True`` and check dtypes.

      After loading the dumped graph, the first input of the operator owning
      the first output variable must have dtype ``float16``.
      """
      import os

      @trace(symbolic=True, capture_as_const=True)
      def f(x):
          return exp(x)

      fd, out = mkstemp()
      # mkstemp hands back an open OS-level descriptor; only the path is used,
      # so close it right away instead of leaking it.
      os.close(fd)
      try:
          f(tensor(5.0))
          f.dump(out, enable_io16xc32=True)
          res = G.load_graph(out)
      finally:
          # mkstemp never deletes the file itself; clean up after the test.
          os.remove(out)

      computing_input = res.output_vars_list[0].owner.inputs[0]
      assert computing_input.dtype == np.float16
  def test_optimize_for_inference_broadcast():
      """Trace a Broadcast of a captured tensor and dump it to memory.

      The test has no explicit assertion; it passes when tracing and dumping
      complete without raising.
      """
      source = tensor(np.ones(1, dtype=np.float32))

      @trace(capture_as_const=True, symbolic_shape=True)
      def f():
          target_shape = tensor([1, 10], dtype=np.int32)
          (b,) = apply(ops.Broadcast(), source, target_shape)
          return b

      f()
      sink = io.BytesIO()
      f.dump(sink)
  def test_trace_cvt_bool():
      """Check that comparing a shape entry with 0 inside a trace gives False.

      The traced function returns ``x.shape[0] == 0`` for a one-element
      tensor; the first element of the result is checked on three calls.
      """
      # NOTE(review): this option is switched on and never reset in this test;
      # it looks like a module-level setting -- confirm that later tests do not
      # rely on its previous value.
      set_symbolic_shape(True)
      x = tensor([0], dtype=np.int32)

      @trace(symbolic=True)
      def f(x):
          return x.shape[0] == 0

      for _ in range(3):
          first = f(x).numpy()[0]
          np.testing.assert_equal(first, False)
  def test_trace_reshape():
      """Call a traced reshape with inputs whose leading dimension varies.

      For each tracing mode the same traced function is called on inputs of
      shape (2, 10, 10), (4, 10, 10) and (8, 10, 10). There is no explicit
      assertion; the test passes when no call raises.
      """
      for symbolic in (False, True):
          # NOTE(review): this option is switched on and never reset in this
          # test; it looks like a module-level setting -- confirm that later
          # tests do not rely on its previous value.
          set_symbolic_shape(True)
          inputs = [tensor(np.random.randn(n, 10, 10)) for n in (2, 4, 8)]

          @trace(symbolic=symbolic, capture_as_const=True)
          def f(x):
              return x.reshape(x.shape[0], 100)

          for inp in inputs:
              f(inp)
  def test_trace_topk():
      """Check the shape of the first ``F.topk`` result inside a symbolic trace.

      The shape is asserted inside the traced function itself, which is then
      called three times.
      """
      x = tensor([5, 2, 7, 1, 0, 3, 2])

      @trace(symbolic=True)
      def f(x):
          y = F.topk(x, 3)
          first_shape = y[0].shape.numpy()
          np.testing.assert_equal(first_shape, np.array([3]))
          return y

      for _ in range(3):
          f(x)
  def test_trace_warp_perspective():
      """Check the output shape of ``F.warp_perspective`` in a symbolic trace.

      A (1, 1, 4, 4) input and a (1, 3, 3) matrix are warped to a (2, 2)
      output; the resulting shape (1, 1, 2, 2) is asserted inside the traced
      function, which is called once.
      """
      inp_shape = (1, 1, 4, 4)
      M_shape = (1, 3, 3)

      x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape))
      matrix_rows = [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]]
      M = tensor(np.array(matrix_rows, dtype=np.float32).reshape(M_shape))

      @trace(symbolic=True)
      def f(x, M):
          out = F.warp_perspective(x, M, (2, 2))
          np.testing.assert_equal(out.shape.numpy(), np.array([1, 1, 2, 2]))
          return out

      # The original looped over range(1), i.e. a single call.
      f(x, M)
  def test_raise_on_trace():
      """Check a traced function that raises on exactly one of 100 calls.

      ``add_abc`` raises ``CatchMe`` when ``step_count`` equals ``bad_step``.
      Every other call must return ``a + b + c``, and the exception must be
      caught exactly once over the whole run.
      """
      step_count = 0
      catch_count = 0
      bad_step = 10

      class CatchMe(Exception):
          pass

      a = tensor([1, 2, 3, 4])
      b = tensor([5, 6, 7, 8])
      c = tensor([9, 0, 1, 2])

      @trace
      def add_abc(a, b, c):
          print("Hello")
          ps = a + b
          result = ps + c
          # ``step_count`` is read from the enclosing scope at call time.
          if step_count == bad_step:
              raise CatchMe("catch me")
          return result

      for _ in range(100):
          try:
              d = add_abc(a, b, c)
          except CatchMe:
              catch_count += 1
          else:
              expected = (a + b + c).numpy()
              np.testing.assert_equal(d.numpy(), expected)
          step_count += 1

      assert catch_count == 1
  def test_trace_broadcast():
      """Call a traced ``F.broadcast_to`` with differently shaped inputs.

      For each tracing mode the same traced function broadcasts inputs of
      shape (3, 1, 1), (1, 4, 1) and (1, 1, 5) to (3, 4, 5). There is no
      explicit assertion; the test passes when no call raises.
      """
      for symbolic in (False, True):
          # NOTE(review): this option is switched on and never reset in this
          # test; it looks like a module-level setting -- confirm that later
          # tests do not rely on its previous value.
          set_symbolic_shape(True)
          input_shapes = ((3, 1, 1), (1, 4, 1), (1, 1, 5))
          inputs = [tensor(np.random.randn(*shape)) for shape in input_shapes]

          @trace(symbolic=symbolic, capture_as_const=True)
          def f(x):
              return F.broadcast_to(x, (3, 4, 5))

          for inp in inputs:
              f(inp)
  def test_trace_nms():
      """Call a traced ``F.nn.nms`` with 10, 20 and 30 random boxes.

      The traced function runs NMS with ``max_output=20`` and a second time,
      without ``max_output``, inside an ``exclude_from_trace`` region. There
      is no explicit assertion; the test passes when no call raises.
      """

      def make_inputs(n):
          # First two columns lie in [0, 100), last two in [100, 200).
          boxes = np.zeros((n, 4))
          boxes[:, :2] = np.random.rand(n, 2) * 100
          boxes[:, 2:] = np.random.rand(n, 2) * 100 + 100
          scores = np.random.rand(n)
          return tensor(boxes), tensor(scores)

      @trace(symbolic=False)
      def f(boxes, scores):
          results = F.nn.nms(boxes, scores=scores, iou_thresh=0.5, max_output=20)
          with exclude_from_trace():
              # Result is intentionally discarded.
              _ = F.nn.nms(boxes, scores=scores, iou_thresh=0.5)
          return results

      for box_count in (10, 20, 30):
          f(*make_inputs(box_count))
  def test_trace_valid_broadcast():
      """Call a traced ``F.broadcast_to`` whose target shape is made of tensors.

      Inputs of shape (1, 1) and (1, 2) are broadcast to a shape given as a
      tuple of two one-element tensors. There is no explicit assertion; the
      test passes when no call raises.
      """
      # NOTE(review): this option is switched on and never reset in this test;
      # it looks like a module-level setting -- confirm that later tests do not
      # rely on its previous value.
      set_symbolic_shape(True)
      inputs = [tensor(np.random.randn(1, cols)) for cols in (1, 2)]
      shape = (tensor([2]), tensor([2]))

      @trace(symbolic=False)
      def f(x, shape):
          return F.broadcast_to(x, shape)

      for inp in inputs:
          f(inp, shape)
  def test_clip():
      """Call a symbolically traced ``F.clip`` three times with tensor bounds.

      Fresh bound tensors are created on every call. There is no explicit
      assertion; the test passes when no call raises.
      """
      x = tensor(np.random.randn(10, 10))

      @trace(symbolic=True)
      def f(x, lower, upper):
          return F.clip(x, lower, upper)

      for _ in range(3):
          lower_bound = tensor([0])
          upper_bound = tensor([1])
          f(x, lower_bound, upper_bound)
  # test returning noncontiguous tensor from trace
  def test_slice():
      """Return a strided slice from a traced function and use the result.

      The traced function is called twice; the second result is compared with
      the same slice taken on the NumPy array and then used in an addition.
      """

      @trace
      def f(x):
          return x[:, 1::2]

      x = F.arange(8).reshape(2, 4)
      f(x)
      y = f(x)

      expected = x.numpy()[:, 1::2]
      np.testing.assert_array_equal(y.numpy(), expected)
      # Result is discarded; the addition only has to succeed on the slice.
      y + y

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台