
test_tracing.py

# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import io
from tempfile import mkstemp

import numpy as np
import pytest

import megengine.core.tensor.megbrain_graph as G
import megengine.functional as F
import megengine.optimizer as optim
import megengine.utils.comp_graph_tools as cgtools
from megengine import Parameter, tensor
from megengine.autodiff import GradManager
from megengine.core._trace_option import set_symbolic_shape
from megengine.core.ops import builtin as ops
from megengine.core.ops.builtin import Elemwise
from megengine.core.tensor.utils import isscalar
from megengine.functional import exp, log
from megengine.jit import exclude_from_trace, trace
from megengine.module import Module
from megengine.random import normal, uniform

@pytest.mark.parametrize("trace_mode", [False, True])
@pytest.mark.parametrize("return_mode", ["Value", "Tuple", "List", "Dict"])
def test_trace(trace_mode, return_mode):
    @trace(symbolic=trace_mode)
    def f(x):
        if return_mode == "Tuple":
            return (-x,)
        elif return_mode == "List":
            return [-x]
        elif return_mode == "Dict":
            return {"neg": -x}
        else:
            return -x

    def get_numpy(y):
        if return_mode == "Tuple" or return_mode == "List":
            return y[0].numpy()
        elif return_mode == "Dict":
            return y["neg"].numpy()
        return y.numpy()

    x = tensor([1])
    y = get_numpy(f(x))

    for i in range(3):
        np.testing.assert_equal(get_numpy(f(x)), y)

def test_output_copy_trace():
    class Simple(Module):
        def __init__(self):
            super().__init__()
            self.a = Parameter([1.0], dtype=np.float32)

        def forward(self, x):
            x = x * self.a
            # will result into a copy of output in grad
            x = F.exp(x)
            return x

    ys = {False: [], True: []}

    for symbolic in [False, True]:
        net = Simple()
        gm = GradManager().attach(net.parameters())
        opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
        data = tensor(np.arange(4).reshape(2, 2), dtype="float32")

        @trace(symbolic=symbolic)
        def train_func(d):
            with gm:
                loss = net(d)
                gm.backward(loss)
                opt.step().clear_grad()
            return loss

        for i in range(3):
            y = train_func(data).numpy()
            ys[symbolic].append(y)

    for i in range(3):
        np.testing.assert_equal(ys[False][i], ys[True][i])

@pytest.mark.parametrize("trace_mode", [False, True])
def test_exclude_from_trace(trace_mode):
    @trace(symbolic=trace_mode)
    def f(x):
        x = -x
        # `i` is the loop index of the caller below; the excluded block is
        # re-executed eagerly on every call instead of being baked into the trace
        with exclude_from_trace():
            if i % 2:
                x = -x
        x = -x
        return x

    x = tensor([1])

    for i in range(3):
        y = f(x).numpy()
        np.testing.assert_equal(f(x).numpy(), y)


def test_print_in_trace():
    for symbolic in [False]:  # cannot read value in symbolic mode

        @trace(symbolic=symbolic)
        def f(x):
            nonlocal buf
            x = -x
            buf = x.numpy()
            x = -x
            return x

        buf = None
        x = tensor([1])

        for i in range(3):
            y = f(x).numpy()
            z = buf
            buf = None
            np.testing.assert_equal(f(x).numpy(), y)
            np.testing.assert_equal(z, buf)

def test_dump():
    @trace(symbolic=True, capture_as_const=True)
    def f(a, b):
        return a + b

    a = tensor([2])
    b = tensor([4])
    y = f(a, b).numpy()

    for i in range(3):
        np.testing.assert_equal(f(a, b).numpy(), y)

    file = io.BytesIO()
    dump_info = f.dump(file)
    assert dump_info.nr_opr == 3
    np.testing.assert_equal(dump_info.inputs, ["arg_0", "arg_1"])
    np.testing.assert_equal(dump_info.outputs, ["ADD"])
    file.seek(0)
    infer_cg = cgtools.GraphInference(file)
    result = list((infer_cg.run(a, b)).values())[0]
    np.testing.assert_equal(result[0], y)


def test_capture_dump():
    a = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * a

    x = tensor([3])
    y = f(x).numpy()

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), y)

    file = io.BytesIO()
    f.dump(file)
    file.seek(0)
    infer_cg = cgtools.GraphInference(file)
    result = list((infer_cg.run(x)).values())[0]
    np.testing.assert_equal(result[0], y)


def test_dump_volatile():
    p = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * p

    x = tensor([3])
    y = f(x).numpy()

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), y)

    file = io.BytesIO()
    f.dump(file, optimize_for_inference=False)
    file.seek(0)
    cg, _, outputs = G.load_graph(file)
    (out,) = outputs
    assert (
        cgtools.get_owner_opr_type(cgtools.get_owner_opr_inputs(out)[1])
        == "ImmutableTensor"
    )

@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_profiler(trace_mode):
    @trace(symbolic=trace_mode, profiling=True)
    def f(x):
        return -x

    x = tensor([1])
    y = f(x).numpy()

    f(x)
    f(x)  # XXX: has to run twice

    out = f.get_profile()
    assert out.get("profiler")


@pytest.mark.skip(reason="force opt_level=0 when building graph")
def test_goptions():
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        # directly return x / x will not trigger gopt
        # since there's no way to tell the two x are the same
        y = 2.0 * x
        return y / y

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        y = 2.0 * x
        return y / y

    d = tensor(0.0)
    assert not np.isfinite(f(d).numpy())
    np.testing.assert_equal(g(d).numpy().item(), 1.0)


@pytest.mark.skip(reason="force opt_level=0 when building graph")
def test_goptions_log_sum_exp():
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x, y):
        return log(exp(x) + exp(y))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x, y):
        return log(exp(x) + exp(y))

    val = 1.0e4
    d = tensor(val)
    o = tensor(0.0)
    assert not np.isfinite(f(d, o).numpy())
    np.testing.assert_almost_equal(g(d, o), val)


def test_goptions_log_exp():
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        return log(exp(x))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        return log(exp(x))

    f(tensor(1.0))
    _, out = mkstemp()
    f.dump(out, optimize_for_inference=False)
    *_, outputs = G.load_graph(out)
    oprs_1 = cgtools.get_oprs_seq(outputs)

    g(tensor(1.0))
    g.dump(out, optimize_for_inference=False)
    *_, outputs = G.load_graph(out)
    oprs_2 = cgtools.get_oprs_seq(outputs)

    assert len(oprs_1) - len(oprs_2) == 2

def test_optimize_for_inference():
    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    _, out = mkstemp()
    f(tensor(5.0))
    f.dump(out, enable_io16xc32=True)

    res = G.load_graph(out)
    computing_input = res.output_vars_list[0].owner.inputs[0]
    assert computing_input.dtype == np.float16


def test_optimize_for_inference_broadcast():
    a = tensor(np.ones(1, dtype=np.float32))

    @trace(capture_as_const=True, symbolic_shape=True)
    def f():
        return a._broadcast(tensor([1, 10], dtype=np.int32))

    f()
    f.dump(io.BytesIO())


def test_trace_cvt_bool():
    x = tensor([0], dtype=np.int32)

    @trace(symbolic=True)
    def f(x):
        a = x.shape
        b = a[0]
        assert isscalar(b)
        return b == 0

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), False)

@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_reshape(trace_mode):
    x1 = tensor(np.random.randn(2, 10, 10))
    x2 = tensor(np.random.randn(4, 10, 10))
    x3 = tensor(np.random.randn(8, 10, 10))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        y = x.reshape(x.shape[0], 100)
        return y

    f(x1)
    f(x2)
    f(x3)


def test_trace_topk():
    x = tensor([5, 2, 7, 1, 0, 3, 2])

    @trace(symbolic=True)
    def f(x):
        y = F.topk(x, 3)
        np.testing.assert_equal(y[0].shape.numpy(), np.array([3,]))
        return y

    for i in range(3):
        f(x)


def test_trace_warp_perspective():
    inp_shape = (1, 1, 4, 4)
    x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape))
    M_shape = (1, 3, 3)
    M = tensor(
        np.array(
            [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ).reshape(M_shape)
    )

    @trace(symbolic=True)
    def f(x, M):
        out = F.warp_perspective(x, M, (2, 2))
        np.testing.assert_equal(out.shape.numpy(), np.array([1, 1, 2, 2]))
        return out

    for i in range(1):
        f(x, M)

def test_raise_on_trace():
    step_count = 0
    catch_count = 0
    bad_step = 10

    class CatchMe(Exception):
        pass

    a = tensor([1, 2, 3, 4])
    b = tensor([5, 6, 7, 8])
    c = tensor([9, 0, 1, 2])

    @trace
    def add_abc(a, b, c):
        ps = a + b
        result = ps + c
        if step_count == bad_step:
            raise CatchMe("catch me")
        return result

    for i in range(100):
        try:
            d = add_abc(a, b, c)
        except CatchMe as e:
            catch_count += 1
        else:
            np.testing.assert_equal(d.numpy(), (a + b + c).numpy())
        step_count += 1

    assert catch_count == 1


@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_broadcast(trace_mode):
    x1 = tensor(np.random.randn(3, 1, 1))
    x2 = tensor(np.random.randn(1, 4, 1))
    x3 = tensor(np.random.randn(1, 1, 5))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        y = F.broadcast_to(x, (3, 4, 5))
        return y

    f(x1)
    f(x2)
    f(x3)

def test_trace_nms():
    def make_inputs(n):
        boxes = np.zeros((n, 4))
        boxes[:, :2] = np.random.rand(n, 2) * 100
        boxes[:, 2:] = np.random.rand(n, 2) * 100 + 100
        scores = np.random.rand(n)
        return tensor(boxes), tensor(scores)

    @trace(symbolic=False)
    def f(boxes, scores):
        # with tracing, max_output must be specified
        results = F.nn.nms(boxes, scores=scores, iou_thresh=0.5, max_output=20)
        # without tracing, max output can be inferred inside nms
        with exclude_from_trace():
            _ = F.nn.nms(boxes, scores=scores, iou_thresh=0.5)
        return results

    f(*make_inputs(10))
    f(*make_inputs(20))
    f(*make_inputs(30))


def test_trace_valid_broadcast():
    x1 = tensor(np.random.randn(1, 1))
    x2 = tensor(np.random.randn(1, 2))
    shape = (tensor([2]), tensor([2]))

    @trace(symbolic=False)
    def f(x, shape):
        y = F.broadcast_to(x, shape)
        return y

    f(x1, shape)
    f(x2, shape)


def test_clip():
    x = tensor(np.random.randn(10, 10))

    @trace(symbolic=True)
    def f(x, lower, upper):
        y = F.clip(x, lower, upper)
        return y

    for i in range(3):
        f(x, tensor([0]), tensor([1]))

# test returning noncontiguous tensor from trace
def test_slice():
    @trace
    def f(x):
        return x[:, 1::2]

    x = F.arange(8).reshape(2, 4)
    f(x)
    y = f(x)
    np.testing.assert_array_equal(y.numpy(), x.numpy()[:, 1::2])
    y + y  # the noncontiguous result must still be usable in further computation


@pytest.mark.parametrize("shape_mode", [False, True])
def test_random(shape_mode):
    def run_test(op):
        @trace(symbolic=True, symbolic_shape=shape_mode)
        def f():
            out = op(size=[10, 10])
            out_shape = out.shape
            assert out_shape is not None
            if not isinstance(out_shape, tuple):
                assert out.shape.numpy() is not None
            return out

        for _ in range(3):
            f()

    run_test(uniform)
    run_test(normal)

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU installed along with a working driver. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit MegStudio.
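As a quick post-install sanity check, here is a minimal sketch (an illustration, not part of the test file above) that assumes the `megengine.is_cuda_available()` helper and the `"gpu0"` device string found in recent MegEngine releases:

import megengine

# Reports True only when a CUDA-capable GPU and a working driver are present.
if megengine.is_cuda_available():
    x = megengine.tensor([1.0], device="gpu0")  # place a tensor on the first GPU
    print("GPU available, tensor lives on:", x.device)
else:
    print("No usable GPU found; computation will run on CPU.")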