
test_functional.py 20 kB

# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import itertools

import numpy as np
import pytest

import megengine.core.tensor.dtype as dtype
import megengine.functional as F
from megengine import Buffer, Parameter, is_cuda_available, tensor
from megengine.core._trace_option import use_tensor_shape
from megengine.core.autodiff.grad import Grad
from megengine.core.tensor.utils import make_shape_tuple
from megengine.test import assertTensorClose


def _default_compare_fn(x, y):
    assertTensorClose(x.numpy(), y)


def opr_test(cases, func, compare_fn=_default_compare_fn, ref_fn=None, **kwargs):
    """
    func: the function that runs the opr.
    compare_fn: the function that compares the result with the expected value;
                defaults to assertTensorClose.
    ref_fn: the function that generates the expected data; if None, each case
            must provide an output.
    cases: a list of dicts; the list should have two elements for a
           dynamic-shape test. Each dict must have an "input" entry, and must
           have an "output" entry if ref_fn is None. Use a list for multiple
           inputs or outputs in a single case.
    kwargs: additional kwargs passed to the opr func.

    A simple example:

        dtype = np.float32
        cases = [{"input": [10, 20]}, {"input": [20, 30]}]
        opr_test(cases,
                 F.eye,
                 ref_fn=lambda n, m: np.eye(n, m).astype(dtype),
                 dtype=dtype)
    """

    def check_results(results, expected):
        if not isinstance(results, (tuple, list)):
            results = (results,)
        for r, e in zip(results, expected):
            compare_fn(r, e)

    def get_param(cases, idx):
        case = cases[idx]
        inp = case.get("input", None)
        outp = case.get("output", None)
        if inp is None:
            raise ValueError("the test case should have input")
        if not isinstance(inp, (tuple, list)):
            inp = (inp,)
        if ref_fn is not None and callable(ref_fn):
            outp = ref_fn(*inp)
        if outp is None:
            raise ValueError("the test case should have output or reference function")
        if not isinstance(outp, (tuple, list)):
            outp = (outp,)
        return inp, outp

    if len(cases) == 0:
        raise ValueError("should give one case at least")

    if not callable(func):
        raise ValueError("the input func should be callable")

    inp, outp = get_param(cases, 0)
    inp_tensor = [tensor(inpi) for inpi in inp]

    results = func(*inp_tensor, **kwargs)
    check_results(results, outp)


def test_flatten():
    data0_shape = (2, 3, 4, 5)
    data1_shape = (4, 5, 6, 7)
    data0 = np.random.random(data0_shape).astype(np.float32)
    data1 = np.random.random(data1_shape).astype(np.float32)

    def compare_fn(x, y):
        assert x.numpy().shape == y

    output0 = (2 * 3 * 4 * 5,)
    output1 = (4 * 5 * 6 * 7,)
    cases = [
        {"input": data0, "output": (output0,)},
        {"input": data1, "output": (output1,)},
    ]
    opr_test(cases, F.flatten, compare_fn=compare_fn)

    output0 = (2, 3 * 4 * 5)
    output1 = (4, 5 * 6 * 7)
    cases = [
        {"input": data0, "output": (output0,)},
        {"input": data1, "output": (output1,)},
    ]
    opr_test(cases, F.flatten, compare_fn=compare_fn, start_axis=1)

    output0 = (2, 3, 4 * 5)
    output1 = (4, 5, 6 * 7)
    cases = [
        {"input": data0, "output": (output0,)},
        {"input": data1, "output": (output1,)},
    ]
    opr_test(cases, F.flatten, compare_fn=compare_fn, start_axis=2)

    output0 = (2, 3 * 4, 5)
    output1 = (4, 5 * 6, 7)
    cases = [
        {"input": data0, "output": (output0,)},
        {"input": data1, "output": (output1,)},
    ]
    opr_test(cases, F.flatten, compare_fn=compare_fn, start_axis=1, end_axis=2)


def test_where():
    maskv0 = np.array([[1, 0], [0, 1]], dtype=np.bool_)
    xv0 = np.array([[1, np.inf], [np.nan, 4]], dtype=np.float32)
    yv0 = np.array([[5, 6], [7, 8]], dtype=np.float32)

    maskv1 = np.array([[1, 0, 1], [1, 0, 0], [1, 1, 0]], dtype=np.bool_)
    xv1 = np.array([[1, np.inf, 2], [0, np.nan, 4], [1, 5, 7]], dtype=np.float32)
    yv1 = np.array([[5, 6, 9], [2, 7, 8], [2, 1, 9]], dtype=np.float32)

    cases = [
        {"input": [maskv0, xv0, yv0]},
        {"input": [maskv1, xv1, yv1]},
    ]
    opr_test(cases, F.where, ref_fn=np.where)

    maskv2 = np.array([1, 1, 1], dtype=np.bool_)
    xv2 = np.array([1, 3, 2], dtype=np.float32)
    yv2 = np.array([5, 6, 9], dtype=np.float32)

    maskv3 = np.array([0, 0, 0], dtype=np.bool_)
    xv3 = np.array([1, 3, 2], dtype=np.float32)
    yv3 = np.array([5, 6, 9], dtype=np.float32)

    cases = [
        {"input": [maskv2, xv2, yv2]},
        {"input": [maskv3, xv3, yv3]},
    ]
    opr_test(cases, F.where, ref_fn=np.where)


def test_matmul():
    shape1 = 3
    shape2 = 3
    shape3 = (3, 5)
    shape4 = (5, 6)
    data1 = np.random.random(shape1).astype("float32")
    data2 = np.random.random(shape2).astype("float32")
    data3 = np.random.random(shape3).astype("float32")
    data4 = np.random.random(shape4).astype("float32")

    cases = [
        {"input": [data1, data2]},
        {"input": [data2, data3]},
        {"input": [data3, data4]},
    ]
    opr_test(cases, F.matmul, ref_fn=np.matmul)

    batch_size = 10
    shape1 = (batch_size, 2, 3)
    shape2 = (batch_size, 3, 4)
    shape3 = (batch_size, 10, 4, 5)
    data1 = np.random.random(shape1).astype("float32")
    data2 = np.random.random(shape2).astype("float32")
    data3 = np.random.random(shape3).astype("float32")

    cases = [{"input": [data1, data2]}, {"input": [data2, data3]}]
    for i in range(0, batch_size):

        def compare_fn(x, y):
            # compare the i-th batch entry of the result with the reference
            assertTensorClose(x.numpy()[i, ...], y)

        opr_test(
            cases,
            F.matmul,
            compare_fn=compare_fn,
            ref_fn=lambda x, y: np.matmul(x[i, ...], y[i, ...]),
        )


def test_interpolate():
    if use_tensor_shape():  # XXX: please fix me
        return

    def linear_interpolate():
        inp = tensor(np.arange(1, 3, dtype=np.float32).reshape(1, 1, 2))

        out = F.interpolate(inp, scale_factor=2.0, mode="LINEAR")
        out2 = F.interpolate(inp, 4, mode="LINEAR")

        assertTensorClose(
            out.numpy(), np.array([[[1.0, 1.25, 1.75, 2.0]]], dtype=np.float32)
        )
        assertTensorClose(
            out2.numpy(), np.array([[[1.0, 1.25, 1.75, 2.0]]], dtype=np.float32)
        )

    def many_batch_interpolate():
        inp = tensor(np.arange(1, 9, dtype=np.float32).reshape(2, 1, 2, 2))

        out = F.interpolate(inp, [4, 4])
        out2 = F.interpolate(inp, scale_factor=2.0)

        assertTensorClose(out.numpy(), out2.numpy())

    def assign_corner_interpolate():
        inp = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2))

        out = F.interpolate(inp, [4, 4], align_corners=True)
        out2 = F.interpolate(inp, scale_factor=2.0, align_corners=True)

        assertTensorClose(out.numpy(), out2.numpy())

    def error_shape_linear_interpolate():
        inp = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2))

        with pytest.raises(ValueError):
            F.interpolate(inp, scale_factor=2.0, mode="LINEAR")

    def inappropriate_scale_linear_interpolate():
        inp = tensor(np.arange(1, 3, dtype=np.float32).reshape(1, 1, 2))

        with pytest.raises(ValueError):
            F.interpolate(inp, scale_factor=[2.0, 3.0], mode="LINEAR")

    linear_interpolate()
    many_batch_interpolate()
    assign_corner_interpolate()
    error_shape_linear_interpolate()
    inappropriate_scale_linear_interpolate()


def _save_to(self, name="grad"):
    def callback(tensor, grad):
        setattr(self, name, grad)

    return callback


def _gen_roi_inp():
    inp_feat = np.random.randn(2, 32, 256, 256)
    rois = np.zeros((4, 5))
    rois[:, 0] = [0, 0, 1, 1]
    rois[:, 1:3] = np.random.rand(4, 2) * 100
    rois[:, 3:] = np.random.rand(4, 2) * 100 + 150

    inp_feat = tensor(inp_feat)
    rois = tensor(rois)
    return inp_feat, rois


def test_roi_align():
    inp_feat, rois = _gen_roi_inp()
    grad = Grad().wrt(inp_feat, callback=_save_to(inp_feat))

    output_shape = (7, 7)
    out_feat = F.roi_align(
        inp_feat,
        rois,
        output_shape=output_shape,
        mode="average",
        spatial_scale=1.0 / 4,
        sample_points=2,
        aligned=True,
    )
    assert make_shape_tuple(out_feat.shape) == (
        rois.shape[0],
        inp_feat.shape[1],
        *output_shape,
    )

    grad(out_feat, tensor(F.ones_like(out_feat)))
    assert make_shape_tuple(inp_feat.grad.shape) == make_shape_tuple(inp_feat.shape)


def test_roi_pooling():
    inp_feat, rois = _gen_roi_inp()
    grad = Grad().wrt(inp_feat, callback=_save_to(inp_feat))

    output_shape = (7, 7)
    out_feat = F.roi_pooling(
        inp_feat, rois, output_shape=output_shape, mode="max", scale=1.0 / 4,
    )
    assert make_shape_tuple(out_feat.shape) == (
        rois.shape[0],
        inp_feat.shape[1],
        *output_shape,
    )

    grad(out_feat, tensor(F.ones_like(out_feat)))
    assert make_shape_tuple(inp_feat.grad.shape) == make_shape_tuple(inp_feat.shape)


# def test_one_hot():
#     def onehot_low_dimension():
#         inp = tensor(np.arange(1, 4, dtype=np.int32))
#         out = F.one_hot(inp, num_classes=4)
#         assertTensorClose(
#             out.numpy(), np.eye(4, dtype=np.int32)[np.arange(1, 4, dtype=np.int32)]
#         )
#
#     def onehot_high_dimension():
#         arr = np.array(
#             [[3, 2, 4, 4, 2, 4, 0, 4, 4, 1], [4, 1, 1, 3, 2, 2, 4, 2, 4, 3]],
#             dtype=np.int32,
#         )
#         inp = tensor(arr)
#         out = F.one_hot(inp, 10)
#         assertTensorClose(out.numpy(), np.eye(10, dtype=np.int32)[arr])
#
#     onehot_low_dimension()
#     onehot_high_dimension()


def test_add_update():
    shape = (2, 3)
    v = np.random.random(shape).astype(np.float32)
    b = Buffer(v)

    u = F.add_update(b, 1)
    assertTensorClose(u.numpy(), v + 1)

    u = F.add_update(b, 1)
    assertTensorClose(u.numpy(), v + 2)

    x = np.ones((2, 2), dtype=np.float32)
    y = x * 0.5
    dest = tensor(x)
    delta = tensor(y)
    r = F.add_update(dest, delta, alpha=0.9, beta=0.1, bias=0.1)
    assertTensorClose(r.numpy(), x * 0.9 + y * 0.1 + 0.1)


def test_add_update_params():
    b = np.random.random((2, 3)).astype(np.float32)
    y = Buffer(b)

    # @jit.trace
    def f(x):
        return F.add_update(y, x)

    f(np.zeros((2, 3)).astype(np.float32))

    z = Buffer(np.zeros((2, 3)).astype(np.float32))
    F.add_update(y, z, beta=0.1)

    res = f(np.ones((2, 3)).astype(np.float32))
    assertTensorClose(res.numpy(), b + 1)


# def test_cross_entropy_with_softmax():
#     data1_shape = (1, 2)
#     label1_shape = (1,)
#     data2_shape = (1, 3)
#     label2_shape = (1,)
#
#     data1 = np.array([1, 0.5], dtype=np.float32).reshape(data1_shape)
#     label1 = np.array([1], dtype=np.int32).reshape(label1_shape)
#     expect1 = F.cross_entropy(F.softmax(tensor(data1)), tensor(label1)).numpy()
#
#     data2 = np.array([0.3, 0.4, 0.3], dtype=np.float32).reshape(data2_shape)
#     label2 = np.array([1], dtype=np.int32).reshape(label2_shape)
#     expect2 = F.cross_entropy(F.softmax(tensor(data2)), tensor(label2)).numpy()
#
#     cases = [
#         {"input": [data1, label1], "output": expect1,},
#         {"input": [data2, label2], "output": expect2,},
#     ]
#     opr_test(cases, F.cross_entropy_with_softmax)
#
#
# def test_cross_entropy():
#     data1_shape = (1, 2)
#     label1_shape = (1,)
#     data2_shape = (1, 3)
#     label2_shape = (1,)
#
#     data1 = np.array([0.5, 0.5], dtype=np.float32).reshape(data1_shape)
#     label1 = np.array([1], dtype=np.int32).reshape(label1_shape)
#     expect1 = np.array([-np.log(0.5)], dtype=np.float32)
#
#     data2 = np.array([0.3, 0.4, 0.3], dtype=np.float32).reshape(data2_shape)
#     label2 = np.array([1], dtype=np.int32).reshape(label2_shape)
#     expect2 = np.array([-np.log(0.4)], dtype=np.float32)
#
#     cases = [
#         {"input": [data1, label1], "output": expect1,},
#         {"input": [data2, label2], "output": expect2,},
#     ]
#     opr_test(cases, F.cross_entropy)


def test_binary_cross_entropy():
    data1_shape = (2, 2)
    label1_shape = (2, 2)
    data2_shape = (2, 3)
    label2_shape = (2, 3)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def compare_fn(x, y):
        assertTensorClose(x.numpy(), y, max_err=5e-4)

    np.random.seed(123)
    data1 = sigmoid(np.random.uniform(size=data1_shape).astype(np.float32))
    label1 = np.random.uniform(size=label1_shape).astype(np.float32)
    expect1 = np.array([0.6361], dtype=np.float32)

    np.random.seed(123)
    data2 = sigmoid(np.random.uniform(size=data2_shape).astype(np.float32))
    label2 = np.random.uniform(size=label2_shape).astype(np.float32)
    expect2 = np.array([0.6750], dtype=np.float32)

    cases = [
        {"input": [data1, label1], "output": expect1,},
        {"input": [data2, label2], "output": expect2,},
    ]
    opr_test(cases, F.binary_cross_entropy, compare_fn=compare_fn)


def test_hinge_loss():
    np.random.seed(123)
    # case with L1 norm
    cases = []
    for shape in [(2, 2), (2, 3)]:
        data = np.random.uniform(size=shape).astype(np.float32)
        label = 2 * np.random.randint(0, 1, size=shape).astype(np.float32) - 1
        # hinge loss: max(0, 1 - data * label); np.clip takes (a, a_min, a_max)
        expect = np.clip(1 - data * label, 0, np.inf).sum(axis=1).mean()
        cases.append({"input": [data, label], "output": expect})
    opr_test(cases, F.hinge_loss)

    # cases with L2 norm
    cases = []
    for shape in [(2, 2), (2, 3)]:
        data = np.random.uniform(size=shape).astype(np.float32)
        label = 2 * np.random.randint(0, 1, size=shape).astype(np.float32) - 1
        expect = ((np.clip(1 - data * label, 0, np.inf) ** 2).sum(axis=1)).mean()
        cases.append({"input": [data, label], "output": expect})

    def hinge_loss_with_l2_norm(pred, label):
        return F.hinge_loss(pred, label, "L2")

    opr_test(cases, hinge_loss_with_l2_norm)


def test_nms():
    x = np.array(
        [
            [0, 0, 100, 100],
            [10, 10, 100, 100],
            [50, 50, 100, 100],
            [100, 100, 150, 150],
        ],
        dtype=np.float32,
    )
    inp = tensor(x)
    scores = tensor([0.5, 0.8, 0.9, 0.6], dtype=np.float32)
    result = F.nms(inp, iou_thresh=0.5, scores=scores)
    np.testing.assert_equal(result.numpy(), np.array([2, 1, 3], dtype=np.int32))


def test_batched_nms():
    x = np.array(
        [
            [0, 0, 100, 100],
            [0.5, 0.5, 1.5, 1.5],
            [20, 20, 100, 100],
            [0.5, 0.5, 1.0, 1.0],
            [10, 10, 100, 100],
            [0.5, 0.5, 1.0, 1.0],
        ],
        dtype=np.float32,
    )
    inp = tensor(x)
    scores = tensor([0.6, 0.9, 0.5, 0.6, 0.8, 0.7], dtype=np.float32)
    idxs = tensor([0, 1, 0, 1, 0, 1], dtype=np.int32)
    results = F.batched_nms(inp, iou_thresh=0.5, idxs=idxs, scores=scores)
    np.testing.assert_equal(results.numpy(), np.array([1, 4, 5], dtype=np.int32))


# def test_smooth_l1_loss():
#     np.random.seed(123)
#     cases = []
#     for shape in [(2, 2), (2, 3)]:
#         data = np.random.uniform(size=shape).astype(np.float32)
#         label = np.random.uniform(size=shape).astype(np.float32)
#         diff = np.abs(data - label)
#         expect = np.where(diff < 1, 0.5 * diff ** 2, diff - 0.5).mean()
#         cases.append({"input": [data, label], "output": tensor(expect)})
#     opr_test(cases, F.smooth_l1_loss)


def test_conv_bias():
    inp_scale = 1.5
    w_scale = 2.5
    outp_scale = 1.5
    inp_dtype = dtype.qint8(inp_scale)
    w_dtype = dtype.qint8(w_scale)
    b_dtype = dtype.qint32(inp_scale * w_scale)
    out_dtype = dtype.qint8(outp_scale)

    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
        nonlinear_mode="IDENTITY",
    ):
        inp_v = np.random.normal(size=(N, IC, IH, IW))
        w_v = np.random.normal(size=(OC, IC, KH, KW))
        b_v = np.random.normal(size=(1, OC, 1, 1))
        inp_scale = dtype.get_scale(inp_dtype)
        w_scale = dtype.get_scale(w_dtype)
        b_scale = dtype.get_scale(b_dtype)

        inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
        wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
        bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

        inp_int8 = tensor(inpv, dtype=inp_dtype)
        w_int8 = Parameter(wv, dtype=w_dtype)
        b_int32 = Parameter(bv, dtype=b_dtype)

        inp_fp32 = inp_int8.astype("float32")
        w_fp32 = w_int8.astype("float32")
        b_fp32 = b_int32.astype("float32")

        def convert_to_nchw4(var):
            var = F.reshape(
                var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3])
            )
            var = F.dimshuffle(var, (0, 1, 3, 4, 2))
            return var

        def run_conv2d(inp, w, b):
            O = F.conv2d(
                inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
            )
            if nonlinear_mode == "RELU":
                return F.relu(O)
            else:
                return O

        def run_conv_bias(inp, w, b, format="NCHW"):
            b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
            if format == "NCHW4":
                inp = convert_to_nchw4(inp)
                w = convert_to_nchw4(w)
                b = convert_to_nchw4(b)
            return F.conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                format=format,
                dtype=out_dtype,
                nonlinear_mode=nonlinear_mode,
            )

        format = "NCHW4" if is_cuda_available() else "NCHW"

        expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
        expected = expected.astype(out_dtype).astype("float32")
        result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype(
            "float32"
        )
        if format == "NCHW4":
            result = F.dimshuffle(result, (0, 1, 4, 2, 3))
        expected = F.flatten(expected)
        result = F.flatten(result)
        assertTensorClose(result.numpy(), expected.numpy(), max_err=outp_scale)

    run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1, False)
    run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1, False)
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False)

    run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1)
    run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1)
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2)

    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "RELU")
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "RELU")


# def test_softplus():
#     x = np.arange(1000).astype(np.float32)
#     out = F.softplus(tensor(x))
#     mask = x <= 20
#     with np.errstate(over="ignore"):
#         expected = np.where(mask, np.log(1 + np.exp(x)), x)
#     assertTensorClose(out, expected)
#
#     beta = 2
#     out = F.softplus(tensor(x), beta=beta, threshold=30)
#     mask = beta * x <= 30
#     # ignore overflow
#     with np.errstate(over="ignore"):
#         expected = np.where(mask, np.log(1 + np.exp(x * beta)) / beta, x)
#     assertTensorClose(out, expected)


def test_condtake():
    x = np.array([[1, 2, 3], [4, 5, 6]])
    y = np.array([[True, False, True], [False, True, True]])
    xx = tensor(x)
    yy = tensor(y)
    val, idx = F.cond_take(yy, xx)
    np.testing.assert_equal(val.numpy(), x[y])
    np.testing.assert_equal(idx.numpy(), np.where(y.reshape(-1))[0])
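
The functions above follow pytest's `test_*` naming convention and are meant to be collected by pytest rather than run directly; for example, `pytest -q test_functional.py -k "nms"` would run only the NMS tests (the `-k` filter shown here is just an illustration, not part of the file).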

The MegEngine package bundles the CUDA environment needed to run code on a GPU, so there are no separate CPU and GPU builds. To run GPU programs, make sure the machine has a GPU device and its driver installed. If you would like to try deep-learning development on a cloud GPU platform, visit MegStudio.
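
A minimal sketch of that device-transparent behavior, using only the public API the test file above already imports (is_cuda_available, tensor, and megengine.functional); the small matmul is an arbitrary illustration:

import megengine.functional as F
from megengine import is_cuda_available, tensor

# The same script runs unchanged on CPU or GPU; MegEngine uses the GPU when one is present.
print("CUDA available:", is_cuda_available())
x = tensor([[1.0, 2.0], [3.0, 4.0]])
print(F.matmul(x, x).numpy())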