You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_network_device.py 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
# -*- coding: utf-8 -*-
import functools
import os
import unittest

import numpy as np

# Star-import brings in LiteConfig, LiteNetwork, LiteGlobal, LiteDeviceType,
# LiteDataType, LiteOptions, LiteIO, LiteNetworkIO, LiteAlgoSelectStrategy,
# set_log_level, ... used throughout this module.
from megenginelite import *

# Reduce lite runtime log verbosity for the test run.
# NOTE(review): assumes level 2 maps to warn/error — confirm against
# megenginelite's set_log_level documentation.
set_log_level(2)
  8. def require_cuda(ngpu=1):
  9. """a decorator that disables a testcase if cuda is not enabled"""
  10. def dector(func):
  11. @functools.wraps(func)
  12. def wrapped(*args, **kwargs):
  13. if LiteGlobal.get_device_count(LiteDeviceType.LITE_CUDA) >= ngpu:
  14. return func(*args, **kwargs)
  15. return wrapped
  16. return dector
  17. class TestShuffleNetCuda(unittest.TestCase):
  18. source_dir = os.getenv("LITE_TEST_RESOURCE")
  19. input_data_path = os.path.join(source_dir, "input_data.npy")
  20. correct_data_path = os.path.join(source_dir, "output_data.npy")
  21. model_path = os.path.join(source_dir, "shufflenet.mge")
  22. correct_data = np.load(correct_data_path).flatten()
  23. input_data = np.load(input_data_path)
  24. def check_correct(self, out_data, error=1e-4):
  25. out_data = out_data.flatten()
  26. assert np.isfinite(out_data.sum())
  27. assert self.correct_data.size == out_data.size
  28. for i in range(out_data.size):
  29. assert abs(out_data[i] - self.correct_data[i]) < error
  30. def do_forward(self, network, times=3):
  31. input_name = network.get_input_name(0)
  32. input_tensor = network.get_io_tensor(input_name)
  33. output_name = network.get_output_name(0)
  34. output_tensor = network.get_io_tensor(output_name)
  35. input_tensor.set_data_by_copy(self.input_data)
  36. for i in range(times):
  37. network.forward()
  38. network.wait()
  39. output_data = output_tensor.to_numpy()
  40. self.check_correct(output_data)
  41. class TestNetwork(TestShuffleNetCuda):
  42. @require_cuda()
  43. def test_network_basic(self):
  44. config = LiteConfig()
  45. config.device_type = LiteDeviceType.LITE_CUDA
  46. network = LiteNetwork(config)
  47. network.load(self.model_path)
  48. input_name = network.get_input_name(0)
  49. input_tensor = network.get_io_tensor(input_name)
  50. output_name = network.get_output_name(0)
  51. output_tensor = network.get_io_tensor(output_name)
  52. assert input_tensor.layout.shapes[0] == 1
  53. assert input_tensor.layout.shapes[1] == 3
  54. assert input_tensor.layout.shapes[2] == 224
  55. assert input_tensor.layout.shapes[3] == 224
  56. assert input_tensor.layout.data_type == LiteDataType.LITE_FLOAT
  57. assert input_tensor.layout.ndim == 4
  58. self.do_forward(network)
  59. @require_cuda()
  60. def test_network_shared_data(self):
  61. config = LiteConfig()
  62. config.device_type = LiteDeviceType.LITE_CUDA
  63. network = LiteNetwork(config)
  64. network.load(self.model_path)
  65. input_name = network.get_input_name(0)
  66. input_tensor = network.get_io_tensor(input_name)
  67. output_name = network.get_output_name(0)
  68. output_tensor = network.get_io_tensor(output_name)
  69. input_tensor.set_data_by_share(self.input_data)
  70. for i in range(3):
  71. network.forward()
  72. network.wait()
  73. output_data = output_tensor.to_numpy()
  74. self.check_correct(output_data)
  75. @require_cuda(2)
  76. def test_network_set_device_id(self):
  77. config = LiteConfig()
  78. config.device_type = LiteDeviceType.LITE_CUDA
  79. network = LiteNetwork(config)
  80. assert network.device_id == 0
  81. network.device_id = 1
  82. network.load(self.model_path)
  83. assert network.device_id == 1
  84. with self.assertRaises(RuntimeError):
  85. network.device_id = 1
  86. self.do_forward(network)
  87. @require_cuda()
  88. def test_network_option(self):
  89. option = LiteOptions()
  90. option.weight_preprocess = 1
  91. option.var_sanity_check_first_run = 0
  92. config = LiteConfig(option=option)
  93. config.device_type = LiteDeviceType.LITE_CUDA
  94. network = LiteNetwork(config=config)
  95. network.load(self.model_path)
  96. self.do_forward(network)
  97. @require_cuda()
  98. def test_network_reset_io(self):
  99. option = LiteOptions()
  100. option.var_sanity_check_first_run = 0
  101. config = LiteConfig(option=option)
  102. config.device_type = LiteDeviceType.LITE_CUDA
  103. input_io = LiteIO("data")
  104. ios = LiteNetworkIO()
  105. ios.add_input(input_io)
  106. network = LiteNetwork(config=config, io=ios)
  107. network.load(self.model_path)
  108. input_tensor = network.get_io_tensor("data")
  109. assert input_tensor.device_type == LiteDeviceType.LITE_CPU
  110. self.do_forward(network)
  111. @require_cuda()
  112. def test_network_share_weights(self):
  113. option = LiteOptions()
  114. option.var_sanity_check_first_run = 0
  115. config = LiteConfig(option=option)
  116. config.device_type = LiteDeviceType.LITE_CUDA
  117. src_network = LiteNetwork(config=config)
  118. src_network.load(self.model_path)
  119. new_network = LiteNetwork()
  120. new_network.enable_cpu_inplace_mode()
  121. new_network.share_weights_with(src_network)
  122. self.do_forward(src_network)
  123. self.do_forward(new_network)
  124. @require_cuda()
  125. def test_network_share_runtime_memory(self):
  126. option = LiteOptions()
  127. option.var_sanity_check_first_run = 0
  128. config = LiteConfig(option=option)
  129. config.device_type = LiteDeviceType.LITE_CUDA
  130. src_network = LiteNetwork(config=config)
  131. src_network.load(self.model_path)
  132. new_network = LiteNetwork()
  133. new_network.enable_cpu_inplace_mode()
  134. new_network.share_runtime_memroy(src_network)
  135. new_network.load(self.model_path)
  136. self.do_forward(src_network)
  137. self.do_forward(new_network)
  138. @require_cuda
  139. def test_network_start_callback(self):
  140. config = LiteConfig()
  141. config.device = LiteDeviceType.LITE_CUDA
  142. network = LiteNetwork(config)
  143. network.load(self.model_path)
  144. start_checked = False
  145. def start_callback(ios):
  146. nonlocal start_checked
  147. start_checked = True
  148. assert len(ios) == 1
  149. for key in ios:
  150. io = key
  151. data = ios[key].to_numpy().flatten()
  152. input_data = self.input_data.flatten()
  153. assert data.size == input_data.size
  154. assert io.name.decode("utf-8") == "data"
  155. for i in range(data.size):
  156. assert data[i] == input_data[i]
  157. return 0
  158. network.set_start_callback(start_callback)
  159. self.do_forward(network, 1)
  160. assert start_checked == True
  161. @require_cuda
  162. def test_network_finish_callback(self):
  163. config = LiteConfig()
  164. config.device = LiteDeviceType.LITE_CUDA
  165. network = LiteNetwork(config)
  166. network.load(self.model_path)
  167. finish_checked = False
  168. def finish_callback(ios):
  169. nonlocal finish_checked
  170. finish_checked = True
  171. assert len(ios) == 1
  172. for key in ios:
  173. io = key
  174. data = ios[key].to_numpy().flatten()
  175. output_data = self.correct_data.flatten()
  176. assert data.size == output_data.size
  177. for i in range(data.size):
  178. assert data[i] == output_data[i]
  179. return 0
  180. network.set_finish_callback(finish_callback)
  181. self.do_forward(network, 1)
  182. assert finish_checked == True
  183. @require_cuda()
  184. def test_enable_profile(self):
  185. config = LiteConfig()
  186. config.device_type = LiteDeviceType.LITE_CUDA
  187. network = LiteNetwork(config)
  188. network.load(self.model_path)
  189. network.enable_profile_performance("./profile.json")
  190. self.do_forward(network)
  191. fi = open("./profile.json", "r")
  192. fi.close()
  193. os.remove("./profile.json")
  194. @require_cuda()
  195. def test_algo_workspace_limit(self):
  196. config = LiteConfig()
  197. config.device_type = LiteDeviceType.LITE_CUDA
  198. network = LiteNetwork(config)
  199. network.load(self.model_path)
  200. print("modify the workspace limit.")
  201. network.set_network_algo_workspace_limit(10000)
  202. self.do_forward(network)
  203. @require_cuda()
  204. def test_network_algo_policy(self):
  205. config = LiteConfig()
  206. config.device_type = LiteDeviceType.LITE_CUDA
  207. network = LiteNetwork(config)
  208. network.load(self.model_path)
  209. network.set_network_algo_policy(
  210. LiteAlgoSelectStrategy.LITE_ALGO_PROFILE
  211. | LiteAlgoSelectStrategy.LITE_ALGO_REPRODUCIBLE
  212. )
  213. self.do_forward(network)
  214. @require_cuda()
  215. def test_enable_global_layout_transform(self):
  216. config_ = LiteConfig(device_type=LiteDeviceType.LITE_CUDA)
  217. network = LiteNetwork(config=config_)
  218. network.enable_global_layout_transform()
  219. network.load(self.model_path)
  220. self.do_forward(network)
  221. @require_cuda()
  222. def test_dump_layout_transform_model(self):
  223. config_ = LiteConfig(device_type=LiteDeviceType.LITE_CUDA)
  224. network = LiteNetwork(config=config_)
  225. network.enable_global_layout_transform()
  226. network.load(self.model_path)
  227. network.dump_layout_transform_model("./model_afer_layoutTrans.mgb")
  228. self.do_forward(network)
  229. fi = open("./model_afer_layoutTrans.mgb", "r")
  230. fi.close()
  231. os.remove("./model_afer_layoutTrans.mgb")
  232. @require_cuda()
  233. def test_fast_run_and_global_layout_transform(self):
  234. config_ = LiteConfig()
  235. config_.device_type = LiteDeviceType.LITE_CUDA
  236. network = LiteNetwork(config_)
  237. fast_run_cache = "./algo_cache"
  238. global_layout_transform_model = "./model_afer_layoutTrans.mgb"
  239. network.set_network_algo_policy(
  240. LiteAlgoSelectStrategy.LITE_ALGO_PROFILE
  241. | LiteAlgoSelectStrategy.LITE_ALGO_OPTIMIZED
  242. )
  243. network.enable_global_layout_transform()
  244. network.load(self.model_path)
  245. self.do_forward(network)
  246. network.dump_layout_transform_model(global_layout_transform_model)
  247. LiteGlobal.dump_persistent_cache(fast_run_cache)
  248. fi = open(fast_run_cache, "r")
  249. fi.close()
  250. fi = open(global_layout_transform_model, "r")
  251. fi.close()
  252. LiteGlobal.set_persistent_cache(path=fast_run_cache)
  253. self.do_forward(network)
  254. os.remove(fast_run_cache)
  255. os.remove(global_layout_transform_model)