You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_network_cuda.py 7.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. # -*- coding: utf-8 -*-
  2. # This file is part of MegEngine, a deep learning framework developed by
  3. # Megvii.
  4. #
  5. # Copyright (c) Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
  6. import functools
  7. import os
  8. import unittest
  9. import numpy as np
  10. from megenginelite import *
  11. set_log_level(2)
  12. def require_cuda(ngpu=1):
  13. """a decorator that disables a testcase if cuda is not enabled"""
  14. def dector(func):
  15. @functools.wraps(func)
  16. def wrapped(*args, **kwargs):
  17. if LiteGlobal.get_device_count(LiteDeviceType.LITE_CUDA) >= ngpu:
  18. return func(*args, **kwargs)
  19. return wrapped
  20. return dector
  21. class TestShuffleNetCuda(unittest.TestCase):
  22. source_dir = os.getenv("LITE_TEST_RESOUCE")
  23. input_data_path = os.path.join(source_dir, "input_data.npy")
  24. correct_data_path = os.path.join(source_dir, "output_data.npy")
  25. model_path = os.path.join(source_dir, "shufflenet.mge")
  26. correct_data = np.load(correct_data_path).flatten()
  27. input_data = np.load(input_data_path)
  28. def check_correct(self, out_data, error=1e-4):
  29. out_data = out_data.flatten()
  30. assert np.isfinite(out_data.sum())
  31. assert self.correct_data.size == out_data.size
  32. for i in range(out_data.size):
  33. assert abs(out_data[i] - self.correct_data[i]) < error
  34. def do_forward(self, network, times=3):
  35. input_name = network.get_input_name(0)
  36. input_tensor = network.get_io_tensor(input_name)
  37. output_name = network.get_output_name(0)
  38. output_tensor = network.get_io_tensor(output_name)
  39. input_tensor.set_data_by_copy(self.input_data)
  40. for i in range(times):
  41. network.forward()
  42. network.wait()
  43. output_data = output_tensor.to_numpy()
  44. self.check_correct(output_data)
  45. class TestNetwork(TestShuffleNetCuda):
  46. @require_cuda()
  47. def test_network_basic(self):
  48. config = LiteConfig()
  49. config.device_type = LiteDeviceType.LITE_CUDA
  50. network = LiteNetwork(config)
  51. network.load(self.model_path)
  52. input_name = network.get_input_name(0)
  53. input_tensor = network.get_io_tensor(input_name)
  54. output_name = network.get_output_name(0)
  55. output_tensor = network.get_io_tensor(output_name)
  56. assert input_tensor.layout.shapes[0] == 1
  57. assert input_tensor.layout.shapes[1] == 3
  58. assert input_tensor.layout.shapes[2] == 224
  59. assert input_tensor.layout.shapes[3] == 224
  60. assert input_tensor.layout.data_type == LiteDataType.LITE_FLOAT
  61. assert input_tensor.layout.ndim == 4
  62. self.do_forward(network)
  63. @require_cuda()
  64. def test_network_shared_data(self):
  65. config = LiteConfig()
  66. config.device_type = LiteDeviceType.LITE_CUDA
  67. network = LiteNetwork(config)
  68. network.load(self.model_path)
  69. input_name = network.get_input_name(0)
  70. input_tensor = network.get_io_tensor(input_name)
  71. output_name = network.get_output_name(0)
  72. output_tensor = network.get_io_tensor(output_name)
  73. input_tensor.set_data_by_share(self.input_data)
  74. for i in range(3):
  75. network.forward()
  76. network.wait()
  77. output_data = output_tensor.to_numpy()
  78. self.check_correct(output_data)
  79. @require_cuda(2)
  80. def test_network_set_device_id(self):
  81. config = LiteConfig()
  82. config.device_type = LiteDeviceType.LITE_CUDA
  83. network = LiteNetwork(config)
  84. assert network.device_id == 0
  85. network.device_id = 1
  86. network.load(self.model_path)
  87. assert network.device_id == 1
  88. with self.assertRaises(RuntimeError):
  89. network.device_id = 1
  90. self.do_forward(network)
  91. @require_cuda()
  92. def test_network_option(self):
  93. option = LiteOptions()
  94. option.weight_preprocess = 1
  95. option.var_sanity_check_first_run = 0
  96. config = LiteConfig(option=option)
  97. config.device_type = LiteDeviceType.LITE_CUDA
  98. network = LiteNetwork(config=config)
  99. network.load(self.model_path)
  100. self.do_forward(network)
  101. @require_cuda()
  102. def test_network_reset_io(self):
  103. option = LiteOptions()
  104. option.var_sanity_check_first_run = 0
  105. config = LiteConfig(option=option)
  106. config.device_type = LiteDeviceType.LITE_CUDA
  107. input_io = LiteIO("data")
  108. ios = LiteNetworkIO()
  109. ios.add_input(input_io)
  110. network = LiteNetwork(config=config, io=ios)
  111. network.load(self.model_path)
  112. input_tensor = network.get_io_tensor("data")
  113. assert input_tensor.device_type == LiteDeviceType.LITE_CPU
  114. self.do_forward(network)
  115. @require_cuda()
  116. def test_network_share_weights(self):
  117. option = LiteOptions()
  118. option.var_sanity_check_first_run = 0
  119. config = LiteConfig(option=option)
  120. config.device_type = LiteDeviceType.LITE_CUDA
  121. src_network = LiteNetwork(config=config)
  122. src_network.load(self.model_path)
  123. new_network = LiteNetwork()
  124. new_network.enable_cpu_inplace_mode()
  125. new_network.share_weights_with(src_network)
  126. self.do_forward(src_network)
  127. self.do_forward(new_network)
  128. @require_cuda()
  129. def test_network_share_runtime_memory(self):
  130. option = LiteOptions()
  131. option.var_sanity_check_first_run = 0
  132. config = LiteConfig(option=option)
  133. config.device_type = LiteDeviceType.LITE_CUDA
  134. src_network = LiteNetwork(config=config)
  135. src_network.load(self.model_path)
  136. new_network = LiteNetwork()
  137. new_network.enable_cpu_inplace_mode()
  138. new_network.share_runtime_memroy(src_network)
  139. new_network.load(self.model_path)
  140. self.do_forward(src_network)
  141. self.do_forward(new_network)
  142. @require_cuda()
  143. def test_enable_profile(self):
  144. config = LiteConfig()
  145. config.device_type = LiteDeviceType.LITE_CUDA
  146. network = LiteNetwork(config)
  147. network.load(self.model_path)
  148. network.enable_profile_performance("./profile.json")
  149. self.do_forward(network)
  150. fi = open("./profile.json", "r")
  151. fi.close()
  152. os.remove("./profile.json")
  153. @require_cuda()
  154. def test_algo_workspace_limit(self):
  155. config = LiteConfig()
  156. config.device_type = LiteDeviceType.LITE_CUDA
  157. network = LiteNetwork(config)
  158. network.load(self.model_path)
  159. print("modify the workspace limit.")
  160. network.set_network_algo_workspace_limit(10000)
  161. self.do_forward(network)
  162. @require_cuda()
  163. def test_network_algo_policy(self):
  164. config = LiteConfig()
  165. config.device_type = LiteDeviceType.LITE_CUDA
  166. network = LiteNetwork(config)
  167. network.load(self.model_path)
  168. network.set_network_algo_policy(
  169. LiteAlgoSelectStrategy.LITE_ALGO_PROFILE
  170. | LiteAlgoSelectStrategy.LITE_ALGO_REPRODUCIBLE
  171. )
  172. self.do_forward(network)

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台