You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

gen_heuristic.py 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. #! /usr/local/env python3
  2. import pickle
  3. import numpy as np
  4. import os
  5. import argparse
  6. import re
  7. import collections
  8. def define_template(**kwargs):
  9. template = '''
  10. float cuda{cuda_arch}_{conv_type}_time_pred[{out_dim}] = {{0.0f}};
  11. float cuda{cuda_arch}_{conv_type}_mask[{out_dim}] = {{0.0f}};
  12. float cuda{cuda_arch}_{conv_type}_hidden_units[{hidden_num}] = {{0.0f}};
  13. const static size_t cuda{cuda_arch}_{conv_type}_layers_dim[{layer_num}] = {{{layers_dim}}};
  14. const static float cuda{cuda_arch}_{conv_type}_matrices[{matrices_dim}] = {{{matrices}}};
  15. const static float cuda{cuda_arch}_{conv_type}_biases[{biases_dim}] = {{{biases}}};
  16. const static float cuda{cuda_arch}_{conv_type}_alpha[{out_dim}] = {{{alpha}}};
  17. const static float cuda{cuda_arch}_{conv_type}_beta[{out_dim}] = {{{beta}}};
  18. '''
  19. return template.format(**kwargs)
  20. def cudnn_slt_template(**kwargs):
  21. template = ("#if CUDNN_MAJOR == {cudnn_major} && CUDNN_MINOR == {cudnn_minor}\n" +
  22. " {define_cmd}\n" +
  23. " {select_cmd}\n" +
  24. " return true;\n" +
  25. "#endif\n"
  26. )
  27. return template.format(**kwargs)
  28. def select_template(**kwargs):
  29. template = \
  30. '''if (conv_type == ConvolutionType::{conv_type} && cuda_major == {cuda_major} &&
  31. cuda_minor == {cuda_minor}) {{
  32. *layer_num_p = {layer_num};
  33. *hidden_units_p = cuda{cuda_arch}_{conv_type}_hidden_units;
  34. *layers_dim_p = cuda{cuda_arch}_{conv_type}_layers_dim;
  35. *matrices_p = cuda{cuda_arch}_{conv_type}_matrices;
  36. *biases_p = cuda{cuda_arch}_{conv_type}_biases;
  37. *alpha_p = cuda{cuda_arch}_{conv_type}_alpha;
  38. *beta_p = cuda{cuda_arch}_{conv_type}_beta;
  39. *time_pred_p = cuda{cuda_arch}_{conv_type}_time_pred;
  40. *mask_p = cuda{cuda_arch}_{conv_type}_mask;
  41. }} else '''
  42. return template.format(**kwargs)
  43. def main():
  44. fill_src()
  45. def fill_src():
  46. home = os.path.dirname(__file__)
  47. matrix_files = os.listdir(os.path.join(home, "params"))
  48. gen_list = collections.defaultdict(list)
  49. cudnn_slt_cmd = ""
  50. if len(matrix_files) == 0:
  51. print("Warning: no param files detected.")
  52. for fpath in matrix_files:
  53. cudnn_version = re.findall('cudnn([\d.]+)',fpath)[0]
  54. gen_list[cudnn_version].append(fpath)
  55. for cudnn in gen_list:
  56. select_cmd = ("{\n" +
  57. " " * 8 + "return false;\n" +
  58. " " * 4 + "}")
  59. define_cmd = ""
  60. cudnn_major, cudnn_minor = cudnn.split('.')
  61. for fpath in gen_list[cudnn]:
  62. cuda_arch = fpath.split("-")[1].replace(".", "_")
  63. print('cudnn_version: {}, cuda_arch: {}'.format(cudnn,cuda_arch))
  64. conv_type = fpath.split("-")[2].split(".")[0]
  65. with open(os.path.join(home, "params/{}".format(fpath)), "rb") as pobj:
  66. params = pickle.load(pobj)
  67. crt_define_cmd, crt_select_cmd = gen_cmds(
  68. cuda_arch, conv_type, params)
  69. select_cmd = crt_select_cmd + select_cmd
  70. define_cmd = crt_define_cmd + define_cmd
  71. cudnn_slt_cmd += cudnn_slt_template(cudnn_major=cudnn_major,
  72. cudnn_minor=cudnn_minor,
  73. select_cmd=select_cmd,
  74. define_cmd=define_cmd)
  75. #select_cmd = select_cmd
  76. with open(os.path.join(home, "get_params.template"), "r") as srcf:
  77. src = srcf.read()
  78. dst = src.replace("{cudnn_select}", cudnn_slt_cmd)
  79. MegDNN_path = os.path.join(home, "../..")
  80. with open(os.path.join(MegDNN_path,
  81. "src/cuda/convolution/get_params.cpp"), "w") as dstf:
  82. dstf.write(dst)
  83. def gen_cmds(cuda_arch, conv_type, params):
  84. cuda_major, cuda_minor = cuda_arch.split("_")
  85. alphastr = format_array(params['alpha']).rstrip()[:-1]
  86. betastr = format_array(params['beta']).rstrip()[:-1]
  87. W_list = params['W']
  88. b_list = params['b']
  89. Wstr = ''
  90. bstr = ''
  91. layer_num = str(len(b_list) + 1)
  92. layers_dim = [W_list[0].shape[1]]
  93. matrices_dim = 0
  94. biases_dim = 0
  95. for W in W_list:
  96. Wstr += format_array(W)
  97. matrices_dim += W.shape[0] * W.shape[1]
  98. for b in b_list:
  99. bstr += format_array(b)
  100. layers_dim.append(b.shape[0])
  101. biases_dim += b.shape[0]
  102. Wstr = Wstr.rstrip()[:-1]
  103. bstr = bstr.rstrip()[:-1]
  104. hidden_num = sum(layers_dim[1:-1])
  105. out_dim = layers_dim[-1]
  106. layers_dim_str = format_array(np.array(layers_dim)).rstrip()[:-1]
  107. select_cmd = select_template(conv_type=conv_type.upper(), cuda_major=cuda_major,
  108. cuda_minor=cuda_minor, layer_num=layer_num,
  109. cuda_arch=cuda_arch)
  110. define_cmd = define_template(cuda_arch=cuda_arch, conv_type=conv_type.upper(),
  111. hidden_num=hidden_num,
  112. layer_num=layer_num, out_dim=out_dim,
  113. layers_dim=layers_dim_str,
  114. matrices_dim=matrices_dim, matrices=Wstr,
  115. biases_dim=biases_dim, biases=bstr,
  116. alpha=alphastr, beta=betastr)
  117. return (define_cmd, select_cmd)
  118. def format_array(array):
  119. flat_array = np.squeeze(array.reshape(1, -1))
  120. array_str = ""
  121. ind = 0
  122. if flat_array.dtype == "int":
  123. for ind in range(len(flat_array)):
  124. array_str += str(flat_array[ind]) + ", "
  125. else:
  126. for ind in range(len(flat_array)):
  127. if ind % 4 == 0:
  128. array_str += "\n" + " " * 12
  129. ele = flat_array[ind]
  130. if abs(ele) < 1.0e-37:
  131. array_str += "0.0, "
  132. else:
  133. array_str += "{:.6e}, ".format(ele)
  134. return array_str
  135. if __name__ == "__main__":
  136. parser = argparse.ArgumentParser(
  137. description="Generate cuDNN heuristic code by neural network into"
  138. " {MEGDNN_ROOT}/src/cuda/convolution/get_params.cpp,"
  139. " using parameter value from pickle files in"
  140. " {MEGDNN_ROOT}/scripts/gen_heuristic/params/")
  141. args = parser.parse_args()
  142. main()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台

Contributors (1)