
gen_heuristic.py 6.0 kB

#!/usr/bin/env python3
import argparse
import collections
import os
import pickle
import re

import numpy as np


def define_template(**kwargs):
    # Emits the C arrays for one (cuda_arch, conv_type) pair: constant MLP
    # parameters (layers_dim, matrices, biases, alpha, beta) plus
    # zero-initialized working buffers (time_pred, mask, hidden_units).
    template = """
float cuda{cuda_arch}_{conv_type}_time_pred[{out_dim}] = {{0.0f}};
float cuda{cuda_arch}_{conv_type}_mask[{out_dim}] = {{0.0f}};
float cuda{cuda_arch}_{conv_type}_hidden_units[{hidden_num}] = {{0.0f}};
const static size_t cuda{cuda_arch}_{conv_type}_layers_dim[{layer_num}] = {{{layers_dim}}};
const static float cuda{cuda_arch}_{conv_type}_matrices[{matrices_dim}] = {{{matrices}}};
const static float cuda{cuda_arch}_{conv_type}_biases[{biases_dim}] = {{{biases}}};
const static float cuda{cuda_arch}_{conv_type}_alpha[{out_dim}] = {{{alpha}}};
const static float cuda{cuda_arch}_{conv_type}_beta[{out_dim}] = {{{beta}}};
"""
    return template.format(**kwargs)


def cudnn_slt_template(**kwargs):
    template = (
        "#if CUDNN_MAJOR == {cudnn_major} && CUDNN_MINOR == {cudnn_minor}\n"
        + "    {define_cmd}\n"
        + "    {select_cmd}\n"
        + "    return true;\n"
        + "#endif\n"
    )
    return template.format(**kwargs)


def select_template(**kwargs):
    template = """if (conv_type == ConvolutionType::{conv_type} && cuda_major == {cuda_major} &&
        cuda_minor == {cuda_minor}) {{
        *layer_num_p = {layer_num};
        *hidden_units_p = cuda{cuda_arch}_{conv_type}_hidden_units;
        *layers_dim_p = cuda{cuda_arch}_{conv_type}_layers_dim;
        *matrices_p = cuda{cuda_arch}_{conv_type}_matrices;
        *biases_p = cuda{cuda_arch}_{conv_type}_biases;
        *alpha_p = cuda{cuda_arch}_{conv_type}_alpha;
        *beta_p = cuda{cuda_arch}_{conv_type}_beta;
        *time_pred_p = cuda{cuda_arch}_{conv_type}_time_pred;
        *mask_p = cuda{cuda_arch}_{conv_type}_mask;
    }} else """
    return template.format(**kwargs)
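

# Illustration (assumed values, not emitted by the script itself): with
# cuda_arch="7_0" and conv_type="FORWARD", select_template() expands to a
# branch of the form
#   if (conv_type == ConvolutionType::FORWARD && cuda_major == 7 &&
#       cuda_minor == 0) {
#       *layer_num_p = ...;
#       *hidden_units_p = cuda7_0_FORWARD_hidden_units;
#       ...
#   } else
# fill_src() concatenates one such branch per parameter file and terminates
# the chain with "{ return false; }".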


def main():
    fill_src()


def fill_src():
    home = os.path.dirname(__file__)
    matrix_files = os.listdir(os.path.join(home, "params"))
    gen_list = collections.defaultdict(list)
    cudnn_slt_cmd = ""
    if len(matrix_files) == 0:
        print("Warning: no param files detected.")
    # group the parameter files by cuDNN version
    for fpath in matrix_files:
        cudnn_version = re.findall(r"cudnn([\d.]+)", fpath)[0]
        gen_list[cudnn_version].append(fpath)
    for cudnn in gen_list:
        # fallback block appended after the chain of "} else " branches
        # produced by select_template()
        select_cmd = "{\n" + " " * 8 + "return false;\n" + " " * 4 + "}"
        define_cmd = ""
        cudnn_major, cudnn_minor = cudnn.split(".")
        for fpath in gen_list[cudnn]:
            cuda_arch = fpath.split("-")[1].replace(".", "_")
            print("cudnn_version: {}, cuda_arch: {}".format(cudnn, cuda_arch))
            conv_type = fpath.split("-")[2].split(".")[0]
            with open(os.path.join(home, "params/{}".format(fpath)), "rb") as pobj:
                params = pickle.load(pobj)
            crt_define_cmd, crt_select_cmd = gen_cmds(cuda_arch, conv_type, params)
            select_cmd = crt_select_cmd + select_cmd
            define_cmd = crt_define_cmd + define_cmd
        cudnn_slt_cmd += cudnn_slt_template(
            cudnn_major=cudnn_major,
            cudnn_minor=cudnn_minor,
            select_cmd=select_cmd,
            define_cmd=define_cmd,
        )
    with open(os.path.join(home, "get_params.template"), "r") as srcf:
        src = srcf.read()
    dst = src.replace("{cudnn_select}", cudnn_slt_cmd)
    MegDNN_path = os.path.join(home, "../..")
    with open(
        os.path.join(MegDNN_path, "src/cuda/convolution/get_params.cpp"), "w"
    ) as dstf:
        dstf.write(dst)


def gen_cmds(cuda_arch, conv_type, params):
    cuda_major, cuda_minor = cuda_arch.split("_")
    # rstrip()[:-1] drops the trailing ", " left by format_array
    alphastr = format_array(params["alpha"]).rstrip()[:-1]
    betastr = format_array(params["beta"]).rstrip()[:-1]
    W_list = params["W"]
    b_list = params["b"]
    Wstr = ""
    bstr = ""
    layer_num = str(len(b_list) + 1)
    # layer widths: input dim first, then one entry per layer from the biases
    layers_dim = [W_list[0].shape[1]]
    matrices_dim = 0
    biases_dim = 0
    for W in W_list:
        Wstr += format_array(W)
        matrices_dim += W.shape[0] * W.shape[1]
    for b in b_list:
        bstr += format_array(b)
        layers_dim.append(b.shape[0])
        biases_dim += b.shape[0]
    Wstr = Wstr.rstrip()[:-1]
    bstr = bstr.rstrip()[:-1]
    hidden_num = sum(layers_dim[1:-1])
    out_dim = layers_dim[-1]
    layers_dim_str = format_array(np.array(layers_dim)).rstrip()[:-1]
    select_cmd = select_template(
        conv_type=conv_type.upper(),
        cuda_major=cuda_major,
        cuda_minor=cuda_minor,
        layer_num=layer_num,
        cuda_arch=cuda_arch,
    )
    define_cmd = define_template(
        cuda_arch=cuda_arch,
        conv_type=conv_type.upper(),
        hidden_num=hidden_num,
        layer_num=layer_num,
        out_dim=out_dim,
        layers_dim=layers_dim_str,
        matrices_dim=matrices_dim,
        matrices=Wstr,
        biases_dim=biases_dim,
        biases=bstr,
        alpha=alphastr,
        beta=betastr,
    )
    return (define_cmd, select_cmd)


def format_array(array):
    # flatten the array and render it as a comma-separated C initializer list
    flat_array = np.squeeze(array.reshape(1, -1))
    array_str = ""
    if np.issubdtype(flat_array.dtype, np.integer):
        for ind in range(len(flat_array)):
            array_str += str(flat_array[ind]) + ", "
    else:
        for ind in range(len(flat_array)):
            # break the line every four values
            if ind % 4 == 0:
                array_str += "\n" + " " * 12
            ele = flat_array[ind]
            # emit near-zero values as an exact 0.0
            if abs(ele) < 1.0e-37:
                array_str += "0.0, "
            else:
                array_str += "{:.6e}, ".format(ele)
    return array_str


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate cuDNN heuristic code from a neural network into"
        " {MEGDNN_ROOT}/src/cuda/convolution/get_params.cpp,"
        " using parameter values from pickle files in"
        " {MEGDNN_ROOT}/scripts/gen_heuristic/params/"
    )
    args = parser.parse_args()
    main()
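
The script only consumes pickle files whose payload is a dict with "W", "b", "alpha" and "beta" entries, and whose file name encodes the cuDNN version, CUDA architecture and convolution type (see gen_cmds and fill_src above). The sketch below shows one way to write such a file; the layer sizes, the zero-valued parameters and the file name cudnn7.6-7.0-FORWARD.pickle are made-up examples inferred from how the script parses its inputs, not names used by the repository.

# Sketch only: key names and the name pattern
# cudnn<major>.<minor>-<cuda_major>.<cuda_minor>-<CONV_TYPE>.<ext>
# are inferred from fill_src()/gen_cmds(); sizes and values are placeholders.
import pickle

import numpy as np

in_dim, hidden_dim, out_dim = 8, 16, 4
params = {
    # one weight matrix / bias vector per layer, in layer order;
    # W[i] has shape (fan_out, fan_in), so W[0].shape[1] is the input size
    "W": [
        np.zeros((hidden_dim, in_dim), dtype=np.float32),
        np.zeros((out_dim, hidden_dim), dtype=np.float32),
    ],
    "b": [
        np.zeros(hidden_dim, dtype=np.float32),
        np.zeros(out_dim, dtype=np.float32),
    ],
    # per-output vectors emitted as the alpha/beta tables by gen_cmds()
    "alpha": np.ones(out_dim, dtype=np.float32),
    "beta": np.zeros(out_dim, dtype=np.float32),
}

with open("params/cudnn7.6-7.0-FORWARD.pickle", "wb") as fout:
    pickle.dump(params, fout)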