You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

gen_cuda_conv_bias_kern_impls.py 2.7 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import argparse
  5. import itertools
  6. PREFIXES = {"dp4a": "conv_bias_int8_implicit_gemm_cdiv4hwn4", "imma": "conv_bias_int8_implicit_gemm"}
  7. ACTIVATIONS = {1: ("IDENTITY", "_id"),
  8. 2: ("RELU", "_relu"),
  9. 3: ("H_SWISH", "_hswish")}
  10. BIASES = {1: ("PerElementBiasVisitor", "_per_elem"),
  11. 2: ("PerChannelBiasVisitor", "_per_chan")}
  12. SUFFIXES = {"dp4a": ["", "_ld_64bit", "_ld_64bit_unroll_width", "_unroll_width"],
  13. "imma": ["_imma16x16x16_cdiv4hwn4", "_imma8x32x16_cdiv4hwn4", "_imma32x8x16_cdiv4hwn4",
  14. "_imma16x16x16_cdiv4hwn4_reorder_filter", "_imma8x32x16_cdiv4hwn4_reorder_filter", "_imma32x8x16_cdiv4hwn4_reorder_filter",
  15. "_imma16x16x16_cdiv4hwn4_unroll_width", "_imma8x32x16_cdiv4hwn4_unroll_width", "_imma32x8x16_cdiv4hwn4_unroll_width"]}
  16. def main():
  17. parser = argparse.ArgumentParser(
  18. description='generate cuda conv bias (dp4a/imma) kern impl files',
  19. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  20. parser.add_argument('--type', type=str, choices=['dp4a',
  21. 'imma'],
  22. default='dp4a', help='generate cuda conv bias kernel file')
  23. parser.add_argument('output', help='output directory')
  24. args = parser.parse_args()
  25. if not os.path.isdir(args.output):
  26. os.makedirs(args.output)
  27. inst = '''
  28. template void megdnn::cuda::conv_bias_int8::do_PREFIXSUFFIX<BIAS,
  29. IConvEpilogue<Activation<megdnn::param_enumv::ConvBias::NonlineMode::ACTIVATION>>>(
  30. const int8_t* d_src,
  31. const int8_t* d_filter,
  32. BIAS bias,
  33. IConvEpilogue<Activation<megdnn::param_enumv::ConvBias::NonlineMode::ACTIVATION>> epilogue,
  34. const ConvParam& param,
  35. float alpha,
  36. float beta,
  37. cudaStream_t stream);'''
  38. for suffix in SUFFIXES[args.type]:
  39. for _, act in ACTIVATIONS.items():
  40. prefix = PREFIXES[args.type]
  41. bias = BIASES[2]
  42. fname = "{}{}{}{}.cu".format(prefix, suffix, bias[1], act[1])
  43. fname = os.path.join(args.output, fname)
  44. with open(fname, "w") as fout:
  45. w = lambda s: print(s, file=fout)
  46. w('// generated by gen_cuda_conv_bias_kern_impls.py')
  47. cur_inst = inst.replace("PREFIX", prefix).replace("SUFFIX", suffix).replace("BIAS", bias[0]).replace("ACTIVATION", act[0])
  48. w('#include "../{}{}.cuinl"'.format(prefix, suffix))
  49. w(cur_inst)
  50. print('generated {}'.format(fname))
  51. os.utime(args.output)
  52. if __name__ == '__main__':
  53. main()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台

Contributors (1)