@@ -213,7 +213,7 @@ class EmitConv2dInstance: | |||
def __init__(self): | |||
self.template = """ | |||
// kernel instance "${operation_name}" generated by cutlass generator | |||
using Convolution = | |||
using Convolution_${operation_name} = | |||
typename cutlass::conv::device::Convolution< | |||
${element_src}, | |||
${layout_src}, | |||
@@ -317,7 +317,7 @@ class EmitDeconvInstance: | |||
def __init__(self): | |||
self.template = """ | |||
// kernel instance "${operation_name}" generated by cutlass generator | |||
using Convolution = | |||
using Convolution_${operation_name} = | |||
typename cutlass::conv::device::Deconvolution< | |||
${element_src}, | |||
${layout_src}, | |||
@@ -419,7 +419,7 @@ class EmitConvolutionBackwardFilterInstance: | |||
def __init__(self): | |||
self.template = """ | |||
// kernel instance "${operation_name}" generated by cutlass generator | |||
using Convolution = | |||
using Convolution_${operation_name} = | |||
typename cutlass::conv::device::ConvolutionBackwardFilter< | |||
${element_src}, | |||
${layout_src}, | |||
@@ -905,7 +905,7 @@ namespace cutlass { | |||
namespace library { | |||
void initialize_${operation_name}(Manifest &manifest) { | |||
manifest.append(new ${convolution_name}<Convolution>( | |||
manifest.append(new ${convolution_name}<Convolution_${operation_name}>( | |||
"${operation_name}" | |||
)); | |||
} | |||
@@ -929,19 +929,6 @@ void initialize_${operation_name}(Manifest &manifest) { | |||
self.kernel_path, "%s.cu" % self.operation.procedural_name() | |||
) | |||
self.kernel_file = open(self.kernel_path, "w") | |||
self.kernel_file.write( | |||
SubstituteTemplate( | |||
self.header_template, | |||
{ | |||
"required_cuda_ver_major": str( | |||
self.operation.required_cuda_ver_major | |||
), | |||
"required_cuda_ver_minor": str( | |||
self.operation.required_cuda_ver_minor | |||
), | |||
}, | |||
) | |||
) | |||
return self | |||
# | |||
@@ -965,7 +952,6 @@ void initialize_${operation_name}(Manifest &manifest) { | |||
# | |||
def __exit__(self, exception_type, exception_value, traceback): | |||
self.kernel_file.write(self.epilogue_template) | |||
self.kernel_file.close() | |||
@@ -1347,19 +1347,6 @@ void initialize_${operation_name}(Manifest &manifest) { | |||
self.kernel_path, "%s.cu" % self.operation.procedural_name() | |||
) | |||
self.kernel_file = open(self.kernel_path, "w") | |||
self.kernel_file.write( | |||
SubstituteTemplate( | |||
self.header_template, | |||
{ | |||
"required_cuda_ver_major": str( | |||
self.operation.required_cuda_ver_major | |||
), | |||
"required_cuda_ver_minor": str( | |||
self.operation.required_cuda_ver_minor | |||
), | |||
}, | |||
) | |||
) | |||
return self | |||
# | |||
@@ -1379,7 +1366,6 @@ void initialize_${operation_name}(Manifest &manifest) { | |||
# | |||
def __exit__(self, exception_type, exception_value, traceback): | |||
self.kernel_file.write(self.epilogue_template) | |||
self.kernel_file.close() | |||
@@ -1435,20 +1421,6 @@ ${operation_instance} | |||
self.kernel_path, "%s.cu" % self.operation.procedural_name() | |||
) | |||
self.kernel_file = open(self.kernel_path, "w") | |||
self.kernel_file.write( | |||
SubstituteTemplate( | |||
self.header_template, | |||
{ | |||
"wrapper_path": self.wrapper_path, | |||
"required_cuda_ver_major": str( | |||
self.operation.required_cuda_ver_major | |||
), | |||
"required_cuda_ver_minor": str( | |||
self.operation.required_cuda_ver_minor | |||
), | |||
}, | |||
) | |||
) | |||
return self | |||
# | |||
@@ -1468,7 +1440,6 @@ ${operation_instance} | |||
# | |||
def __exit__(self, exception_type, exception_value, traceback): | |||
self.kernel_file.write(self.epilogue_template) | |||
self.kernel_file.close() | |||
@@ -35,24 +35,31 @@ def write_op_list(f, gen_op, gen_type): | |||
if gen_op != "gemv": | |||
f.write(' "all_%s_%s_operations.cu",\n' % (gen_op, gen_type)) | |||
# Write down a list of merged filenames | |||
def write_merge_file_name(f, gen_op, gen_type):
    """Write the names of the merged kernel sources for one (op, type) pair.

    Emits the two merged split files ``<op>_<type>_1.cu`` / ``<op>_<type>_2.cu``
    and, for every op except ``gemv``, the ``all_<op>_<type>_operations.cu``
    manifest source, each as one quoted bzl list entry.
    """
    for part in (1, 2):
        f.write(f' "{gen_op}_{gen_type}_{part}.cu",\n')
    # gemv has no manifest initializer, so it gets no "all_..." entry.
    if gen_op != "gemv":
        f.write(f' "all_{gen_op}_{gen_type}_operations.cu",\n')
if __name__ == "__main__":
    # Every (operation, arch-type) pair the generator produces kernels for.
    # Kept as one table so the two emission passes below stay in sync.
    configs = [
        ("gemm", "simt"),
        ("gemm", "tensorop1688"),
        ("gemm", "tensorop884"),
        ("gemv", "simt"),
        ("deconv", "simt"),
        ("deconv", "tensorop8816"),
        ("conv2d", "simt"),
        ("conv2d", "tensorop8816"),
        ("conv2d", "tensorop8832"),
        ("dwconv2d_fprop", "simt"),
        ("dwconv2d_fprop", "tensorop884"),
        ("dwconv2d_dgrad", "simt"),
        ("dwconv2d_dgrad", "tensorop884"),
        ("dwconv2d_wgrad", "simt"),
        ("dwconv2d_wgrad", "tensorop884"),
    ]
    with open("list.bzl", "w") as f:
        f.write("# Generated by dnn/scripts/cutlass_generator/gen_list.py\n\n")
        f.write("cutlass_gen_list = [\n")
        # First the per-operation source lists, then the merged file names,
        # in the same order as the table above.
        for gen_op, gen_type in configs:
            write_op_list(f, gen_op, gen_type)
        for gen_op, gen_type in configs:
            write_merge_file_name(f, gen_op, gen_type)
        f.write("]")
@@ -9,7 +9,7 @@ import os.path | |||
import shutil | |||
import argparse | |||
import platform | |||
import string | |||
from library import * | |||
from manifest import * | |||
@@ -1657,6 +1657,108 @@ def GenerateGemvOperations(args): | |||
return GenerateGemv_Simt(args) | |||
def concat_file(file_path: str, file_name_first: str, file_name_last: str, head: str,
                required_cuda_ver_major: str, required_cuda_ver_minor: str,
                epilogue: str, wrapper_path=None):
    """Merge the generated per-kernel .cu files for one (op, type) pair into two
    split files ``<op>_<type>_1.cu`` and ``<op>_<type>_2.cu`` under *file_path*.

    Each merged output starts with *head* (after template substitution of the
    required CUDA version, plus ``wrapper_path`` when given) and ends with
    *epilogue*.  Every matching source file is appended to one of the two
    outputs and then deleted.

    :param file_path: directory containing the generated kernel sources.
    :param file_name_first: operation name, e.g. ``gemm`` / ``dwconv2d_fprop``.
    :param file_name_last: arch/type tag, e.g. ``simt`` / ``tensorop884``.
    :param head: header template for the merged files.
    :param required_cuda_ver_major: substituted into ``${required_cuda_ver_major}``.
    :param required_cuda_ver_minor: substituted into ``${required_cuda_ver_minor}``.
    :param epilogue: trailer appended to both merged files.
    :param wrapper_path: optional ``${wrapper_path}`` substitution (gemv only).
    """
    import os

    # Build the substituted header once; the original duplicated this four times.
    fields = {
        "required_cuda_ver_major": str(required_cuda_ver_major),
        "required_cuda_ver_minor": str(required_cuda_ver_minor),
    }
    if wrapper_path is not None:
        fields["wrapper_path"] = wrapper_path
    header = SubstituteTemplate(head, fields)

    # Snapshot the directory BEFORE creating the merged outputs, so the
    # outputs themselves are never candidates for merging.
    filenames = os.listdir(file_path)

    # Substrings used to recognize the kernel files of this op/type.
    if "tensorop" in file_name_last:
        sub_string_1 = "tensorop"
        sub_string_2 = file_name_last[8:]  # e.g. "tensorop884" -> "884"
    else:
        sub_string_1 = sub_string_2 = "simt"
    if "dwconv2d_" in file_name_first:
        # e.g. "dwconv2d_fprop" -> "dwfprop", matching the generated file names.
        file_name_first = file_name_first[:2] + file_name_first[9:]
    elif ("conv2d" in file_name_first) or ("deconv" in file_name_first):
        file_name_first = "cutlass"

    # First half of the matched files goes to file 1, the rest to file 2.
    # NOTE(review): the threshold counts ALL directory entries, not only the
    # matched ones — kept as-is to preserve the original split behavior.
    half = len(filenames) / 2
    matched = 0

    out1 = os.path.join(file_path, '{}_{}_1.cu'.format(file_name_first if False else file_name_first, file_name_last)) if False else file_path + '/{}_{}_1.cu'.format(file_name_first, file_name_last)
    out2 = file_path + '/{}_{}_2.cu'.format(file_name_first, file_name_last)
    with open(out1, 'w') as file1, open(out2, 'w') as file2:
        file1.write(header)
        file2.write(header)
        for filename in filenames:
            # Select by op/type substrings, or fall back to the short-path
            # naming scheme (files whose names start with a digit).  Both
            # branches of the original performed the identical copy.
            selected = "all_" not in filename and (
                (file_name_first in filename
                 and sub_string_1 in filename
                 and sub_string_2 in filename)
                or filename[0].isdigit()
            )
            if not selected:
                continue
            matched += 1
            target = file1 if matched <= half else file2
            filepath = file_path + '/' + filename
            # Close the source promptly (the original leaked these handles).
            with open(filepath) as src:
                target.write(src.read())
            os.remove(filepath)
            # Original behavior: BOTH outputs receive a separator newline
            # after every merged file, not only the one written to.
            file1.write('\n')
            file2.write('\n')
        file1.write(epilogue)
        file2.write(epilogue)
################################################################################################### | |||
################################################################################################### | |||
@@ -1727,18 +1829,33 @@ if __name__ == "__main__": | |||
args.output, operation, short_path | |||
) as emitter: | |||
emitter.emit() | |||
head = EmitConvSingleKernelWrapper(args.output, operations[0], short_path).header_template | |||
required_cuda_ver_major = operations[0].required_cuda_ver_major | |||
required_cuda_ver_minor = operations[0].required_cuda_ver_minor | |||
epilogue = EmitConvSingleKernelWrapper(args.output, operations[0], short_path).epilogue_template | |||
concat_file(args.output,args.operations, args.type, head,required_cuda_ver_major, required_cuda_ver_minor, epilogue) | |||
elif args.operations == "gemm": | |||
for operation in operations: | |||
with EmitGemmSingleKernelWrapper( | |||
args.output, operation, short_path | |||
) as emitter: | |||
emitter.emit() | |||
head = EmitGemmSingleKernelWrapper(args.output, operations[0], short_path).header_template | |||
required_cuda_ver_major = operations[0].required_cuda_ver_major | |||
required_cuda_ver_minor = operations[0].required_cuda_ver_minor | |||
epilogue = EmitGemmSingleKernelWrapper(args.output, operations[0], short_path).epilogue_template | |||
concat_file(args.output, args.operations, args.type, head,required_cuda_ver_major, required_cuda_ver_minor, epilogue) | |||
elif args.operations == "gemv": | |||
for operation in operations: | |||
with EmitGemvSingleKernelWrapper( | |||
args.output, operation, gemv_wrapper_path, short_path | |||
) as emitter: | |||
emitter.emit() | |||
head = EmitGemvSingleKernelWrapper(args.output, operations[0], gemv_wrapper_path, short_path).header_template | |||
required_cuda_ver_major = operations[0].required_cuda_ver_major | |||
required_cuda_ver_minor = operations[0].required_cuda_ver_minor | |||
epilogue = EmitGemvSingleKernelWrapper(args.output, operations[0], gemv_wrapper_path, short_path).epilogue_template | |||
concat_file(args.output,args.operations, args.type, head,required_cuda_ver_major, required_cuda_ver_minor, epilogue, wrapper_path = gemv_wrapper_path) | |||
if args.operations != "gemv": | |||
GenerateManifest(args, operations, args.output) | |||
@@ -1,3 +1,5 @@ | |||
#pragma once | |||
#include "cutlass/gemm/kernel/default_gemv.h" | |||
#include "cutlass/gemm/kernel/gemv_batched_strided.h" | |||
#include "src/cuda/matrix_mul/cutlass_matrix_mul_wrapper.cuh" | |||