
dump_with_testcase.py 17 kB

#!/usr/bin/env mdl
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

from megskull.graph import NodeFilter, FpropEnv
from megskull.opr.all import AssertEqual, DataProvider, BatchNormalization
from megskull.utils.logconf import get_logger
from meghair.utils import io
import megbrain as mgb

import argparse
import struct
import re
import os
import numpy as np
import cv2

logger = get_logger(__name__)
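
# Illustrative invocation (an assumed example, not taken from this repo's
# README): pack the graph in a hypothetical net.pkl together with one
# random-input testcase and one image testcase into net.mgb:
#
#   ./dump_with_testcase.py net.pkl -o net.mgb -d '#rand(0, 255)' -d cat.jpg
#
# The positional `input`, the required `-o/--output` and the repeatable
# `-d/--data` options used here are defined in main() below.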

def auto_reformat_image(args, path, data, dst_shape):
    """reformat image to target shape

    :param data: image data as numpy array
    :param dst_shape: target shape
    """
    dim3_format = False  # required input format does not contain batch
    hwc_format = False   # required input format is NHWC

    if len(dst_shape) == 3:
        dst_shape = (1, ) + dst_shape
        dim3_format = True

    assert len(dst_shape) == 4, 'bad dst_shape: {}'.format(dst_shape)
    chl = dst_shape[1]
    if chl in [1, 3]:
        n, c, h, w = dst_shape
        dst_shape = (n, h, w, c)
    else:
        chl = dst_shape[3]
        assert chl in [1, 3], (
            'can not infer input format from shape: {}'.format(dst_shape))
        hwc_format = True

    # dst_shape has now been normalized to NHWC format

    if args.resize_input:
        h, w = dst_shape[1:3]
        data = cv2.resize(data, (w, h))
        logger.info('input {} resized to {}'.format(path, data.shape))

    if chl == 1:
        data = cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)
        data = data[:, :, np.newaxis]

    assert data.ndim == 3
    data = data[np.newaxis]
    # data normalized to NHWC format

    if not hwc_format:
        data = np.transpose(data, (0, 3, 1, 2))

    if dim3_format:
        data = np.squeeze(data, 0)

    return data
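
# A sketch of how the format inference above behaves (derived from the code,
# not an authoritative spec): a var shape of (1, 3, 224, 224) is read as NCHW,
# so an HxWx3 image from cv2.imread is optionally resized to 224x224 (with
# --resize-input) and then transposed to NCHW; a shape like (1, 224, 224, 3)
# is detected as NHWC and the image is kept channel-last.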

def read_input_data(args, dst_shape, dtype, path, repeat):
    def check_shape_equal(dst_shape, data_shape):
        assert len(data_shape) == len(dst_shape), (
            'input/data shapes mismatch: {} vs {}'.format(
                dst_shape, data_shape))

        if data_shape[1:] != dst_shape[1:]:
            logger.warning('dst_shape is {}; data_shape is {}'.format(
                dst_shape, data_shape))

    if path.startswith('#'):
        assert not args.resize_input
        assert not args.input_transform
        spec = path
        m = re.match(
            r'^#rand\(([-0-9.]*)\s*,\s*([-0-9.]*)\s*(,[^\)]+)?\)$', spec)
        assert m, 'bad spec {}'.format(spec)

        rng_min = float(m.group(1))
        rng_max = float(m.group(2))
        if m.group(3):
            shape_str = m.group(3)
            try:
                shape = shape_str[1:].split(',')
                if shape[-1].strip() == '...':
                    shape = shape[:-1]
                    shape.extend(list(dst_shape[len(shape):]))
                data_shape = tuple(map(int, shape))
            except ValueError as e:
                raise ValueError('bad spec {}: {}'.format(spec, e.args))
        else:
            data_shape = dst_shape

        check_shape_equal(dst_shape, data_shape)
        return np.random.uniform(rng_min, rng_max, data_shape).astype(dtype)
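
    # Random specs handled above (restated from the --data help in main() for
    # quick reference): '#rand(0,255)' uses the var's own shape,
    # '#rand(0,255,1,3,224,224)' gives an explicit shape, and
    # '#rand(0,255,1,...)' keeps the remaining dims of the original shape.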

    # try to load image
    data = cv2.imread(path, cv2.IMREAD_COLOR)
    if data is None:
        assert not args.resize_input
        data = io.load(path)
        assert isinstance(data, np.ndarray)
    else:
        # loading as an image succeeded, so we expect the input to be in
        # image format
        data = auto_reformat_image(args, path, data, dst_shape)

    data = np.repeat(data, repeat, axis=0)
    if repeat > 1:
        logger.info('repeat input for {} times, data shape is {}'.format(
            repeat, data.shape))

    check_shape_equal(dst_shape, data.shape)
    if args.input_transform:
        data = eval(args.input_transform, {'data': data, 'np': np})

    return data

def gen_one_testcase(args, inputs, spec):
    paths = spec.split(';')
    if len(paths) != len(inputs):
        if len(paths) == 1 and paths[0].startswith('#'):
            paths = ['{}:{}'.format(name, paths[0]) for name in inputs.keys()]
    assert len(paths) == len(inputs), (
        'required inputs: {}; data paths: {}'.format(inputs.keys(), paths))
    if len(paths) == 1 and ':' not in paths[0]:
        paths[0] = next(iter(inputs.keys())) + ':' + paths[0]

    ret = {}
    for path in paths:
        var, path = path.split(':')
        if args.repeat:
            repeat = args.repeat
        else:
            repeat = 1
        ret[var] = read_input_data(args, inputs[var].imm_shape,
                                   inputs[var].dtype, path, repeat)
    return ret
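
# Example of the spec syntax handled above (hypothetical var and file names,
# following the --data help in main()): 'data:img0.jpg;label:label0.pkl'
# assigns one file per named input var, while a bare '#rand(0,255)' is
# expanded to every input var automatically.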

def make_feeds(args):
    outputs = io.load_network(args.input).outputs
    if not args.no_assert:
        env = FpropEnv(verbose_fprop=False)
        # set flag so ExternCOprPlaceholder produces the expected output
        env.flags.user['extern_c_opr_eval'] = True
        func = env.comp_graph.compile(None, [mgb.copy_output(env.get_mgbvar(i))
                                             for i in outputs])

        def expect_name(var):
            return 'expect:{}'.format(var.name)

    nf = NodeFilter.make_all_deps(*outputs)
    inputs = {i.name: i for i in nf.data_provider()}

    if args.init_bn:
        for i in nf:
            if isinstance(i, BatchNormalization):
                if i._iter.get_value() == 0:
                    i._iter.set_value(1)
                    i._variance.set_value(np.ones(i._variance.shape))

    testcases = []

    np.set_printoptions(precision=2, threshold=4, suppress=True)

    data_list = []
    for item in args.data:
        if item.startswith('@'):
            with open(item[1:], 'r') as f:
                data_list.extend(
                    [line.rstrip() for line in f if line.rstrip() != ''])
        else:
            data_list.append(item)

    for inp_spec in data_list:
        cur_testcase = gen_one_testcase(args, inputs, inp_spec)
        assert len(cur_testcase) == len(inputs), (
            'required inputs: {}; given data: {}'.format(
                inputs.keys(), cur_testcase.keys()))

        if not args.no_assert:
            outputs_get = func(**cur_testcase)
            for var, val in zip(outputs, outputs_get):
                cur_testcase[expect_name(var)] = val
                logger.info(
                    'generate test groundtruth: var={} shape={} range=({}, {})'
                    ' mean={} var={}'.format(
                        var, val.shape, val.min(), val.max(),
                        np.mean(val), np.var(val)))

        testcases.append(cur_testcase)
        logger.info('add testcase: \n {}'.format(
            '\n '.join('{}: shape={} dtype={} range=({:.2f},{:.2f}) '
                       'mean={:.2f} sd={:.2f}'.format(
                           k, v.shape, v.dtype, v.min(), v.max(),
                           np.mean(v), np.std(v))
                       for k, v in sorted(cur_testcase.items()))))

    if not args.no_assert:
        def expect_shp(var):
            ret = var.partial_shape.determined_shape
            if ret:
                return ret
            return testcases[0][expect_name(var)].shape

        verbose = not args.silent
        outputs = [AssertEqual(DataProvider(expect_name(i), expect_shp(i),
                                            dtype=i.dtype,
                                            comp_node=i.comp_node),
                               i, verbose=verbose, maxerr=args.maxerr)
                   for i in outputs]

    return {'outputs': outputs, 'testcases': testcases}

def optimize_for_inference(args, outputs):
    args_map = {
        'enable_io16xc32': 'f16_io_f32_comp',
        'enable_ioc16': 'f16_io_comp',
        'enable_hwcd4': 'use_nhwcd4',
        'enable_nchw4': 'use_nchw4',
        'enable_nchw88': 'use_nchw88',
        'enable_nchw44': 'use_nchw44',
        'enable_nchw44_dot': 'use_nchw44_dot',
        'enable_nchw32': 'use_nchw32',
        'enable_chwn4': 'use_chwn4',
        'enable_fuse_conv_bias_nonlinearity': 'fuse_conv_bias_nonlinearity',
        'enable_fuse_conv_bias_with_z': 'fuse_conv_bias_with_z',
        'enable_nchw64': 'use_nchw64',
        'enable_fuse_preprocess': 'fuse_preprocess',
    }
    kwargs = {}
    for k, v in args_map.items():
        if getattr(args, k):
            assert args.optimize_for_inference, (
                'optimize_for_inference should be set when {} is given'.format(
                    k))
            kwargs[v] = True

    if args.optimize_for_inference:
        return mgb.optimize_for_inference(outputs, **kwargs)

    return outputs
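
# Sketch of the flag-to-kwarg mapping above: passing --enable-io16xc32
# together with --optimize-for-inference results in
# mgb.optimize_for_inference(outputs, f16_io_f32_comp=True); the other
# --enable-* flags map to their kwargs through args_map in the same way.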

def main():
    parser = argparse.ArgumentParser(
        description='Pack computing graph, input values and expected output '
        'values into one file for checking correctness. README.md gives more '
        'details on the usage',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help='input file; see README for details')
    parser.add_argument('-o', '--output', help='output file', required=True)
    parser.add_argument('--init-bn', action='store_true',
                        help='initialize untrained batch-normalization, to '
                        'avoid NaN or Inf results')
    parser.add_argument(
        '-d', '--data', default=[], action='append',
        help='Input test data when the input file is a network; the current '
        'network output is then used as the groundtruth. '
        'The format is var0:file0;var1:file1... to specify data files for '
        'input vars. It can also be #rand(min,max,shape...) for generating '
        'random input data, for example #rand(0,255), '
        '#rand(0,255,1,3,224,224) or #rand(0, 255, 1, ...) where `...` means '
        'the remaining part of the original shape. '
        'If the shape is not specified, the shape of the '
        'corresponding DataProvider in the network will be used. '
        'If there is only one input var, its name can be omitted. '
        'Each data file can either be an image that can be loaded by opencv, '
        'or a pickled numpy.ndarray. '
        'This option can be given multiple times to add multiple testcases. '
        ' *NOTE* '
        'If the data starts with the letter @, the rest should be a '
        'filename, and each line in the file should be a single datum in '
        'the format described above. '
    )
    parser.add_argument(
        '--repeat', type=int, default=1,
        help='Specify how many times the input image is repeated. '
        'Useful when benchmarking with a batch size other than one. '
        'Has no effect on randomly generated input data.')
    parser.add_argument('--silent', action='store_true',
                        help='set verbose to False in the AssertEqual opr')
    parser.add_argument('--optimize-for-inference', action='store_true',
                        help='enable optimization for inference')
    parser.add_argument('--no-assert', action='store_true',
                        help='do not insert AssertEqual opr to check result; '
                        'this option is useful for benchmarking')
    parser.add_argument('--maxerr', type=float, default=AssertEqual.maxerr,
                        help='max error for AssertEqual check during runtime')
    parser.add_argument('--resize-input', action='store_true',
                        help='resize input image to fit input var shape')
    parser.add_argument('--input-transform',
                        help='a python expression to transform the input '
                        'data. Example: data / np.std(data)')
    parser.add_argument('--discard-var-name', action='store_true',
                        help='discard variable and param names in the '
                        'generated output')
    parser.add_argument('--output-strip-info', action='store_true',
                        help='output code strip information')
    parser.add_argument('--enable-io16xc32', action='store_true',
                        help='transform the model to float16 io and float32 '
                        'compute')
    parser.add_argument('--enable-ioc16', action='store_true',
                        help='transform the dtype of the model to float16 io '
                        'and compute')
    parser.add_argument('--enable-fuse-conv-bias-nonlinearity',
                        action='store_true',
                        help='fuse convolution, bias and nonlinearity oprs '
                        'into a single conv_bias opr')
    parser.add_argument('--enable-hwcd4', action='store_true',
                        help='transform the model format from NCHW to NHWCD4 '
                        'for inference; you may need to disable CUDA and set '
                        'MGB_USE_MEGDNN_DBG=2')
    parser.add_argument('--enable-nchw4', action='store_true',
                        help='transform the model format from NCHW to NCHW4 '
                        'for inference')
    parser.add_argument('--enable-nchw88', action='store_true',
                        help='transform the model format from NCHW to NCHW88 '
                        'for inference')
    parser.add_argument('--enable-nchw44', action='store_true',
                        help='transform the model format from NCHW to NCHW44 '
                        'for inference')
    parser.add_argument('--enable-nchw44-dot', action='store_true',
                        help='transform the model format from NCHW to '
                        'NCHW44_DOT for optimizing armv8.2 dot in inference')
    parser.add_argument('--enable-chwn4', action='store_true',
                        help='transform the model format to CHWN4 '
                        'for inference, mainly used for nvidia tensorcore')
    parser.add_argument('--enable-nchw32', action='store_true',
                        help='transform the model format from NCHW4 to NCHW32 '
                        'for inference on nvidia TensorCore')
    parser.add_argument('--enable-nchw64', action='store_true',
                        help='transform the model format from NCHW to NCHW64 '
                        'for inference on Nvidia GPU')
    parser.add_argument('--enable-fuse-conv-bias-with-z', action='store_true',
                        help='fuse conv_bias with z input for inference on '
                        'nvidia GPU (this optimization pass will result in a '
                        'mismatch between the output precision of training '
                        'and inference)')
    parser.add_argument('--enable-fuse-preprocess', action='store_true',
                        help='fuse astype/pad_channel/dimshuffle and other '
                        'such oprs from the h2d op')
    args = parser.parse_args()

    if args.data:
        feeds = make_feeds(args)
    else:
        feeds = io.load(args.input)

    assert isinstance(feeds, dict) and feeds['testcases'], (
        'testcases can not be empty')

    env = FpropEnv(verbose_fprop=False)

    outputs = feeds['outputs']
    output_mgbvars = list(map(env.get_mgbvar, outputs))
    output_mgbvars = optimize_for_inference(args, output_mgbvars)

    inputs = sorted(((i.name, i.dtype) for i in
                     NodeFilter.make_all_deps(*outputs).data_provider()))
    if args.discard_var_name:
        sereg_kwargs = dict(keep_var_name=0, keep_param_name=False)
    else:
        sereg_kwargs = dict(keep_var_name=2, keep_param_name=True)

    with open(args.output, 'wb') as fout:
        fout.write(b'mgbtest0')
        fout.write(struct.pack('I', len(feeds['testcases'])))

    stat = mgb.serialize_comp_graph_to_file(
        args.output, output_mgbvars, append=True,
        output_strip_info=args.output_strip_info,
        **sereg_kwargs)
    logger.info('graph dump sizes: tot_size={:.3f}KiB overhead={:.3f}KiB'.
                format(stat.tot_bytes / 1024,
                       (stat.tot_bytes - stat.tensor_value_bytes) / 1024))

    for testcase in feeds['testcases']:
        assert isinstance(testcase, dict)
        cg = mgb.comp_graph()
        cn = mgb.comp_node('cpux')
        output_mgbvars = []
        for name, dtype in inputs:
            output_mgbvars.append(
                cg.make_shared(cn, value=testcase.pop(name), dtype=dtype))
        assert not testcase, 'extra inputs provided in testcase: {}'.format(
            testcase.keys())
        mgb.serialize_comp_graph_to_file(
            args.output,
            output_mgbvars,
            append=True,
            output_strip_info=args.output_strip_info,
            append_json=True)


if __name__ == '__main__':
    main()