
refactor(sdk): refactor load and run with new framework

GitOrigin-RevId: b092699dee
release-1.7
Megvii Engine Team 3 years ago
parent commit 37c1726fc1
45 changed files with 6581 additions and 5 deletions
  1. CMakeLists.txt  (+2, -5)
  2. cmake/gflags.cmake  (+1, -0)
  3. lite/CMakeLists.txt  (+3, -0)
  4. lite/load_and_run/BUILD  (+38, -0)
  5. lite/load_and_run/CMakeLists.txt  (+29, -0)
  6. lite/load_and_run/dump_with_testcase.py  (+404, -0)
  7. lite/load_and_run/dump_with_testcase_mge.py  (+535, -0)
  8. lite/load_and_run/src/helpers/common.h  (+74, -0)
  9. lite/load_and_run/src/helpers/data_parser.cpp  (+266, -0)
  10. lite/load_and_run/src/helpers/data_parser.h  (+48, -0)
  11. lite/load_and_run/src/helpers/json_loader.cpp  (+297, -0)
  12. lite/load_and_run/src/helpers/json_loader.h  (+183, -0)
  13. lite/load_and_run/src/helpers/npy.h  (+615, -0)
  14. lite/load_and_run/src/helpers/outdumper.cpp  (+48, -0)
  15. lite/load_and_run/src/helpers/outdumper.h  (+42, -0)
  16. lite/load_and_run/src/helpers/text_table.cpp  (+119, -0)
  17. lite/load_and_run/src/helpers/text_table.h  (+133, -0)
  18. lite/load_and_run/src/main.cpp  (+31, -0)
  19. lite/load_and_run/src/models/model.cpp  (+60, -0)
  20. lite/load_and_run/src/models/model.h  (+49, -0)
  21. lite/load_and_run/src/models/model_lite.cpp  (+50, -0)
  22. lite/load_and_run/src/models/model_lite.h  (+73, -0)
  23. lite/load_and_run/src/models/model_mdl.cpp  (+105, -0)
  24. lite/load_and_run/src/models/model_mdl.h  (+117, -0)
  25. lite/load_and_run/src/options/device_options.cpp  (+200, -0)
  26. lite/load_and_run/src/options/device_options.h  (+49, -0)
  27. lite/load_and_run/src/options/extern_c_opr_options.cpp  (+216, -0)
  28. lite/load_and_run/src/options/extern_c_opr_options.h  (+64, -0)
  29. lite/load_and_run/src/options/fastrun_options.cpp  (+231, -0)
  30. lite/load_and_run/src/options/fastrun_options.h  (+57, -0)
  31. lite/load_and_run/src/options/io_options.cpp  (+295, -0)
  32. lite/load_and_run/src/options/io_options.h  (+78, -0)
  33. lite/load_and_run/src/options/layout_options.cpp  (+171, -0)
  34. lite/load_and_run/src/options/layout_options.h  (+56, -0)
  35. lite/load_and_run/src/options/optimize_options.cpp  (+600, -0)
  36. lite/load_and_run/src/options/optimize_options.h  (+207, -0)
  37. lite/load_and_run/src/options/option_base.h  (+87, -0)
  38. lite/load_and_run/src/options/plugin_options.cpp  (+401, -0)
  39. lite/load_and_run/src/options/plugin_options.h  (+105, -0)
  40. lite/load_and_run/src/options/strategy_options.cpp  (+96, -0)
  41. lite/load_and_run/src/options/strategy_options.h  (+68, -0)
  42. lite/load_and_run/src/strategys/strategy.cpp  (+24, -0)
  43. lite/load_and_run/src/strategys/strategy.h  (+63, -0)
  44. lite/load_and_run/src/strategys/strategy_fitting.cpp  (+24, -0)
  45. lite/load_and_run/src/strategys/strategy_normal.cpp  (+167, -0)

CMakeLists.txt  (+2, -5)

@@ -74,7 +74,6 @@ option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
 option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
 option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
 option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
-option(MGE_BUILD_SDK "Build load_and_run" ON)
 option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
 option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
 option(MGE_WITH_ROCM "Enable ROCM support" OFF)

@@ -542,6 +541,8 @@ if(MGE_WITH_TEST)
 include(cmake/gtest.cmake)
 endif()

+include(cmake/gflags.cmake)
+
 if(MGE_BUILD_IMPERATIVE_RT)
 set(CMAKE_CXX_STANDARD 17)
 endif()

@@ -1147,10 +1148,6 @@ endif()

 add_subdirectory(src)

-if(MGE_BUILD_SDK)
-add_subdirectory(sdk/load-and-run)
-endif()
-
 if(MGE_BUILD_IMPERATIVE_RT)
 add_subdirectory(imperative)
 message(STATUS "Enable imperative python wrapper runtime")


cmake/gflags.cmake  (+1, -0)

@@ -0,0 +1 @@
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags ${CMAKE_CURRENT_BINARY_DIR}/gflags)

lite/CMakeLists.txt  (+3, -0)

@@ -150,6 +150,9 @@ if(MGE_WITH_TEST)
 add_subdirectory(test)
 endif()

+#load_and_run
+add_subdirectory(load_and_run)
+
 # tools and example
 add_executable(rc4_encryptor tools/rc4_encrypt.cpp)




lite/load_and_run/BUILD  (+38, -0)

@@ -0,0 +1,38 @@
load("//brain/megbrain/lite:flags.bzl","pthread_select")

cc_library(
name = "mgblar",
copts = ["-std=c++14"],

srcs = glob(["src/**/*.cpp"], exclude = ["src/main.cpp"]),
hdrs = glob(["src/**/*.h"]),
includes = ["src"],
features = if_opt([
"no_exceptions",
"no_rtti",
]),
defines = [
"LITE_BUILD_WITH_MGE=1",
],

deps = ["//brain/megbrain/lite:lite_static_test"]+
pthread_select(
["@com_github_gflags_gflags//:gflags_nothreads"],
["//external:gflags"]
),
alwayslink = 1,
visibility = ["//visibility:public"],
)

cc_megvii_binary(
name = "load_and_run",
copts = ["-std=c++14"],
srcs = ["src/main.cpp"],
features = if_opt([
"no_exceptions",
"no_rtti",
]),
internal_deps = [":mgblar"],
visibility = ["//visibility:public"],
)


lite/load_and_run/CMakeLists.txt  (+29, -0)

@@ -0,0 +1,29 @@
# Build load_and_run for lite
include_directories(PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/lite/load_and_run/src>)
file (GLOB_RECURSE SOURCES ./*.cpp)

add_executable (load_and_run ${SOURCES})

target_link_libraries(load_and_run lite_static)
target_link_libraries(load_and_run megbrain)
target_link_libraries(load_and_run gflags)

if(LITE_BUILD_WITH_RKNPU)
# rknn sdk 1.0.0 depends on libc++_shared; use the gold linker to remove the NEEDED shared-object symbol check
target_link_options(load_and_run PRIVATE "-fuse-ld=gold")
endif()

if(MGE_WITH_ROCM)
# FIXME: hip objects can not find cpp objects through lite_static alone
target_link_libraries(load_and_run megdnn)
endif()

if(UNIX)
if(APPLE OR ANDROID)
target_link_libraries(load_and_run dl)
else()
target_link_libraries(load_and_run dl rt)
endif()
endif()

install (TARGETS load_and_run EXPORT ${LITE_EXPORT_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})

lite/load_and_run/dump_with_testcase.py  (+404, -0)

@@ -0,0 +1,404 @@
#!/usr/bin/env mdl
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

from megskull.graph import NodeFilter, FpropEnv
from megskull.opr.all import AssertEqual, DataProvider, BatchNormalization
from megskull.utils.logconf import get_logger
from meghair.utils import io
import megbrain as mgb

import argparse
import struct
import re
import os

import numpy as np
import cv2

logger = get_logger(__name__)

def auto_reformat_image(args, path, data, dst_shape):
"""reformat image to target shape

:param data: image data as numpy array
:param dst_shape: target shape
"""
dim3_format = False # required input format does not contain batch
hwc_format = False # required input format is NHWC

if len(dst_shape) == 3:
dst_shape = (1, ) + dst_shape
dim3_format = True

assert len(dst_shape) == 4, 'bad dst_shape: {}'.format(dst_shape)
chl = dst_shape[1]
if chl in [1, 3]:
n, c, h, w = dst_shape
dst_shape = (n, h, w, c)
else:
chl = dst_shape[3]
assert chl in [1, 3], (
'can not infer input format from shape: {}'.format(dst_shape))
hwc_format = True

# dst_shape has now been normalized to NHWC format

if args.resize_input:
h, w = dst_shape[1:3]
data = cv2.resize(data, (w, h))
logger.info('input {} resized to {}'.format(path, data.shape))

if chl == 1:
data = cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)
data = data[:, :, np.newaxis]

assert data.ndim == 3
data = data[np.newaxis]
# data normalized to NHWC format

if not hwc_format:
data = np.transpose(data, (0, 3, 1, 2))

if dim3_format:
data = np.squeeze(data, 0)

return data

def read_input_data(args, dst_shape, dtype, path, repeat):
def check_shape_equal(dst_shape, data_shape):
assert len(data_shape) == len(dst_shape) , (
'input/data shapes mismatch: {} vs {}'.format(
dst_shape, data_shape))

if data_shape[1:] != dst_shape[1:]:
logger.warning('dst_shape is {}; data_shape is {}'.format(
dst_shape, data_shape))

if path.startswith('#'):
assert not args.resize_input
assert not args.input_transform
spec = path
m = re.match(
r'^#rand\(([-0-9.]*)\s*,\s*([-0-9.]*)\s*(,[^\)]+)?\)$', spec)
assert m, 'bad spec {}'.format(spec)

rng_min = float(m.group(1))
rng_max = float(m.group(2))
if m.group(3):
shape_str = m.group(3)
try:
shape = shape_str[1:].split(',')
if shape[-1].strip() == '...':
shape = shape[:-1]
shape.extend(list(dst_shape[len(shape):]))
data_shape = tuple(map(int, shape))
except ValueError as e:
raise ValueError('bad spec {}: {}'.format(spec, e.args))
else:
data_shape = dst_shape

check_shape_equal(dst_shape, data_shape)
return np.random.uniform(rng_min, rng_max, data_shape).astype(dtype)

# try to load image
data = cv2.imread(path, cv2.IMREAD_COLOR)
if data is None:
assert not args.resize_input
data = io.load(path)
assert isinstance(data, np.ndarray)
else:
# load image succeeds, so we expect input format is image format
data = auto_reformat_image(args, path, data, dst_shape)

data = np.repeat(data, repeat, axis=0)
if repeat > 1:
logger.info('repeat input for {} times, data shape is {}'.format(
repeat, data.shape))

check_shape_equal(dst_shape, data.shape)

if args.input_transform:
data = eval(args.input_transform, {'data': data, 'np': np})

return data


def gen_one_testcase(args, inputs, spec):
paths = spec.split(';')
if len(paths) != len(inputs):
if len(paths) == 1 and paths[0].startswith('#'):
paths = ['{}:{}'.format(name, paths[0]) for name in inputs.keys()]
assert len(paths) == len(inputs), (
'required inputs: {}; data paths: {}'.format(inputs.keys(), paths))
if len(paths) == 1 and ':' not in paths[0]:
paths[0] = next(iter(inputs.keys())) + ':' + paths[0]

ret = {}
for path in paths:
var, path = path.split(':')
if args.repeat:
repeat = args.repeat
else:
repeat = 1
ret[var] = read_input_data(args, inputs[var].imm_shape,
inputs[var].dtype, path, repeat)
return ret


def make_feeds(args):
outputs = io.load_network(args.input).outputs
if not args.no_assert:
env = FpropEnv(verbose_fprop=False)
# set flag so ExternCOprPlaceholder produces expected output
env.flags.user['extern_c_opr_eval'] = True
func = env.comp_graph.compile(None, [mgb.copy_output(env.get_mgbvar(i))
for i in outputs])

def expect_name(var): return 'expect:{}'.format(var.name)

nf = NodeFilter.make_all_deps(*outputs)
inputs = {i.name: i for i in nf.data_provider()}
if args.init_bn:
for i in nf:
if isinstance(i, BatchNormalization):
if i._iter.get_value() == 0:
i._iter.set_value(1)
i._variance.set_value(np.ones(i._variance.shape))

testcases = []

np.set_printoptions(precision=2, threshold=4, suppress=True)

data_list = []
for item in args.data:
if item.startswith('@'):
with open(item[1:], 'r') as f:
data_list.extend([ line.rstrip() for line in f if line.rstrip() != ''])
else:
data_list.append(item)

for inp_spec in data_list:
cur_testcase = gen_one_testcase(args, inputs, inp_spec)
assert len(cur_testcase) == len(inputs), (
'required inputs: {}; given data: {}'.format(
inputs.keys(), cur_testcase.keys()))

if not args.no_assert:
outputs_get = func(**cur_testcase)
for var, val in zip(outputs, outputs_get):
cur_testcase[expect_name(var)] = val
logger.info(
'generate test groundtruth: var={} shape={} range=({}, {})'
' mean={} var={}'.format(
var, val.shape, val.min(), val.max(),
np.mean(val), np.var(val)))
testcases.append(cur_testcase)
logger.info('add testcase: \n {}'.format(
'\n '.join('{}: shape={} dtype={} range=({:.2f},{:.2f}) '
'mean={:.2f} sd={:.2f}'.format(
k, v.shape, v.dtype, v.min(), v.max(), np.mean(v),
np.std(v))
for k, v in sorted(cur_testcase.items()))))

if not args.no_assert:
def expect_shp(var):
ret = var.partial_shape.determined_shape
if ret:
return ret
return testcases[0][expect_name(var)].shape

verbose = not args.silent
outputs = [AssertEqual(DataProvider(expect_name(i), expect_shp(i),
dtype=i.dtype,
comp_node=i.comp_node),
i, verbose=verbose, maxerr=args.maxerr)
for i in outputs]
return {'outputs': outputs, 'testcases': testcases}

def optimize_for_inference(args, outputs):
args_map = {
'enable_io16xc32': 'f16_io_f32_comp',
'enable_ioc16': 'f16_io_comp',
'enable_hwcd4': 'use_nhwcd4',
'enable_nchw4': 'use_nchw4',
'enable_nchw88': 'use_nchw88',
'enable_nchw44': 'use_nchw44',
'enable_nchw44_dot': 'use_nchw44_dot',
'enable_nchw32': 'use_nchw32',
'enable_chwn4': 'use_chwn4',
'enable_fuse_conv_bias_nonlinearity': 'fuse_conv_bias_nonlinearity',
'enable_fuse_conv_bias_with_z': 'fuse_conv_bias_with_z',
'enable_nchw64': 'use_nchw64',
'enable_fuse_preprocess': 'fuse_preprocess',
}

kwargs = {}
for k, v in args_map.items():
if getattr(args, k):
assert args.optimize_for_inference, (
'optimize_for_inference should be set when {} is given'.format(
k))
kwargs[v] = True

if args.optimize_for_inference:
return mgb.optimize_for_inference(outputs, **kwargs)

return outputs

def main():
parser = argparse.ArgumentParser(
description='Pack computing graph, input values and expected output '
'values into one file for checking correctness. README.md gives more '
'details on the usage',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('input', help='input file; see README for details')
parser.add_argument('-o', '--output', help='output file', required=True)
parser.add_argument('--init-bn', action='store_true',
help='initialize untrained batch-normalization, to '
'avoid NaN or Inf results')
parser.add_argument(
'-d', '--data', default=[], action='append',
help='Given input test data when input file is a network, '
'and current network output would be used as groundtruth. '
'The format is var0:file0;var1:file1... to specify data files for '
'input vars. It can also be #rand(min,max,shape...) for generating '
'random input data, for example, #rand(0,255), '
'#rand(0,255,1,3,224,224) or #rand(0, 255, 1, ...) where `...` means '
'the remaining part of the original shape. '
'If the shape is not specified, the shape of '
'corresponding DataProvider in the network will be used. '
'If there is only one input var, its name can be omitted. '
'Each data file can either be an image which can be loaded by opencv, '
'or a pickled numpy.ndarray. '
'This option can be given multiple times to add multiple testcases. '
' *NOTE* '
'If you start the data with the letter @, the rest should be a '
'filename, and each line in the file should be a single datum in '
'the format described above. '
)
parser.add_argument(
'--repeat', type=int, default=1,
help='Specify how many times the input image is repeated. '
'Useful when running benchmark for batch size other than one. '
'Have no effect on randomly generated input data.')
parser.add_argument('--silent', action='store_true',
help='set verbose to False in AssertEqual opr')
parser.add_argument('--optimize-for-inference', action='store_true',
help='enable optimization for inference')
parser.add_argument('--no-assert', action='store_true',
help='do not insert AssertEqual opr to check result; '
'this option is useful for benchmarking')
parser.add_argument('--maxerr', type=float, default=AssertEqual.maxerr,
help='max error for AssertEqual check during runtime')
parser.add_argument('--resize-input', action='store_true',
help='resize input image to fit input var shape')
parser.add_argument('--input-transform',
help='a python expression to transform the input data. '
'Example: data / np.std(data)')
parser.add_argument('--discard-var-name', action='store_true',
help='discard variable and param names in the '
'generated output')
parser.add_argument('--output-strip-info', action='store_true',
help='output code strip information')
parser.add_argument('--enable-io16xc32', action='store_true',
help='transform the mode to float16 io float32 compute')
parser.add_argument('--enable-ioc16', action='store_true',
help='transform the dtype of the model to float16 io '
'and compute')
parser.add_argument('--enable-fuse-conv-bias-nonlinearity',
action='store_true',
help='fuse convolution bias and nonlinearity opr to a '
'conv_bias opr and compute')
parser.add_argument('--enable-hwcd4', action='store_true',
help='transform the model format from NCHW to NHWCD4 '
'for inference; you may need to disable CUDA and set '
'MGB_USE_MEGDNN_DBG=2')
parser.add_argument('--enable-nchw4', action='store_true',
help='transform the model format from NCHW to NCHW4 '
'for inference')
parser.add_argument('--enable-nchw88', action='store_true',
help='transform the model format from NCHW to NCHW88 '
'for inference')
parser.add_argument('--enable-nchw44', action='store_true',
help='transform the model format from NCHW to NCHW44 '
'for inference')
parser.add_argument('--enable-nchw44-dot', action='store_true',
help='transform the model format from NCHW to NCHW44_DOT '
'for optimizing armv8.2 dot in inference')
parser.add_argument('--enable-chwn4', action='store_true',
help='transform the model format to CHWN4 '
'for inference, mainly used for nvidia tensorcore')
parser.add_argument('--enable-nchw32', action='store_true',
help='transform the model format from NCHW4 to NCHW32 '
'for inference on nvidia TensorCore')
parser.add_argument('--enable-nchw64', action='store_true',
help='transform the model format from NCHW to NCHW64 '
'for inference on Nvidia GPU')
parser.add_argument('--enable-fuse-conv-bias-with-z', action='store_true',
help='fuse conv_bias with z input for inference on '
'nvidia GPU (this optimization pass will result in mismatch '
'of the precision of output of training and inference)')
parser.add_argument('--enable-fuse-preprocess', action='store_true',
help='fuse astype/pad_channel/dimshuffle and other oprs '
'from the h2d opr')
args = parser.parse_args()
if args.data:
feeds = make_feeds(args)
else:
feeds = io.load(args.input)

assert isinstance(feeds, dict) and feeds['testcases'], (
'testcases can not be empty')

env = FpropEnv(verbose_fprop=False)

outputs = feeds['outputs']
output_mgbvars = list(map(env.get_mgbvar, outputs))

output_mgbvars = optimize_for_inference(args, output_mgbvars)

inputs = sorted(((i.name, i.dtype) for i in
NodeFilter.make_all_deps(*outputs).data_provider()))
if args.discard_var_name:
sereg_kwargs = dict(keep_var_name=0, keep_param_name=False)
else:
sereg_kwargs = dict(keep_var_name=2, keep_param_name=True)

with open(args.output, 'wb') as fout:
fout.write(b'mgbtest0')
fout.write(struct.pack('I', len(feeds['testcases'])))
stat = mgb.serialize_comp_graph_to_file(
args.output, output_mgbvars, append=True,
output_strip_info=args.output_strip_info,
**sereg_kwargs)
logger.info('graph dump sizes: tot_size={:.3f}KiB overhead={:.3f}KiB'.
format(stat.tot_bytes / 1024,
(stat.tot_bytes - stat.tensor_value_bytes) / 1024))

for testcase in feeds['testcases']:
assert isinstance(testcase, dict)
cg = mgb.comp_graph()
cn = mgb.comp_node('cpux')
output_mgbvars = []
for name, dtype in inputs:
output_mgbvars.append(cg.make_shared(cn, value=testcase.pop(name),
dtype=dtype))
assert not testcase, 'extra inputs provided in testcase: {}'.format(
testcase.keys())

mgb.serialize_comp_graph_to_file(
args.output,
output_mgbvars,
append=True,
output_strip_info=args.output_strip_info,
append_json=True)

if __name__ == '__main__':
main()

lite/load_and_run/dump_with_testcase_mge.py  (+535, -0)

@@ -0,0 +1,535 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import argparse
import os
import re
import struct

import cv2
import numpy as np

import megengine as mge
import megengine.core._imperative_rt as rt
import megengine.core.tensor.megbrain_graph as G
from megengine import tensor
from megengine.core._imperative_rt.core2 import apply
from megengine.core.ops import builtin
from megengine.utils import comp_graph_tools as cgtools

logger = mge.get_logger(__name__)


def auto_reformat_image(args, path, data, dst_shape):
"""reformat image to target shape

:param data: image data as numpy array
:param dst_shape: target shape
"""
dim3_format = False # required input format does not contain batch
hwc_format = False # required input format is NHWC

if not dst_shape: # input tensor shape is not predefined
if len(data.shape) == 2:
chl = 1
h = data.shape[0]
w = data.shape[1]
else:
assert len(data.shape) == 3, "Input image must be of dimension 2 or 3"
h, w, chl = data.shape
dst_shape = (1, chl, h, w)

if len(dst_shape) == 3:
dst_shape = (1,) + dst_shape
dim3_format = True

assert len(dst_shape) == 4, "bad dst_shape: {}".format(dst_shape)
chl = dst_shape[1]
if chl in [1, 3]:
n, c, h, w = dst_shape
dst_shape = (n, h, w, c)
else:
chl = dst_shape[3]
assert chl in [1, 3], "can not infer input format from shape: {}".format(
dst_shape
)
hwc_format = True

# dst_shape has now been normalized to NHWC format

if args.resize_input:
h, w = dst_shape[1:3]
data = cv2.resize(data, (w, h))
logger.info("input {} resized to {}".format(path, data.shape))

if chl == 1:
data = cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)
data = data[:, :, np.newaxis]

assert data.ndim == 3
data = data[np.newaxis]
# data normalized to NHWC format

if not hwc_format:
data = np.transpose(data, (0, 3, 1, 2))

if dim3_format:
data = np.squeeze(data, 0)

return data


def read_input_data(args, dst_shape, dtype, path, repeat):
def check_shape_equal(dst_shape, data_shape):
if len(dst_shape):
assert len(data_shape) == len(
dst_shape
), "input/data shapes mismatch: {} vs {}".format(dst_shape, data_shape)

if data_shape[1:] != dst_shape[1:]:
logger.warning(
"dst_shape is {}; data_shape is {}".format(dst_shape, data_shape)
)

if path.startswith("#"):
assert not args.resize_input
assert not args.input_transform
spec = path
m = re.match(r"^#rand\(([-0-9.]*)\s*,\s*([-0-9.]*)\s*(,[^\)]+)?\)$", spec)
assert m, "bad spec {}".format(spec)

rng_min = float(m.group(1))
rng_max = float(m.group(2))
if m.group(3):
shape_str = m.group(3)
try:
shape = shape_str[1:].split(",")
if shape[-1].strip() == "...":
shape = shape[:-1]
shape.extend(list(dst_shape[len(shape) :]))
data_shape = tuple(map(int, shape))
except ValueError as e:
raise ValueError("bad spec {}: {}".format(spec, e.args))
else:
data_shape = dst_shape

check_shape_equal(dst_shape, data_shape)
return np.random.uniform(rng_min, rng_max, data_shape).astype(dtype)

# try to load image
data = cv2.imread(path, cv2.IMREAD_COLOR)
if data is None:
assert not args.resize_input
data = np.load(path)
assert isinstance(data, np.ndarray)
else:
# load image succeeds, so we expect input format is image format
data = auto_reformat_image(args, path, data, dst_shape)

data = np.repeat(data, repeat, axis=0)
if repeat > 1:
logger.info(
"repeat input for {} times, data shape is {}".format(repeat, data.shape)
)

check_shape_equal(dst_shape, data.shape)

if args.input_transform:
data = eval(args.input_transform, {"data": data, "np": np})

return data


def gen_one_testcase(args, inputs, spec):
paths = spec.split(";")
if len(paths) != len(inputs):
if len(paths) == 1 and paths[0].startswith("#"):
paths = ["{}:{}".format(name, paths[0]) for name in inputs.keys()]
assert len(paths) == len(inputs), "required inputs: {}; data paths: {}".format(
inputs.keys(), paths
)
if len(paths) == 1 and ":" not in paths[0]:
paths[0] = next(iter(inputs.keys())) + ":" + paths[0]

ret = {}
for path in paths:
var, path = path.split(":")
if args.repeat:
repeat = args.repeat
else:
repeat = 1
ret[var] = read_input_data(
args, inputs[var].shape, inputs[var].dtype, path, repeat
)
return ret


def make_feeds(args):
ret = G.load_graph(args.input)
cg_rt, outputs = ret.graph, ret.output_vars_list
inputs = cgtools.get_dep_vars(outputs, "Host2DeviceCopy")

inputs = {i.name: i for i in inputs}
if not args.no_assert:

replace_varmap = {}
inp_map = {}
# replace var use InputNode
for name, var in inputs.items():
inp = G.InputNode(
device="xpux", dtype=var.dtype, shape=var.shape, graph=cg_rt
)
replace_varmap[var] = inp.outputs[0]
inp_map[name] = inp

new = cgtools.replace_vars(outputs, replace_varmap)
if isinstance(new, rt.VarNode):
new = list(new)

output_nodes = [G.OutputNode(var) for var in new]
func = cg_rt.compile([node.outputs[0] for node in output_nodes])

def make_dev_tensor(value, dtype=None, device=None):
return tensor(value, dtype=dtype, device=device)._dev_tensor()

def calculate(*args, **kwargs):
output_val = []
# set inputs value
for name, var in inputs.items():
val = kwargs.pop(name, None)
assert val is not None, "missing input name: {}".format(name)
dev_tensor = make_dev_tensor(val, dtype=var.dtype, device="xpux")
inp_map[name].set_value(dev_tensor)

func.execute()

for res in output_nodes:
output_val.append(res.get_value().numpy())
return output_val

def expect_name(var):
return "{}:expect".format(var.name)

testcases = []

np.set_printoptions(precision=2, threshold=4, suppress=True)

data_list = []
for item in args.data:
if item.startswith("@"):
with open(item[1:], "r") as f:
data_list.extend([line.rstrip() for line in f if line.rstrip() != ""])
else:
data_list.append(item)

for inp_spec in data_list:
cur_testcase = gen_one_testcase(args, inputs, inp_spec)
assert len(cur_testcase) == len(
inputs
), "required inputs: {}; given data: {}".format(
inputs.keys(), cur_testcase.keys()
)

if not args.no_assert:
outputs_get = calculate(**cur_testcase)
for var, val in zip(outputs, outputs_get):
cur_testcase[expect_name(var)] = val
logger.info(
"generate test groundtruth: var={} shape={} range=({}, {})"
" mean={} var={}".format(
var, val.shape, val.min(), val.max(), np.mean(val), np.var(val)
)
)
testcases.append(cur_testcase)
logger.info(
"add testcase: \n {}".format(
"\n ".join(
"{}: shape={} dtype={} range=({:.2f},{:.2f}) "
"mean={:.2f} sd={:.2f}".format(
k, v.shape, v.dtype, v.min(), v.max(), np.mean(v), np.std(v)
)
for k, v in sorted(cur_testcase.items())
)
)
)

if not args.no_assert:

def expect_shp(var):
ret = var.shape
if ret:
return ret
return testcases[0][expect_name(var)].shape

def assert_equal(expect, real, **kwargs):
op = builtin.AssertEqual(**kwargs)
(res,) = G.apply_normal_varnode(op, expect, real)
return res

verbose = not args.silent

outputs_new = []
for i in outputs:
device = rt.CompNode("xpux")
dtype = i.dtype
name = expect_name(i)
shape = expect_shp(i)
# make expect output as one input of model.
expect_get = rt.make_h2d(cg_rt, device, dtype, shape, name)
# insert assert opr to check expect and real.
outputs_new.append(
assert_equal(
expect_get,
i,
verbose=verbose,
maxerr=args.maxerr,
)
)
inputs[expect_name(i)] = expect_get
outputs = outputs_new

return {"outputs": outputs, "testcases": testcases}


def optimize_for_inference(args, outputs):
args_list = [
"enable_io16xc32",
"enable_ioc16",
"enable_hwcd4",
"enable_nchw4",
"enable_nchw88",
"enable_nchw44",
"enable_nchw44_dot",
"enable_nchw32",
"enable_chwn4",
"enable_fuse_conv_bias_nonlinearity",
"enable_fuse_conv_bias_with_z",
"enable_fuse_preprocess",
]
kwargs = {}
for k in args_list:
if getattr(args, k):
kwargs[k] = True

if args.optimize_for_inference:
outputs = G.optimize_for_inference(outputs, **kwargs)

return outputs


def main():
parser = argparse.ArgumentParser(
description="Pack computing graph, input values and expected output "
"values into one file for checking correctness. README.md gives more "
"details on the usage",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("input", help="MegEngine dumped model file")
parser.add_argument("-o", "--output", help="output file", required=True)
parser.add_argument(
"-d",
"--data",
default=[],
action="append",
required=True,
help="Given input test data when input file is a network, "
"and current network output would be used as groundtruth. "
"The format is var0:file0;var1:file1... to specify data files for "
"input vars. It can also be #rand(min,max,shape...) for generating "
"random input data, for example, #rand(0,255), "
"#rand(0,255,1,3,224,224) or #rand(0, 255, 1, ...) where `...` means "
"the remaining part of the original shape. "
"If the shape is not specified, the shape of "
"corresponding input tensors in the network will be used. "
"If there is only one input var, its name can be omitted. "
"Each data file can either be an image which can be loaded by opencv, "
"or a pickled numpy.ndarray. "
"This option can be given multiple times to add multiple testcases. "
" *NOTE* "
"If you start the data with the letter @, the rest should be a "
"filename, and each line in the file should be a single datum in "
"the format described above. ",
)
parser.add_argument(
"--repeat",
type=int,
default=1,
help="Specify how many times the input image is repeated. "
"Useful when running benchmark for batch size other than one. "
"Have no effect on randomly generated input data.",
)
parser.add_argument(
"--silent",
action="store_true",
help="set verbose to False in asserti_equal opr",
)
parser.add_argument(
"--optimize-for-inference",
action="store_true",
help="enable optimization for inference",
)
parser.add_argument(
"--no-assert",
action="store_true",
help="do not insert assert_equal opr to check result; "
"this option is useful for benchmarking",
)
parser.add_argument(
"--maxerr",
type=float,
default=1e-4,
help="max error for assert_equal check during runtime",
)
parser.add_argument(
"--resize-input",
action="store_true",
help="resize input image to fit input var shape",
)
parser.add_argument(
"--input-transform",
help="a python expression to transform the input data. "
"Example: data / np.std(data)",
)
parser.add_argument(
"--discard-var-name",
action="store_true",
help="discard variable and param names in the " "generated output",
)
parser.add_argument(
"--output-strip-info", action="store_true", help="output code strip information"
)
parser.add_argument(
"--enable-io16xc32",
action="store_true",
help="transform the mode to float16 io float32 compute",
)
parser.add_argument(
"--enable-ioc16",
action="store_true",
help="transform the dtype of the model to float16 io " "and compute",
)
parser.add_argument(
"--enable-fuse-conv-bias-nonlinearity",
action="store_true",
help="fuse convolution bias and nonlinearity opr to a "
"conv_bias opr and compute",
)
parser.add_argument(
"--enable-hwcd4",
action="store_true",
help="transform the model format from NCHW to NHWCD4 "
"for inference; you may need to disable CUDA and set "
"MGB_USE_MEGDNN_DBG=2",
)
parser.add_argument(
"--enable-nchw4",
action="store_true",
help="transform the model format from NCHW to NCHW4 " "for inference",
)
parser.add_argument(
"--enable-nchw88",
action="store_true",
help="transform the model format from NCHW to NCHW88 " "for inference",
)
parser.add_argument(
"--enable-nchw44",
action="store_true",
help="transform the model format from NCHW to NCHW44 " "for inference",
)
parser.add_argument(
"--enable-nchw44-dot",
action="store_true",
help="transform the model format from NCHW to NCHW44_DOT "
"for optimizing armv8.2 dot in inference",
)
parser.add_argument(
"--enable-nchw32",
action="store_true",
help="transform the model format from NCHW4 to NCHW32 "
"for inference on nvidia TensoCore",
)
parser.add_argument(
"--enable-chwn4",
action="store_true",
help="transform the model format to CHWN4 "
"for inference, mainly used for nvidia tensorcore",
)
parser.add_argument(
"--enable-fuse-conv-bias-with-z",
action="store_true",
help="fuse conv_bias with z input for inference on "
"nvidia GPU (this optimization pass will result in mismatch "
"of the precision of output of training and inference)",
)
parser.add_argument(
"--enable-fuse-preprocess",
action="store_true",
help="fuse astype\pad_channel\dimshuffle and etc opr "
"from h2d opr",
)
args = parser.parse_args()

feeds = make_feeds(args)

assert isinstance(feeds, dict) and feeds["testcases"], "testcases can not be empty"

output_mgbvars = feeds["outputs"]
output_mgbvars = optimize_for_inference(args, output_mgbvars)

inputs = cgtools.get_dep_vars(output_mgbvars, "Host2DeviceCopy")
inputs = sorted((i.name, i.dtype) for i in inputs)

if args.discard_var_name:
sereg_kwargs = dict(keep_var_name=0, keep_param_name=False)
else:
sereg_kwargs = dict(keep_var_name=2, keep_param_name=True)

strip_info_file = args.output + ".json" if args.output_strip_info else None

with open(args.output, "wb") as fout:
fout.write(b"mgbtest0")
fout.write(struct.pack("I", len(feeds["testcases"])))
dump_content, stat = G.dump_graph(
output_mgbvars,
append_json=True,
strip_info_file=strip_info_file,
**sereg_kwargs,
)
fout.write(dump_content)

logger.info(
"graph dump sizes: tot_size={:.3f}KiB overhead={:.3f}KiB".format(
stat.tot_bytes / 1024, (stat.tot_bytes - stat.tensor_value_bytes) / 1024
)
)

def make_dev_tensor(value, dtype=None, device=None):
return tensor(value, dtype=dtype, device=device)._dev_tensor()

for testcase in feeds["testcases"]:
assert isinstance(testcase, dict)
cg = G.Graph()
output_mgbvars = []
for name, dtype in inputs:
output_mgbvars.append(
cg.make_const(
make_dev_tensor(testcase.pop(name), dtype=dtype, device="cpux")
)
)
assert not testcase, "extra inputs provided in testcase: {}".format(
testcase.keys()
)
with open(args.output, "ab") as fout:
dump_content, _ = G.dump_graph(
output_mgbvars, strip_info_file=strip_info_file, append_json=True
)
fout.write(dump_content)


if __name__ == "__main__":
main()

lite/load_and_run/src/helpers/common.h  (+74, -0)

@@ -0,0 +1,74 @@
/**
* \file lite/load_and_run/src/helpers/common.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#include <memory>
DECLARE_int32(thread);
namespace lar {
/*!
* \brief: stages of running a model
*/
enum class RunStage {

BEFORE_MODEL_LOAD = 0,

AFTER_MODEL_LOAD = 1,

BEFORE_OUTSPEC_SET = 2,

//! used for dumping the static memory information SVG file
AFTER_OUTSPEC_SET = 3,

//! used for the external C opr library
MODEL_RUNNING = 4,

//! used for the output dumper
AFTER_RUNNING_WAIT = 5,

//! used for the external C opr library
AFTER_RUNNING_ITER = 6,

AFTER_MODEL_RUNNING = 7,
};
/*!
* \brief: types of supported models
*/
enum class ModelType {
LITE_MODEL = 0,
MEGDL_MODEL,
UNKNOWN,
};
/*!
* \brief: parameters for running a model
*/
struct RuntimeParam {
RunStage stage = RunStage::AFTER_MODEL_LOAD;
size_t warmup_iter;  //! number of warm-up iterations before running the model
size_t run_iter;  //! number of iterations for running the model
size_t threads = FLAGS_thread;  //! number of threads for running the model (NOTE: this is
                                //! different from a multithread device)
size_t testcase_num = 1;  //! number of testcases for a model packed with testcases
};
/*!
* \brief: layout types for model layout optimization
*/
enum class OptLayoutType {
NCHW4 = 1 << 0,
CHWN4 = 1 << 1,
NCHW44 = 1 << 2,
NCHW88 = 1 << 3,
NCHW32 = 1 << 4,
NCHW64 = 1 << 5,
NHWCD4 = 1 << 6,
NCHW44_DOT = 1 << 7
};

} // namespace lar
// vim: syntax=cpp.doxygen
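As a minimal sketch (not part of this commit; run_one_model() is a hypothetical driver), a runner built on this framework might walk the RunStage hooks declared above for every model execution, updating RuntimeParam::stage as it goes so that options registered for each stage can react:

#include "helpers/common.h"

// hypothetical driver: visit every RunStage in declaration order for one run
void run_one_model(lar::RuntimeParam& param) {
    using lar::RunStage;
    const RunStage stages[] = {
            RunStage::BEFORE_MODEL_LOAD,  RunStage::AFTER_MODEL_LOAD,
            RunStage::BEFORE_OUTSPEC_SET, RunStage::AFTER_OUTSPEC_SET,
            RunStage::MODEL_RUNNING,      RunStage::AFTER_RUNNING_WAIT,
            RunStage::AFTER_RUNNING_ITER, RunStage::AFTER_MODEL_RUNNING};
    for (RunStage stage : stages) {
        param.stage = stage;
        // each registered option would get a chance to configure the model here
    }
}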

lite/load_and_run/src/helpers/data_parser.cpp  (+266, -0)

@@ -0,0 +1,266 @@
/**
* \file lite/load_and_run/src/helpers/data_parser.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "data_parser.h"
#include <sstream>
#include "json_loader.h"
#include "npy.h"

using namespace lar;

/*!
* \brief feed different kinds of data to the corresponding parser
* \param path data file path or data string
*/
void DataParser::feed(const std::string& path) {
std::string blob_name = "data", blob_string = path;
size_t sep = path.find(":");
if (sep != std::string::npos) {
blob_name = path.substr(0, sep);
blob_string = path.substr(sep + 1);
}

auto endWith = [blob_string](std::string suffix) -> bool {
return blob_string.rfind(suffix) == (blob_string.length() - suffix.length());
};

if (endWith(".ppm") || endWith(".pgm")) {
parse_image(blob_name, blob_string);
} else if (endWith(".json")) {
parse_json(blob_string);
} else if (endWith(".npy")) {
parse_npy(blob_name, blob_string);
} else {
parse_string(blob_name, blob_string);
}
}

void DataParser::parse_json(const std::string& path) {
mgb::JsonLoader json;
std::shared_ptr<mgb::JsonLoader::Value> root = json.load(path.c_str());

mgb_assert(root != nullptr, "parse json %s fail", path.c_str());
// parse json to data map
const std::string SHAPE = "shape", TYPE = "type", RAW = "raw";
for (auto& item : root->objects()) {
auto&& value = *item.second;
auto&& shape = value[SHAPE];
mgb_assert(shape->is_array());

auto&& type = value[TYPE];
mgb_assert(type->is_str());

auto&& raw = value[RAW];
mgb_assert(raw->is_array());

megdnn::SmallVector<size_t> data_shape;
for (auto&& shape_ptr : shape->array()) {
data_shape.append({static_cast<size_t>(std::round(shape_ptr->number()))});
}

// get type
const std::map<std::string, megdnn::DType> type_map = {
{"float32", mgb::dtype::Float32()}, {"float", mgb::dtype::Float32()},
{"int32", mgb::dtype::Int32()}, {"int", mgb::dtype::Int32()},
{"int8", mgb::dtype::Int8()}, {"uint8", mgb::dtype::Uint8()}};

const std::string& type_str = type->str();
mgb_assert(
type_map.find(type_str) != type_map.end(),
"unknown json data type for --input");

mgb::DType datatype = type_map.at(type_str);
mgb::HostTensorND hv;
hv.comp_node(mgb::CompNode::default_cpu(), true)
.dtype(datatype)
.resize(data_shape);
mgb::dt_byte* raw_ptr = hv.raw_ptr();
size_t elem_size = datatype.size();

// get raw
const size_t array_size = raw->len();
for (size_t idx = 0; idx < array_size; ++idx) {
double tmp = (*raw)[idx]->number();

switch (datatype.enumv()) {
case megdnn::DTypeEnum::Int32: {
int32_t ival = std::round(tmp);
memcpy(((char*)raw_ptr) + idx * elem_size, &ival, elem_size);
} break;
case megdnn::DTypeEnum::Uint8:
case megdnn::DTypeEnum::Int8: {
int8_t cval = std::round(tmp);
memcpy(((char*)raw_ptr) + idx, &cval, sizeof(int8_t));
} break;
case megdnn::DTypeEnum::Float32: {
float fval = tmp;
memcpy(((char*)raw_ptr) + idx * elem_size, &fval, elem_size);
} break;
default:
break;
}
}

inputs.insert(std::make_pair(item.first, std::move(hv)));
}
}

void DataParser::parse_image(const std::string& name, const std::string& path) {
// load binary ppm/pgm
std::ifstream fin;
fin.open(path, std::ifstream::binary | std::ifstream::in);
mgb_assert(fin.is_open(), "open file %s failed for --input", path.c_str());

size_t w = 0, h = 0, channel = 0;
char buf[128] = {0};

fin.getline(buf, 128);
if ('5' == buf[1]) {
channel = 1;
} else if ('6' == buf[1]) {
channel = 3;
} else {
mgb_assert(0, "not a formal ppm/pgm");
}

while (fin.getline(buf, 128)) {
if (buf[0] == '#') {
continue;
}
break;
}
std::stringstream ss;
ss << std::string(buf);
ss >> w;
ss >> h;

mgb_assert(w > 0 and h > 0);

mgb::HostTensorND hv;
hv.comp_node(mgb::CompNode::default_cpu(), true)
.dtype(mgb::dtype::Uint8())
.resize({1, h, w, channel});

fin.read((char*)(hv.raw_ptr()), hv.layout().total_nr_elems());
fin.close();
inputs.insert(std::make_pair(name, std::move(hv)));
}

void DataParser::parse_npy(const std::string& name, const std::string& path) {
std::string type_str;
std::vector<npy::ndarray_len_t> stl_shape;
std::vector<int8_t> raw;
npy::LoadArrayFromNumpy(path, type_str, stl_shape, raw);

megdnn::SmallVector<size_t> shape;
for (auto val : stl_shape) {
shape.append({static_cast<size_t>(val)});
}

const std::map<std::string, megdnn::DType> type_map = {
{"f4", mgb::dtype::Float32()}, {"i4", mgb::dtype::Int32()},
{"i2", mgb::dtype::Int16()}, {"u2", mgb::dtype::Uint16()},
{"i1", mgb::dtype::Int8()}, {"u1", mgb::dtype::Uint8()}};

megdnn::DType hv_type;
for (auto& item : type_map) {
if (type_str.find(item.first) != std::string::npos) {
hv_type = item.second;
break;
}
}

mgb::HostTensorND hv;
hv.comp_node(mgb::CompNode::default_cpu(), true).dtype(hv_type).resize(shape);
mgb::dt_byte* raw_ptr = hv.raw_ptr();
memcpy(raw_ptr, raw.data(), raw.size());

inputs.insert(std::make_pair(name, std::move(hv)));
}

void DataParser::parse_string(const std::string name, const std::string& str) {
// data type
megdnn::DType data_type = mgb::dtype::Int32();
if (str.find(".") != std::string::npos or str.find(".") != std::string::npos) {
data_type = mgb::dtype::Float32();
}
// shape
size_t number_cnt = 0;

std::shared_ptr<Brace> brace_root = std::make_shared<Brace>();
std::shared_ptr<Brace> cur = brace_root;
for (size_t i = 0; i < str.size(); ++i) {
char c = str[i];
if (c == '[') {
std::shared_ptr<Brace> child = std::make_shared<Brace>();
child->parent = cur;
cur->chidren.emplace_back(child);
cur = child;
} else if (c == ']') {
cur = cur->parent.lock();
} else if (c == ',') {
number_cnt++;
}
continue;
}
++number_cnt;

mgb_assert(cur == brace_root, "braces not closed for --input");
megdnn::SmallVector<size_t> shape;
cur = brace_root;
while (not cur->chidren.empty()) {
shape.append({cur->chidren.size()});
number_cnt /= cur->chidren.size();
cur = cur->chidren[0];
}
mgb_assert(number_cnt > 0);
shape.append({number_cnt});

// data
std::string json_arr;
for (size_t i = 0; i < str.size(); ++i) {
char c = str[i];
if (c != '[' and c != ']') {
json_arr += c;
}
}
json_arr = "[" + json_arr + "]";

// reuse json parser to resolve raw data
mgb::JsonLoader json;
std::shared_ptr<mgb::JsonLoader::Value> json_root =
json.load(json_arr.data(), json_arr.size());
mgb_assert(json_root != nullptr, "parse json fail in parse_string");

mgb::HostTensorND hv;
hv.comp_node(mgb::CompNode::default_cpu(), true).dtype(data_type).resize(shape);
mgb::dt_byte* raw_ptr = hv.raw_ptr();

const size_t array_len = json_root->len();
const size_t elem_size = data_type.size();
for (size_t idx = 0; idx < array_len; ++idx) {
double tmp = json_root->array()[idx]->number();
switch (data_type.enumv()) {
case megdnn::DTypeEnum::Int32: {
int32_t ival = std::round(tmp);
memcpy(((char*)raw_ptr) + idx * elem_size, &ival, elem_size);
} break;
case megdnn::DTypeEnum::Float32: {
float fval = tmp;
memcpy(((char*)raw_ptr) + idx * elem_size, &fval, elem_size);
} break;
default:
break;
}
}
inputs.insert(std::make_pair(name, std::move(hv)));
}

lite/load_and_run/src/helpers/data_parser.h  (+48, -0)

@@ -0,0 +1,48 @@
/**
* \file lite/load_and_run/src/helpers/data_parser.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#pragma once

#include <memory>
#include <unordered_map>
#include <vector>
#include "megbrain/opr/io.h"

namespace lar {
/*!
* \brief data parser for --input
* support .json|.ppm|.pgm|.npy data and user define data string
* data string format: [0,0,227,227]
*/
struct DataParser {
struct Brace {
std::weak_ptr<Brace> parent;
std::vector<std::shared_ptr<Brace>> chidren;
};
void feed(const std::string& path);

std::unordered_map<std::string, mgb::HostTensorND> inputs;

private:
//! parser for json data
void parse_json(const std::string& path);

//! parser for .ppm .pgm image
void parse_image(const std::string& name, const std::string& path);

//! parser for .npy data
void parse_npy(const std::string& name, const std::string& path);

//! parser for user define string
void parse_string(const std::string name, const std::string& str);
};
} // namespace lar
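A minimal usage sketch for DataParser (not part of this commit; the file names are hypothetical), exercising the --input formats listed in the comment above:

#include "helpers/data_parser.h"

void fill_inputs(lar::DataParser& parser) {
    parser.feed("data:input.npy");      // "<name>:<file>" pair, handled by parse_npy
    parser.feed("roi:[0,0,227,227]");   // user-defined data string, parsed as an Int32 tensor
    parser.feed("image.ppm");           // a bare path gets the default blob name "data"
    for (auto& kv : parser.inputs) {
        // kv.first: blob name, kv.second: the parsed mgb::HostTensorND
        (void)kv;
    }
}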

lite/load_and_run/src/helpers/json_loader.cpp  (+297, -0)

@@ -0,0 +1,297 @@
/**
* \file lite/load_and_run/src/helpers/json_loader.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "json_loader.h"

using namespace mgb;

template <typename T>
T* JsonLoader::Value::safe_cast() {
T* ptr = (T*)(this);
if (nullptr == ptr) {
fprintf(stderr, "cast ptr is null\n");
}
return ptr;
}

std::unique_ptr<JsonLoader::Value>& JsonLoader::Value::operator[](
const std::string& key) {
mgb_assert(Type::OBJECT == m_type);
auto t = safe_cast<JsonLoader::ObjectValue>();
return t->m_obj.at(key);
}

std::unique_ptr<JsonLoader::Value>& JsonLoader::Value::operator[](const size_t index) {
mgb_assert(Type::ARRAY == m_type);
auto t = safe_cast<JsonLoader::ArrayValue>();
return t->m_obj[index];
}

std::map<std::string, std::unique_ptr<JsonLoader::Value>>& JsonLoader::Value::
objects() {
mgb_assert(Type::OBJECT == m_type);
auto t = safe_cast<JsonLoader::ObjectValue>();
return t->m_obj;
}

size_t JsonLoader::Value::len() {
if (Type::ARRAY == m_type) {
auto t = safe_cast<JsonLoader::ArrayValue>();
return t->m_obj.size();
} else if (Type::OBJECT == m_type) {
auto t = safe_cast<JsonLoader::ObjectValue>();
return t->m_obj.size();
}
return 0;
}

megdnn::SmallVector<std::unique_ptr<JsonLoader::Value>>& JsonLoader::Value::array() {
mgb_assert(Type::ARRAY == m_type);
auto t = safe_cast<JsonLoader::ArrayValue>();
return t->m_obj;
}

double JsonLoader::Value::number() {
mgb_assert(Type::NUMBER == m_type);
auto t = safe_cast<JsonLoader::NumberValue>();
return t->value();
}

std::string JsonLoader::Value::str() {
if (Type::STRING == m_type) {
auto t = safe_cast<StringValue>();
return t->value();
}
return std::string();
}

void JsonLoader::expect(char c) {
mgb_assert(c == (*m_buf));
m_buf++;
}

void JsonLoader::skip_whitespace() {
const char* p = m_buf;
while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') {
++p;
}
m_buf = p;
}

std::unique_ptr<JsonLoader::Value> JsonLoader::parse_object() {
expect('{');
skip_whitespace();

std::unique_ptr<JsonLoader::Value> ret;
JsonLoader::ObjectValue* pObject = new JsonLoader::ObjectValue();

if ('}' == *m_buf) {
m_buf = m_buf + 1;
ret.reset((JsonLoader::Value*)(pObject));
return ret;
}

while (true) {
std::unique_ptr<JsonLoader::Value> key = parse_string();
if (m_state != State::OK) {
return ret;
}

skip_whitespace();
if (':' != (*m_buf)) {
m_state = State::MISS_COLON;
return ret;
}
m_buf++;
skip_whitespace();

std::unique_ptr<JsonLoader::Value> pVal = parse_value();
if (m_state != State::OK) {
return ret;
}

if (pObject->m_obj.find(pVal->str()) != pObject->m_obj.end()) {
m_state = State::KEY_NOT_UNIQUE;
return ret;
}

pObject->m_obj.insert(std::make_pair(key->str(), std::move(pVal)));

skip_whitespace();
if (',' == (*m_buf)) {
m_buf++;
skip_whitespace();
} else if ('}' == (*m_buf)) {
m_buf++;
break;
} else {
m_state = State::MISS_BRACE;
break;
}
}

ret.reset((JsonLoader::Value*)(pObject));
return ret;
}

std::unique_ptr<JsonLoader::Value> JsonLoader::parse_array() {
expect('[');
skip_whitespace();

std::unique_ptr<JsonLoader::Value> ret;
JsonLoader::ArrayValue* pArray = new JsonLoader::ArrayValue();

if (']' == *m_buf) {
m_buf = m_buf + 1;

ret.reset((JsonLoader::Value*)(pArray));
return ret;
}

while (true) {
std::unique_ptr<JsonLoader::Value> pVal = parse_value();
if (m_state != State::OK) {
mgb_assert(0, "parse value failed during pase array");
return ret;
}

pArray->m_obj.emplace_back(pVal.get());
pVal.release();

skip_whitespace();
if (',' == *m_buf) {
m_buf++;
skip_whitespace();
} else if (']' == *m_buf) {
m_buf++;
break;
} else {
m_state = State::BAD_ARRAY;
return ret;
}
}

ret.reset((JsonLoader::Value*)(pArray));
return ret;
}

std::unique_ptr<JsonLoader::Value> JsonLoader::parse_string() {
expect('\"');

std::unique_ptr<JsonLoader::Value> ret;
JsonLoader::StringValue* pStr = new JsonLoader::StringValue();

const char* p = m_buf;
while (true) {
if (*p == '\"') {
p++;
break;
} else {
pStr->m_value += (*p);
p++;
}
}
m_buf = p;
ret.reset((JsonLoader::Value*)(pStr));
return ret;
}

std::unique_ptr<JsonLoader::Value> JsonLoader::parse_number() {
const char* p = m_buf;

auto loop_digit = [this](const char*& p) {
if (not std::isdigit(*p)) {
m_state = State::BAD_DIGIT;
return;
}
while (std::isdigit(*p)) {
p++;
}
return;
};

if (*p == '-')
p++;
if (*p == '0')
p++;
else {
loop_digit(std::ref(p));
}
if (*p == '.') {
p++;
loop_digit(std::ref(p));
}

if (*p == 'e' || *p == 'E') {
p++;
if (*p == '+' || *p == '-')
p++;
loop_digit(std::ref(p));
}
JsonLoader::NumberValue* pNum = new JsonLoader::NumberValue();
pNum->m_value = strtod(m_buf, nullptr);

m_buf = p;

std::unique_ptr<JsonLoader::Value> ret;
ret.reset((JsonLoader::Value*)(pNum));
return ret;
}

std::unique_ptr<JsonLoader::Value> JsonLoader::parse_value() {
switch (*m_buf) {
case '[':
return parse_array();
case '{':
return parse_object();
case '\"':
return parse_string();
case '\0':
m_state = State::BAD_TYPE;
break;
default:
return parse_number();
}
return nullptr;
}

std::unique_ptr<JsonLoader::Value> JsonLoader::load(
const char* content, const size_t size) {
m_buf = content;
skip_whitespace();
std::unique_ptr<JsonLoader::Value> value = parse_value();
skip_whitespace();

if (m_state != State::OK) {
return nullptr;
}
mgb_assert(size == static_cast<size_t>(m_buf - content));

return value;
}

std::unique_ptr<JsonLoader::Value> JsonLoader::load(const char* path) {
std::unique_ptr<std::FILE, void (*)(std::FILE*)> fin(
std::fopen(path, "rb"), [](std::FILE* fp) { std::fclose(fp); });

mgb_assert(fin.get(), "failed to open %s: %s", path, strerror(errno));
std::fseek(fin.get(), 0, SEEK_END);
const size_t size = ftell(fin.get());
std::fseek(fin.get(), 0, SEEK_SET);

std::unique_ptr<char[]> buf(new char[size]);

auto nr = std::fread(buf.get(), 1, size, fin.get());
mgb_assert(nr == size);

return load(buf.get(), size);
}

lite/load_and_run/src/helpers/json_loader.h  (+183, -0)

@@ -0,0 +1,183 @@
/**
* \file lite/load_and_run/src/helpers/json_loader.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#pragma once

#include <cctype>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include "megbrain/common.h"
#include "megdnn/thin/small_vector.h"

namespace mgb {
/*!
* \brief JSON format data loader for --input
*/
class JsonLoader {
public:
// base class for different value format
class Value {
protected:
enum struct Type : uint8_t { UNKNOWN, NUMBER, STRING, OBJECT, ARRAY };
Type m_type;

public:
template <typename T>
T* safe_cast();

Value() { m_type = Type::UNKNOWN; }

Value(Type type) : m_type(type) {}

virtual ~Value() {}

bool is_array() { return Type::ARRAY == m_type; }

bool is_object() { return Type::OBJECT == m_type; }

bool is_number() { return Type::NUMBER == m_type; }

bool is_str() { return Type::STRING == m_type; }

std::unique_ptr<Value>& operator[](const std::string& key);

std::unique_ptr<Value>& operator[](const size_t index);

std::map<std::string, std::unique_ptr<Value>>& objects();

size_t len();

megdnn::SmallVector<std::unique_ptr<Value>>& array();

double number();

std::string str();
};

void expect(char c);

void skip_whitespace();

std::unique_ptr<Value> parse_object();

std::unique_ptr<Value> parse_array();

std::unique_ptr<Value> parse_string();

std::unique_ptr<Value> parse_number();

std::unique_ptr<Value> parse_value();

enum struct State : uint8_t {
OK = 0,
BAD_TYPE,
BAD_DIGIT,
BAD_ARRAY,
MISS_COLON,
MISS_BRACE,
KEY_NOT_UNIQUE
};

JsonLoader() { m_state = State::OK; }

std::unique_ptr<Value> load(const char* content, const size_t size);

std::unique_ptr<Value> load(const char* path);

class NumberValue final : public Value {
friend std::unique_ptr<Value> JsonLoader::parse_number();
double m_value;

public:
NumberValue() : Value(Type::NUMBER) {}

double value() { return m_value; }
};

class StringValue final : public Value {
std::string m_value;

public:
StringValue() : Value(Type::STRING) {}

std::string value() { return m_value; }

friend std::unique_ptr<Value> JsonLoader::parse_string();
};

class ArrayValue final : public Value {
megdnn::SmallVector<std::unique_ptr<Value>> m_obj;

public:
ArrayValue() : Value(Type::ARRAY) {}

ArrayValue(ArrayValue& arr) : Value(arr) {
m_obj.clear();
for (auto& item : arr.m_obj) {
m_obj.emplace_back(item.get());
item.release();
}
}

ArrayValue(ArrayValue&& arr) : Value(arr) {
m_obj.clear();
for (auto& item : arr.m_obj) {
m_obj.emplace_back(item.get());
item.release();
}
}

friend std::unique_ptr<Value> JsonLoader::parse_array();
friend std::unique_ptr<JsonLoader::Value>& JsonLoader::Value::operator[](
const size_t index);
friend megdnn::SmallVector<std::unique_ptr<JsonLoader::Value>>& JsonLoader::
Value::array();
friend size_t JsonLoader::Value::len();
};

class ObjectValue final : public Value {
std::map<std::string, std::unique_ptr<Value>> m_obj;

public:
ObjectValue() : Value(Type::OBJECT) {}

ObjectValue(ObjectValue& arr) : Value(arr) {
m_obj.clear();
for (auto itra = arr.m_obj.begin(); itra != arr.m_obj.end(); ++itra) {
m_obj.emplace(std::make_pair(itra->first, std::move(itra->second)));
}
}

ObjectValue(ObjectValue&& arr) : Value(arr) {
m_obj.clear();
for (auto itra = arr.m_obj.begin(); itra != arr.m_obj.end(); ++itra) {
m_obj.emplace(std::make_pair(itra->first, std::move(itra->second)));
}
}

friend std::unique_ptr<Value> JsonLoader::parse_object();
friend std::unique_ptr<JsonLoader::Value>& JsonLoader::Value::operator[](
const std::string&);
friend std::map<std::string, std::unique_ptr<JsonLoader::Value>>& JsonLoader::
Value::objects();
friend size_t JsonLoader::Value::len();
};

private:
const char* m_buf;
State m_state;
};

} // namespace mgb
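A minimal usage sketch for JsonLoader (not part of this commit), assuming an input file laid out like the one DataParser::parse_json consumes earlier in this commit, i.e. {"blob": {"shape": [...], "type": "float32", "raw": [...]}}:

#include <cstdio>
#include "helpers/json_loader.h"

void print_shapes(const char* path) {
    mgb::JsonLoader json;
    auto root = json.load(path);  // returns nullptr on malformed JSON
    mgb_assert(root != nullptr, "failed to parse %s", path);
    for (auto& item : root->objects()) {      // top-level key/value pairs
        auto&& blob = *item.second;
        auto&& shape = blob["shape"];         // object member access
        for (auto&& dim : shape->array()) {   // nested array access
            printf("%s: dim %zu\n", item.first.c_str(),
                   static_cast<size_t>(dim->number()));
        }
    }
}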

lite/load_and_run/src/helpers/npy.h  (+615, -0)

@@ -0,0 +1,615 @@
/*
Copyright 2017 Leon Merten Lohse

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

#ifndef NPY_H
#define NPY_H

#include <algorithm>
#include <complex>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

namespace npy {

/* Compile-time test for byte order.
If your compiler does not define these per default, you may want to define
one of these constants manually.
Defaults to little endian order. */
#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN || \
defined(__BIG_ENDIAN__) || defined(__ARMEB__) || defined(__THUMBEB__) || \
defined(__AARCH64EB__) || defined(_MIBSEB) || defined(__MIBSEB) || \
defined(__MIBSEB__)
const bool big_endian = true;
#else
const bool big_endian = false;
#endif

const char magic_string[] = "\x93NUMPY";
const size_t magic_string_length = 6;

const char little_endian_char = '<';
const char big_endian_char = '>';
const char no_endian_char = '|';

constexpr char host_endian_char = (big_endian ? big_endian_char : little_endian_char);

/* npy array length */
typedef unsigned long int ndarray_len_t;

inline void write_magic(
std::ostream& ostream, unsigned char v_major = 1, unsigned char v_minor = 0) {
ostream.write(magic_string, magic_string_length);
ostream.put(v_major);
ostream.put(v_minor);
}

inline void read_magic(
std::istream& istream, unsigned char& v_major, unsigned char& v_minor) {
char buf[magic_string_length + 2];
istream.read(buf, magic_string_length + 2);

if (!istream) {
fprintf(stderr, "io error: failed reading file");
}

if (0 != std::memcmp(buf, magic_string, magic_string_length)) {
fprintf(stderr, "this file does not have a valid npy format.");
}

v_major = buf[magic_string_length];
v_minor = buf[magic_string_length + 1];
}

// typestring magic
struct Typestring {
private:
char c_endian;
char c_type;
int len;

public:
inline std::string str() {
const size_t max_buflen = 16;
char buf[max_buflen];
std::sprintf(buf, "%c%c%u", c_endian, c_type, len);
return std::string(buf);
}

Typestring(const std::vector<float>&)
: c_endian{host_endian_char}, c_type{'f'}, len{sizeof(float)} {}
Typestring(const std::vector<double>&)
: c_endian{host_endian_char}, c_type{'f'}, len{sizeof(double)} {}
Typestring(const std::vector<long double>&)
: c_endian{host_endian_char}, c_type{'f'}, len{sizeof(long double)} {}

Typestring(const std::vector<char>&)
: c_endian{no_endian_char}, c_type{'i'}, len{sizeof(char)} {}
Typestring(const std::vector<short>&)
: c_endian{host_endian_char}, c_type{'i'}, len{sizeof(short)} {}
Typestring(const std::vector<int>&)
: c_endian{host_endian_char}, c_type{'i'}, len{sizeof(int)} {}
Typestring(const std::vector<long>&)
: c_endian{host_endian_char}, c_type{'i'}, len{sizeof(long)} {}
Typestring(const std::vector<long long>&)
: c_endian{host_endian_char}, c_type{'i'}, len{sizeof(long long)} {}

Typestring(const std::vector<unsigned char>&)
: c_endian{no_endian_char}, c_type{'u'}, len{sizeof(unsigned char)} {}
Typestring(const std::vector<unsigned short>&)
: c_endian{host_endian_char}, c_type{'u'}, len{sizeof(unsigned short)} {}
Typestring(const std::vector<unsigned int>&)
: c_endian{host_endian_char}, c_type{'u'}, len{sizeof(unsigned int)} {}
Typestring(const std::vector<unsigned long>&)
: c_endian{host_endian_char}, c_type{'u'}, len{sizeof(unsigned long)} {}
Typestring(const std::vector<unsigned long long>&)
: c_endian{host_endian_char},
c_type{'u'},
len{sizeof(unsigned long long)} {}

Typestring(const std::vector<std::complex<float>>&)
: c_endian{host_endian_char},
c_type{'c'},
len{sizeof(std::complex<float>)} {}
Typestring(const std::vector<std::complex<double>>&)
: c_endian{host_endian_char},
c_type{'c'},
len{sizeof(std::complex<double>)} {}
Typestring(const std::vector<std::complex<long double>>&)
: c_endian{host_endian_char},
c_type{'c'},
len{sizeof(std::complex<long double>)} {}
};

inline void parse_typestring(std::string typestring) {
std::regex re("'([<>|])([ifuc])(\\d+)'");
std::smatch sm;

std::regex_match(typestring, sm, re);

if (sm.size() != 4) {
fprintf(stderr, "invalid typestring");
}
}

namespace pyparse {

/**
Removes leading and trailing whitespace
*/
inline std::string trim(const std::string& str) {
const std::string whitespace = " \t";
auto begin = str.find_first_not_of(whitespace);

if (begin == std::string::npos)
return "";

auto end = str.find_last_not_of(whitespace);

return str.substr(begin, end - begin + 1);
}

inline std::string get_value_from_map(const std::string& mapstr) {
size_t sep_pos = mapstr.find_first_of(":");
if (sep_pos == std::string::npos)
return "";

std::string tmp = mapstr.substr(sep_pos + 1);
return trim(tmp);
}

/**
Parses the string representation of a Python dict

The keys need to be known and may not appear anywhere else in the data.
*/
inline std::unordered_map<std::string, std::string> parse_dict(
std::string in, std::vector<std::string>& keys) {
std::unordered_map<std::string, std::string> map;

if (keys.size() == 0)
return map;

in = trim(in);

// unwrap dictionary
if ((in.front() == '{') && (in.back() == '}'))
in = in.substr(1, in.length() - 2);
else {
fprintf(stderr, "Not a Python dictionary.");
}

std::vector<std::pair<size_t, std::string>> positions;

for (auto const& value : keys) {
size_t pos = in.find("'" + value + "'");

if (pos == std::string::npos) {
fprintf(stderr, "Missing %s key.", value.c_str());
}

std::pair<size_t, std::string> position_pair{pos, value};
positions.push_back(position_pair);
}

// sort by position in dict
std::sort(positions.begin(), positions.end());

for (size_t i = 0; i < positions.size(); ++i) {
std::string raw_value;
size_t begin{positions[i].first};
size_t end{std::string::npos};

std::string key = positions[i].second;

if (i + 1 < positions.size())
end = positions[i + 1].first;

raw_value = in.substr(begin, end - begin);

raw_value = trim(raw_value);

if (raw_value.back() == ',')
raw_value.pop_back();

map[key] = get_value_from_map(raw_value);
}

return map;
}

/**
Parses the string representation of a Python boolean
*/
inline bool parse_bool(const std::string& in) {
if (in == "True")
return true;
if (in == "False")
return false;

fprintf(stderr, "Invalid python boolan.");
return false;
}

/**
Parses the string representation of a Python str
*/
inline std::string parse_str(const std::string& in) {
if ((in.front() == '\'') && (in.back() == '\''))
return in.substr(1, in.length() - 2);

fprintf(stderr, "Invalid python string.");
return "";
}

/**
Parses the string representation of a Python tuple into a vector of its items
*/
inline std::vector<std::string> parse_tuple(std::string in) {
std::vector<std::string> v;
const char seperator = ',';

in = trim(in);

if ((in.front() == '(') && (in.back() == ')'))
in = in.substr(1, in.length() - 2);
else {
fprintf(stderr, "Invalid Python tuple.");
}

std::istringstream iss(in);

for (std::string token; std::getline(iss, token, seperator);) {
v.push_back(token);
}

return v;
}

template <typename T>
inline std::string write_tuple(const std::vector<T>& v) {
if (v.size() == 0)
return "";

std::ostringstream ss;

if (v.size() == 1) {
ss << "(" << v.front() << ",)";
} else {
const std::string delimiter = ", ";
// v.size() > 1
ss << "(";
std::copy(
v.begin(), v.end() - 1,
std::ostream_iterator<T>(ss, delimiter.c_str()));
ss << v.back();
ss << ")";
}

return ss.str();
}

inline std::string write_boolean(bool b) {
if (b)
return "True";
else
return "False";
}

} // namespace pyparse

inline void parse_header(std::string header, std::string& descr) {
/*
The first 6 bytes are a magic string: exactly "\x93NUMPY".
The next 1 byte is an unsigned byte: the major version number of the file
format, e.g. \x01. The next 1 byte is an unsigned byte: the minor version
number of the file format, e.g. \x00. Note: the version of the file format
is not tied to the version of the numpy package. The next 2 bytes form a
little-endian unsigned short int: the length of the header data
HEADER_LEN. The next HEADER_LEN bytes form the header data describing the
array's format. It is an ASCII string which contains a Python literal
expression of a dictionary. It is terminated by a newline ('\n') and
padded with spaces
('\x20') to make the total length of the magic string + 4 + HEADER_LEN be
evenly divisible by 16 for alignment purposes. The dictionary contains
three keys:

"descr" : dtype.descr
An object that can be passed as an argument to the numpy.dtype()
constructor to create the array's dtype. For repeatability and
readability, this dictionary is formatted using pprint.pformat() so the
keys are in alphabetic order.
*/

// remove trailing newline
if (header.back() != '\n')
fprintf(stderr, "invalid header");
header.pop_back();

// parse the dictionary
std::vector<std::string> keys{"descr"};
auto dict_map = npy::pyparse::parse_dict(header, keys);

if (dict_map.size() == 0)
fprintf(stderr, "invalid dictionary in header");

std::string descr_s = dict_map["descr"];
parse_typestring(descr_s);
// strip the surrounding quotes from the descr string
descr = npy::pyparse::parse_str(descr_s);
return;
}

inline void parse_header(
std::string header, std::string& descr, bool& fortran_order,
std::vector<ndarray_len_t>& shape) {
/*
The first 6 bytes are a magic string: exactly "\x93NUMPY".
The next 1 byte is an unsigned byte: the major version number of the file
format, e.g. \x01. The next 1 byte is an unsigned byte: the minor version
number of the file format, e.g. \x00. Note: the version of the file format
is not tied to the version of the numpy package. The next 2 bytes form a
little-endian unsigned short int: the length of the header data
HEADER_LEN. The next HEADER_LEN bytes form the header data describing the
array's format. It is an ASCII string which contains a Python literal
expression of a dictionary. It is terminated by a newline ('\n') and
padded with spaces
('\x20') to make the total length of the magic string + 4 + HEADER_LEN be
evenly divisible by 16 for alignment purposes. The dictionary contains
three keys:

"descr" : dtype.descr
An object that can be passed as an argument to the numpy.dtype()
constructor to create the array's dtype. "fortran_order" : bool Whether
the array data is Fortran-contiguous or not. Since Fortran-contiguous
arrays are a common form of non-C-contiguity, we allow them to be written
directly to disk for efficiency. "shape" : tuple of int The shape of the
array. For repeatability and readability, this dictionary is formatted
using pprint.pformat() so the keys are in alphabetic order.
*/

// remove trailing newline
if (header.back() != '\n')
fprintf(stderr, "invalid header");
header.pop_back();

// parse the dictionary
std::vector<std::string> keys{"descr", "fortran_order", "shape"};
auto dict_map = npy::pyparse::parse_dict(header, keys);

if (dict_map.size() == 0)
fprintf(stderr, "invalid dictionary in header");

std::string descr_s = dict_map["descr"];
std::string fortran_s = dict_map["fortran_order"];
std::string shape_s = dict_map["shape"];

// TODO: extract info from typestring
parse_typestring(descr_s);
// strip the surrounding quotes from the descr string
descr = npy::pyparse::parse_str(descr_s);

// convert literal Python bool to C++ bool
fortran_order = npy::pyparse::parse_bool(fortran_s);

// parse the shape tuple
auto shape_v = npy::pyparse::parse_tuple(shape_s);
if (shape_v.size() == 0)
fprintf(stderr, "invalid shape tuple in header");

for (auto item : shape_v) {
ndarray_len_t dim = static_cast<ndarray_len_t>(std::stoul(item));
shape.push_back(dim);
}
}

inline std::string write_header_dict(
const std::string& descr, bool fortran_order,
const std::vector<ndarray_len_t>& shape) {
std::string s_fortran_order = npy::pyparse::write_boolean(fortran_order);
std::string shape_s = npy::pyparse::write_tuple(shape);

return "{'descr': '" + descr + "', 'fortran_order': " + s_fortran_order +
", 'shape': " + shape_s + ", }";
}

inline void write_header(
std::ostream& out, const std::string& descr, bool fortran_order,
const std::vector<ndarray_len_t>& shape_v) {
std::string header_dict = write_header_dict(descr, fortran_order, shape_v);

size_t length = magic_string_length + 2 + 2 + header_dict.length() + 1;

unsigned char version[2] = {1, 0};
if (length >= 255 * 255) {
length = magic_string_length + 2 + 4 + header_dict.length() + 1;
version[0] = 2;
version[1] = 0;
}
size_t padding_len = 16 - length % 16;
std::string padding(padding_len, ' ');

// write magic
write_magic(out, version[0], version[1]);

// write header length
if (version[0] == 1 && version[1] == 0) {
char header_len_le16[2];
uint16_t header_len =
static_cast<uint16_t>(header_dict.length() + padding.length() + 1);

header_len_le16[0] = (header_len >> 0) & 0xff;
header_len_le16[1] = (header_len >> 8) & 0xff;
out.write(reinterpret_cast<char*>(header_len_le16), 2);
} else {
char header_len_le32[4];
uint32_t header_len =
static_cast<uint32_t>(header_dict.length() + padding.length() + 1);

header_len_le32[0] = (header_len >> 0) & 0xff;
header_len_le32[1] = (header_len >> 8) & 0xff;
header_len_le32[2] = (header_len >> 16) & 0xff;
header_len_le32[3] = (header_len >> 24) & 0xff;
out.write(reinterpret_cast<char*>(header_len_le32), 4);
}

out << header_dict << padding << '\n';
}

inline std::string read_header(std::istream& istream) {
// check magic bytes and version number
unsigned char v_major, v_minor;
read_magic(istream, v_major, v_minor);

uint32_t header_length = 0;
if (v_major == 1 && v_minor == 0) {
char header_len_le16[2];
istream.read(header_len_le16, 2);
header_length = (header_len_le16[0] << 0) | (header_len_le16[1] << 8);

if ((magic_string_length + 2 + 2 + header_length) % 16 != 0) {
// TODO: display warning
}
} else if (v_major == 2 && v_minor == 0) {
char header_len_le32[4];
istream.read(header_len_le32, 4);

header_length = (header_len_le32[0] << 0) | (header_len_le32[1] << 8) |
(header_len_le32[2] << 16) | (header_len_le32[3] << 24);

if ((magic_string_length + 2 + 4 + header_length) % 16 != 0) {
// TODO: display warning
}
} else {
fprintf(stderr, "unsupported file format version");
}

// size the buffer before reading into it (reserve() alone leaves the storage
// uninitialized, so reading into it would be undefined behaviour)
auto buf_v = std::vector<char>(header_length);
istream.read(buf_v.data(), header_length);
std::string header(buf_v.data(), header_length);

return header;
}

inline ndarray_len_t comp_size(const std::vector<ndarray_len_t>& shape) {
ndarray_len_t size = 1;
for (ndarray_len_t i : shape)
size *= i;

return size;
}

template <typename Scalar>
inline void SaveArrayAsNumpy(
const std::string& filename, bool fortran_order, unsigned int n_dims,
const unsigned long shape[], const std::vector<Scalar>& data) {
Typestring typestring_o(data);
std::string typestring = typestring_o.str();

std::ofstream stream(filename, std::ofstream::binary);
if (!stream) {
fprintf(stderr, "io error: failed to open a file.");
}

std::vector<ndarray_len_t> shape_v(shape, shape + n_dims);
write_header(stream, typestring, fortran_order, shape_v);

auto size = static_cast<size_t>(comp_size(shape_v));

stream.write(reinterpret_cast<const char*>(data.data()), sizeof(Scalar) * size);
}

template <typename Scalar>
inline void LoadArrayFromNumpy(
const std::string& filename, std::vector<unsigned long>& shape,
std::vector<Scalar>& data) {
bool fortran_order;
LoadArrayFromNumpy<Scalar>(filename, shape, fortran_order, data);
}

template <typename Scalar>
inline void LoadArrayFromNumpy(
const std::string& filename, std::vector<unsigned long>& shape,
bool& fortran_order, std::vector<Scalar>& data) {
std::ifstream stream(filename, std::ifstream::binary);
if (!stream) {
fprintf(stderr, "io error: failed to open a file.");
}

std::string header = read_header(stream);

// parse header
std::string typestr;

parse_header(header, typestr, fortran_order, shape);

// check if the typestring matches the given one
Typestring typestring_o{data};
std::string expect_typestr = typestring_o.str();
if (typestr != expect_typestr) {
fprintf(stderr, "formatting error: typestrings not matching");
}

// compute the data size based on the shape
auto size = static_cast<size_t>(comp_size(shape));
data.resize(size);

// read the data
stream.read(reinterpret_cast<char*>(data.data()), sizeof(Scalar) * size);
}

inline void LoadArrayFromNumpy(
const std::string& filename, std::string& type_str,
std::vector<ndarray_len_t>& shape, std::vector<int8_t>& data) {
std::ifstream stream(filename, std::ifstream::binary);
if (!stream) {
fprintf(stderr, "io error: failed to open a file.");
}

std::string header = read_header(stream);
bool fortran_order;
// parse header
parse_header(header, type_str, fortran_order, shape);

// extract the element size (in bytes) from the last character of the typestring
std::string size_str = type_str.substr(type_str.size() - 1);
size_t elem_size = atoi(size_str.c_str());

// compute the data size based on the shape
auto byte_size = elem_size * static_cast<size_t>(comp_size(shape));
data.resize(byte_size);

// read the data
stream.read(reinterpret_cast<char*>(data.data()), byte_size);
}

} // namespace npy

#endif // NPY_H
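
For orientation, a minimal round trip through the two template helpers declared above could look like the sketch below. It is illustrative only and not part of this commit; the file name data.npy is arbitrary.

#include <vector>
#include "npy.h"

int main() {
    std::vector<float> src{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
    const unsigned long shape[] = {2, 3};
    // write a float32 array of shape (2, 3) in C order
    npy::SaveArrayAsNumpy("data.npy", /*fortran_order=*/false, 2, shape, src);

    std::vector<unsigned long> loaded_shape;
    std::vector<float> dst;
    // fills loaded_shape and dst from the file written above
    npy::LoadArrayFromNumpy("data.npy", loaded_shape, dst);
    return dst == src ? 0 : 1;
}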

+ 48
- 0
lite/load_and_run/src/helpers/outdumper.cpp View File

@@ -0,0 +1,48 @@
/**
* \file lite/load_and_run/src/helpers/outdumper.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*/

#include "outdumper.h"
#include "megbrain/utils/debug.h"

using namespace lar;

void OutputDumper::set(mgb::SymbolVarArray& symb_var) {
for (auto&& i : symb_var) {
auto&& var = i.node();
DumpInfo info;
info.var_info = mgb::cg::dump_var_info({var});
info.owner_inputs_info = mgb::cg::dump_var_info(var->owner_opr()->input());
info.id = var->id();
m_infos.push_back(info);
}
}

mgb::ComputingGraph::Callback OutputDumper::bind() {
auto& info = m_infos.at(m_bind_id++);
mgb::ComputingGraph::Callback cb = [&info](const mgb::DeviceTensorND& dv) {
info.hv.copy_from(dv);
};
return cb;
}

void OutputDumper::write_to_file() {
if (!dump_file.empty()) {
for (auto&& info : m_infos) {
auto value = mgb::debug::dump_tensor(
info.hv,
mgb::ssprintf(
"var=%s owner_opr_inputs= %s", info.var_info.c_str(),
info.owner_inputs_info.c_str()));
mgb::debug::write_to_file(
mgb::ssprintf(
"%s/run%zu-var %zd", dump_file.c_str(), m_run_id, info.id)
.c_str(),
value);
}
}
m_run_id++;
}

+ 42
- 0
lite/load_and_run/src/helpers/outdumper.h View File

@@ -0,0 +1,42 @@
/**
* \file lite/load_and_run/src/helpers/outdumper.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include "megbrain/serialization/serializer.h"

namespace lar {

/*!
* \brief dumper for model outputs, used by --bin-out-dump
*/
class OutputDumper {
public:
struct DumpInfo {
mgb::HostTensorND hv = {};
std::string var_info;
std::string owner_inputs_info;
size_t id;
};
//! init the dump_file path
OutputDumper(const char* file) { dump_file = file; }

//! set the dump information for every output var
void set(mgb::SymbolVarArray& symb_var);

//! callback function for the specified output when compiling the computing graph
mgb::ComputingGraph::Callback bind();

//! write dumped output into dump_file
void write_to_file();

private:
mgb::SmallVector<DumpInfo> m_infos;
size_t m_run_id = 0;
size_t m_bind_id = 0;
std::string dump_file;
};
} // namespace lar
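
A sketch of how this dumper is presumably wired up when --bin-out-dump is given; load_result and dump_dir are placeholder names, and the real plumbing lives in the plugin options.

lar::OutputDumper dumper(dump_dir);                  // dump_dir: target directory (const char*)
dumper.set(load_result.output_var_list);             // record var/owner info per output

mgb::ComputingGraph::OutputSpec spec;
for (auto&& var : load_result.output_var_list)
    spec.emplace_back(var, dumper.bind());           // one callback per output, in order

auto func = load_result.graph_compile(spec);
func->execute();
func->wait();
dumper.write_to_file();                              // writes run<N>-var<id> files into dump_dir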

+ 119
- 0
lite/load_and_run/src/helpers/text_table.cpp View File

@@ -0,0 +1,119 @@
/**
* \file lite/load_and_run/src/helpers/text_table.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "text_table.h"

using namespace mgb;

namespace {
inline void mid(std::ostream& os, const std::string& str, size_t max_w) {
size_t l = (max_w - str.length()) / 2 + str.length();
size_t r = max_w - l;
os << std::setw(l) << std::right << str;
if (r > 0)
os << std::setw(r) << ' ';
}
inline size_t char_length(char c) {
return c ? 1 : 0;
}
} // namespace

void TextTable::adjuster_last_row() {
if (m_rows.empty())
return;
auto& row = m_rows.back();
if (row.params.horizontal == 0 or row.params.vertical == 0) {
row.params.corner = 0;
}
if (row.params.horizontal != 0 && row.params.vertical != 0 &&
row.params.corner == 0) {
row.params.corner = row.params.horizontal;
}
}

void TextTable::show(std::ostream& os) {
if (m_rows.empty())
return;
auto& last_row = m_rows.front();
bool first = true;
for (auto& row : m_rows) {
auto& lrow =
(last_row.values.size() * char_length(last_row.params.horizontal)) >
(row.values.size() * char_length(row.params.horizontal))
? last_row
: row;
// line before row
if (lrow.params.horizontal) {
if (not first)
os << std::endl;
os << m_prefix;
if (lrow.params.corner)
os << lrow.params.corner;
size_t skip_size = 0;
// table name
if (first) {
os << m_name;
skip_size = m_name.length();
}
for (size_t i = 0; i < lrow.values.size(); ++i) {
auto max_w = m_cols_max_w.at(i) + m_padding * 2;
if (max_w + char_length(lrow.params.corner) <= skip_size) {
skip_size = skip_size - max_w - char_length(lrow.params.corner);
continue;
}
size_t rest = max_w + char_length(lrow.params.corner) - skip_size;
skip_size = 0;
if (rest > char_length(lrow.params.corner)) {
os << std::string(
rest - char_length(lrow.params.corner),
lrow.params.horizontal);
rest = char_length(lrow.params.corner);
}
if (rest > 0 && lrow.params.corner)
os << lrow.params.corner;
}
} else if (first) {
os << m_prefix << ' ' << m_name;
}
first = false;
os << std::endl << m_prefix;
if (row.params.vertical)
os << row.params.vertical;
// row
for (size_t i = 0; i < row.values.size(); ++i) {
auto& str = row.values.at(i);
auto max_w = m_cols_max_w.at(i) + 2 * m_padding;
if (row.params.align == Align::Mid) {
mid(os, str, max_w);
} else if (row.params.align == Align::Left) {
os << std::setw(max_w) << std::left << str;
} else {
os << std::setw(max_w) << std::right << str;
}
if (row.params.vertical)
os << row.params.vertical;
}
last_row = row;
}
if (last_row.params.horizontal) {
os << std::endl << m_prefix;
if (last_row.params.corner)
os << last_row.params.corner;
for (size_t i = 0; i < last_row.values.size(); ++i) {
auto max_w = m_cols_max_w.at(i);
std::string tmp(max_w + m_padding * 2, last_row.params.horizontal);
os << tmp;
if (last_row.params.corner)
os << last_row.params.corner;
}
}
}

+ 133
- 0
lite/load_and_run/src/helpers/text_table.h View File

@@ -0,0 +1,133 @@
/**
* \file lite/load_and_run/src/helpers/text_table.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#pragma once

#include <array>
#include <iomanip>
#include <ostream>
#include <sstream>
#include <string>
#include <tuple>
#include <type_traits>
#include <vector>
#include "megbrain/common.h"

namespace mgb {

class TextTable {
public:
enum Level { Summary, Detail };
enum class Align : int { Left, Right, Mid };
explicit TextTable(const std::string& table_name) : m_name(table_name) {}
TextTable& horizontal(char c) {
m_row.params.horizontal = c;
return *this;
}
TextTable& vertical(char c) {
m_row.params.vertical = c;
return *this;
}
TextTable& corner(char c) {
m_row.params.corner = c;
return *this;
}
TextTable& align(Align v) {
m_row.params.align = v;
return *this;
}
TextTable& padding(size_t w) {
m_padding = w;
return *this;
}
TextTable& prefix(const std::string& str) {
m_prefix = str;
return *this;
}

template <typename T>
TextTable& add(const T& value) {
m_row.values.emplace_back(value);
if (m_cols_max_w.size() < m_row.values.size()) {
m_cols_max_w.emplace_back(m_row.values.back().length());
} else {
mgb_assert(m_row.values.size() >= 1);
size_t i = m_row.values.size() - 1;
m_cols_max_w[i] = std::max(m_cols_max_w[i], m_row.values.back().length());
}
return *this;
}

template <
typename T,
typename std::enable_if<std::is_floating_point<T>::value, bool>::type = 0>
TextTable& add(const T& value) {
std::stringstream ss;
ss << std::setiosflags(std::ios::fixed) << std::setprecision(2);
ss << value;
m_row.values.emplace_back(ss.str());
if (m_cols_max_w.size() < m_row.values.size()) {
m_cols_max_w.emplace_back(m_row.values.back().length());
} else {
mgb_assert(m_row.values.size() >= 1);
size_t i = m_row.values.size() - 1;
m_cols_max_w[i] = std::max(m_cols_max_w[i], m_row.values.back().length());
}
return *this;
}

template <
typename T,
typename std::enable_if<std::is_integral<T>::value, bool>::type = 0>
TextTable& add(const T& value) {
m_row.values.emplace_back(std::to_string(value));
return *this;
}

void eor() {
m_rows.emplace_back(m_row);
adjuster_last_row();
m_row.values.clear();
}

void reset() {
m_row = {};
m_cols_max_w.clear();
m_padding = 0;
m_rows.clear();
}

void show(std::ostream& os);

private:
void adjuster_last_row();
std::string m_name;
std::vector<size_t> m_cols_max_w;
size_t m_padding = 0;
std::string m_prefix = "";
struct Row {
std::vector<std::string> values;
struct Params {
Align align = Align::Left;
char horizontal = '-', vertical = '|', corner = '+';
} params;
};
std::vector<Row> m_rows;
Row m_row;
};

inline std::ostream& operator<<(std::ostream& stream, TextTable& table) {
table.show(stream);
return stream;
}

} // namespace mgb
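
The table builder above chains its formatting setters and closes each row with eor(); a throwaway fragment (column contents made up) would be:

mgb::TextTable table("bench");
table.padding(1).align(mgb::TextTable::Align::Mid)
        .horizontal('-').vertical('|').corner('+');
table.add("layer").add("time(ms)").eor();    // eor() finishes the current row
table.add("conv1").add("0.42").eor();
table.add("fc").add("0.10").eor();
std::cout << table;                          // operator<< calls show()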

+ 31
- 0
lite/load_and_run/src/main.cpp View File

@@ -0,0 +1,31 @@
/**
* \file lite/load_and_run/src/main.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include <gflags/gflags.h>
#include <string>
#include "strategys/strategy.h"

int main(int argc, char** argv) {
std::string usage = "load_and_run <model_path> [options...]";
if (argc < 2) {
printf("usage: %s\n", usage.c_str());
return -1;
}
gflags::SetUsageMessage(usage);
gflags::SetVersionString("1.0");
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::string model_path = argv[1];
auto strategy = lar::StrategyBase::create_strategy(model_path);
strategy->run();
gflags::ShutDownCommandLineFlags();

return 0;
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 60
- 0
lite/load_and_run/src/models/model.cpp View File

@@ -0,0 +1,60 @@

/**
* \file lite/load_and_run/src/models/model.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/
#include "model.h"
#include <iostream>
#include <memory>
#include "model_lite.h"
#include "model_mdl.h"

using namespace lar;

ModelType ModelBase::get_model_type(std::string model_path) {
//! read magic number of dump file
FILE* fin = fopen(model_path.c_str(), "rb");
mgb_assert(fin, "failed to open %s: %s", model_path.c_str(), strerror(errno));
char buf[16];
mgb_assert(fread(buf, 1, 16, fin) == 16, "read model failed");
fclose(fin);

// get model type
// uint32_t MGB_MAGIC = 0x5342474D
// buf is not null-terminated, so pass its length explicitly
std::string tag(buf, sizeof(buf));
ModelType type;
if (tag.substr(0, 7) == std::string("mgb0001") ||
tag.substr(0, 8) == std::string("mgb0000a") ||
tag.substr(0, 4) == std::string("MGBS") ||
tag.substr(0, 8) == std::string("mgbtest0")) {
type = ModelType::MEGDL_MODEL;

} else {
type = ModelType::LITE_MODEL;
}

return type;
}

std::shared_ptr<ModelBase> ModelBase::create_model(std::string model_path) {
mgb_log_debug("model path %s\n", model_path.c_str());

auto model_type = get_model_type(model_path);

if (ModelType::LITE_MODEL == model_type) {
return std::make_shared<ModelLite>(model_path);
} else if (ModelType::MEGDL_MODEL == model_type) {
if (FLAGS_lite)
return std::make_shared<ModelLite>(model_path);
else
return std::make_shared<ModelMdl>(model_path);
} else {
return nullptr;
}
}
DEFINE_bool(lite, false, "using lite model to run mdl model");
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 49
- 0
lite/load_and_run/src/models/model.h View File

@@ -0,0 +1,49 @@
/**
* \file lite/load_and_run/src/models/model.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#include <string>
#include "helpers/common.h"

DECLARE_bool(lite);

namespace lar {
/*!
* \brief: base class of model
*/
class ModelBase {
public:
//! get model type by the magic number in dump file
static ModelType get_model_type(std::string model_path);

//! create model by different model type
static std::shared_ptr<ModelBase> create_model(std::string model_path);

//! type of the model
virtual ModelType type() = 0;

//! set whether to load the model from shared memory
virtual void set_shared_mem(bool state) = 0;

//! load model interface for load and run strategy
virtual void load_model() = 0;

//! run model interface for load and run strategy
virtual void run_model() = 0;

//! wait asynchronous function interface for load and run strategy
virtual void wait() = 0;

virtual ~ModelBase() = default;
};
} // namespace lar

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
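
A strategy drives this interface in a fixed order; stripped of option handling it amounts to roughly the following sketch (the path is a placeholder, and for ModelMdl the strategy additionally builds the output spec before running, see the sketch after model_mdl.h below):

auto model = lar::ModelBase::create_model("model.mge");  // type picked by magic number
mgb_assert(model, "unsupported model format");
model->set_shared_mem(false);
model->load_model();
model->run_model();   // kick off (possibly asynchronous) execution
model->wait();        // block until this iteration finishes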

+ 50
- 0
lite/load_and_run/src/models/model_lite.cpp View File

@@ -0,0 +1,50 @@
/**
* \file lite/load_and_run/src/models/model_lite.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/
#include "model_lite.h"
#include <gflags/gflags.h>
#include <cstring>
#include "misc.h"

DECLARE_bool(share_param_mem);

using namespace lar;
ModelLite::ModelLite(const std::string& path) : model_path(path) {
LITE_WARN("creat lite model use CPU as default comp node");
};
void ModelLite::load_model() {
m_network = std::make_shared<lite::Network>(config, IO);
if (share_model_mem) {
//! WARNING: it may not be right to share param memory for this
LITE_WARN("enable share model memory");

FILE* fin = fopen(model_path.c_str(), "rb");
LITE_ASSERT(fin, "failed to open %s: %s", model_path.c_str(), strerror(errno));
fseek(fin, 0, SEEK_END);
size_t size = ftell(fin);
fseek(fin, 0, SEEK_SET);

void* ptr = malloc(size);
std::shared_ptr<void> buf{ptr, free};
auto nr = fread(buf.get(), 1, size, fin);
LITE_ASSERT(nr == size, "read model file failed");
fclose(fin);

m_network->load_model(buf.get(), size);
} else {
m_network->load_model(model_path);
}
}

void ModelLite::run_model() {
m_network->forward();
}

void ModelLite::wait() {
m_network->wait();
}

+ 73
- 0
lite/load_and_run/src/models/model_lite.h View File

@@ -0,0 +1,73 @@
/**
* \file lite/load_and_run/src/models/model_lite.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once

#include <string>
#include "helpers/common.h"
#include "helpers/data_parser.h"
#include "lite/network.h"
#include "model.h"

namespace lar {
/*!
* \brief: megengine lite model
*/
class ModelLite : public ModelBase {
public:
using Strategy = LiteAlgoSelectStrategy;

ModelLite(const std::string& path);
//! model type
ModelType type() override { return ModelType::LITE_MODEL; }

//! set to load from shared memory
void set_shared_mem(bool state) override { share_model_mem = state; }

//! load model from dump file
void load_model() override;

//! run model with given runtime parameter
void run_model() override;

//! wait for the end of the asynchronous execution
void wait() override;

//! get the network of lite model
std::shared_ptr<lite::Network> get_lite_network() { return m_network; }

//! get the config of lite model
lite::Config& get_config() { return config; }

//! get the networkIO of lite model
lite::NetworkIO& get_networkIO() { return IO; }

//! get the data parser
DataParser& get_input_parser() { return parser; }

//! set the strategy before loading the model
void set_lite_strategy(Strategy& u_strategy) { m_strategy = u_strategy; }

//! get algo strategy
Strategy& get_lite_strategy() { return m_strategy; }

private:
bool share_model_mem;
std::string model_path;

DataParser parser;
lite::Config config;
lite::NetworkIO IO;

std::shared_ptr<lite::Network> m_network;

Strategy m_strategy;
};
} // namespace lar
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
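
A rough idea of how the accessors above fit together when a strategy prepares a lite model; the path and the input name "data" are made up for the example.

auto lite_model = std::make_shared<lar::ModelLite>("model.lite");
lite_model->get_config().device_type = LiteDeviceType::LITE_CPU;
lite_model->load_model();

auto network = lite_model->get_lite_network();
auto input = network->get_io_tensor("data");   // lite::Tensor for the input
// ... fill `input`, e.g. with data parsed by get_input_parser() ...
lite_model->run_model();
lite_model->wait();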

+ 105
- 0
lite/load_and_run/src/models/model_mdl.cpp View File

@@ -0,0 +1,105 @@
/**
* \file lite/load_and_run/src/models/model_mdl.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include "model_mdl.h"
#include <gflags/gflags.h>
#include <iostream>

DECLARE_bool(share_param_mem);

using namespace lar;

ModelMdl::ModelMdl(const std::string& path) : model_path(path) {
mgb_log_warn("creat mdl model use XPU as default comp node");
m_load_config.comp_graph = mgb::ComputingGraph::make();
m_load_config.comp_graph->options().graph_opt_level = 0;
testcase_num = 0;
}

void ModelMdl::load_model() {
//! read dump file
if (share_model_mem) {
mgb_log_warn("enable share model memory");
FILE* fin = fopen(model_path.c_str(), "rb");
mgb_assert(fin, "failed to open %s: %s", model_path.c_str(), strerror(errno));
fseek(fin, 0, SEEK_END);
size_t size = ftell(fin);
fseek(fin, 0, SEEK_SET);

void* ptr = malloc(size);
std::shared_ptr<void> buf{ptr, free};
auto nr = fread(buf.get(), 1, size, fin);
mgb_assert(nr == size, "read model file failed");
fclose(fin);

m_model_file = mgb::serialization::InputFile::make_mem_proxy(buf, size);
} else {
m_model_file = mgb::serialization::InputFile::make_fs(model_path.c_str());
}

//! get dump_with_testcase model testcase number
char magic[8];
m_model_file->read(magic, sizeof(magic));
if (strncmp(magic, "mgbtest0", 8)) {
m_model_file->rewind();
} else {
m_model_file->read(&testcase_num, sizeof(testcase_num));
}

auto format =
mgb::serialization::GraphLoader::identify_graph_dump_format(*m_model_file);
mgb_assert(
format.valid(),
"invalid format, please make sure model is dumped by GraphDumper");

//! load computing graph of model
m_loader = mgb::serialization::GraphLoader::make(
std::move(m_model_file), format.val());
m_load_result = m_loader->load(m_load_config, false);
m_load_config.comp_graph.reset();

// get testcase input generated by dump_with_testcase.py
if (testcase_num) {
for (auto&& i : m_load_result.tensor_map) {
test_input_tensors.emplace_back(i.first, i.second.get());
}
std::sort(test_input_tensors.begin(), test_input_tensors.end());
}
// initialize output callback
for (size_t i = 0; i < m_load_result.output_var_list.size(); i++) {
mgb::ComputingGraph::Callback cb;
m_callbacks.push_back(cb);
}
}

void ModelMdl::make_output_spec() {
for (size_t i = 0; i < m_load_result.output_var_list.size(); i++) {
auto item = m_load_result.output_var_list[i];
m_output_spec.emplace_back(item, std::move(m_callbacks[i]));
}

m_asyc_exec = m_load_result.graph_compile(m_output_spec);
}

std::shared_ptr<mgb::serialization::GraphLoader>& ModelMdl::reset_loader() {
m_loader = mgb::serialization::GraphLoader::make(
m_loader->reset_file(), m_loader->format());
return m_loader;
}

void ModelMdl::run_model() {
mgb_assert(
m_asyc_exec != nullptr,
"empty asychronous function to execute after graph compiled");
m_asyc_exec->execute();
}

void ModelMdl::wait() {
m_asyc_exec->wait();
}

+ 117
- 0
lite/load_and_run/src/models/model_mdl.h View File

@@ -0,0 +1,117 @@
/**
* \file lite/load_and_run/src/models/model_mdl.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <string>
#include "megbrain/opr/search_policy/algo_chooser_helper.h"
#include "megbrain/plugin/opr_io_dump.h"
#include "megbrain/serialization/extern_c_opr.h"
#include "megbrain/serialization/serializer.h"
#include "megbrain/utils/debug.h"

#include "megbrain/plugin/num_range_checker.h"
#include "megbrain/plugin/profiler.h"

#include "helpers/common.h"
#include "helpers/data_parser.h"
#include "model.h"

namespace lar {

class ModelMdl : public ModelBase {
public:
using Strategy = mgb::opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
//! interface implement of ModelBase
ModelMdl(const std::string& path);

ModelType type() override { return ModelType::MEGDL_MODEL; }

void set_shared_mem(bool state) override { share_model_mem = state; }

void load_model() override;

void make_output_spec();

void run_model() override;

void wait() override;

//! get load result for megDL model
mgb::serialization::GraphLoader::LoadResult& get_mdl_load_result() {
return m_load_result;
}

//! get load config for megDL model
mgb::serialization::GraphLoadConfig& get_mdl_config() { return m_load_config; }

//! reset the graph loader for dump_with_testcase model
std::shared_ptr<mgb::serialization::GraphLoader>& reset_loader();

//! algo strategy for running the model
void set_mdl_strategy(Strategy& u_strategy) { m_strategy = u_strategy; }
Strategy& get_mdl_strategy() { return m_strategy; }

//! get data parser
DataParser& get_input_parser() { return parser; }
uint32_t get_testcase_num() { return testcase_num; }
std::vector<std::pair<std::string, mgb::HostTensorND*>>& get_test_input() {
return test_input_tensors;
}

//! get output specified configuration
mgb::ComputingGraph::OutputSpec& get_output_spec() { return m_output_spec; }
std::unique_ptr<mgb::cg::AsyncExecutable>& get_async_func() { return m_asyc_exec; }

void set_output_callback(std::vector<mgb::ComputingGraph::Callback>& cb) {
mgb_assert(
m_callbacks.size() == cb.size(),
"invalid output callback list to set!!");
for (size_t i = 0; i < cb.size(); i++) {
m_callbacks[i] = cb[i];
}
}
#if MGB_ENABLE_JSON
std::unique_ptr<mgb::GraphProfiler>& get_profiler() { return m_profiler; }
void set_profiler() {
m_profiler =
std::make_unique<mgb::GraphProfiler>(m_load_config.comp_graph.get());
}
#endif
void set_num_range_checker(float range) {
m_num_range_checker = std::make_unique<mgb::NumRangeChecker>(
m_load_config.comp_graph.get(), range);
}

private:
bool share_model_mem;
std::string model_path;
std::unique_ptr<mgb::serialization::InputFile> m_model_file;
mgb::serialization::GraphLoadConfig m_load_config;

mgb::serialization::GraphLoader::LoadResult m_load_result;
std::shared_ptr<mgb::serialization::GraphLoader> m_loader;
std::unique_ptr<mgb::cg::AsyncExecutable> m_asyc_exec;

uint32_t testcase_num;
std::vector<std::pair<std::string, mgb::HostTensorND*>> test_input_tensors;

DataParser parser;
Strategy m_strategy = Strategy::HEURISTIC;
std::vector<mgb::ComputingGraph::Callback> m_callbacks;
mgb::ComputingGraph::OutputSpec m_output_spec;

std::unique_ptr<mgb::NumRangeChecker> m_num_range_checker;
#if MGB_ENABLE_JSON
std::unique_ptr<mgb::GraphProfiler> m_profiler;
#endif
};

} // namespace lar

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
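
Between load_model() and run_model() a strategy has to attach one callback per output and compile the graph; a trimmed-down version of that step (the path is illustrative and the callbacks here simply drop the results) might read:

auto mdl = std::make_shared<lar::ModelMdl>("model.mdl");
mdl->load_model();

std::vector<mgb::ComputingGraph::Callback> callbacks;
for (size_t i = 0; i < mdl->get_mdl_load_result().output_var_list.size(); ++i)
    callbacks.push_back([](const mgb::DeviceTensorND&) {});  // consume and drop each output

mdl->set_output_callback(callbacks);   // must match the number of outputs
mdl->make_output_spec();               // compiles the graph into the AsyncExecutable
mdl->run_model();
mdl->wait();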

+ 200
- 0
lite/load_and_run/src/options/device_options.cpp View File

@@ -0,0 +1,200 @@
/**
* \file lite/load_and_run/src/options/device_options.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include <iostream>
#include <sstream>
#include "lite/global.h"
#include "megbrain/comp_node_env.h"
#include "misc.h"
#include "device_options.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"

DECLARE_bool(weight_preprocess);

using namespace lar;

/////////////////// XPUDeviceOption //////////////////////
namespace lar {
template <>
void XPUDeviceOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if ((enable_cpu) || (enable_cpu_default) || (enable_multithread) ||
(enable_multithread_default)) {
LITE_WARN("using cpu device\n");
model->get_config().device_type = LiteDeviceType::LITE_CPU;
}
#if MGE_WITH_CUDA
if (enable_cuda) {
model->get_config().device_type = LiteDeviceType::LITE_CUDA;
}
#endif
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
auto network = model->get_lite_network();
if (enable_cpu_default) {
LITE_WARN("using cpu default device\n");
lite::Runtime::set_cpu_inplace_mode(network);
}
if (enable_multithread) {
LITE_WARN("using multithread device\n");
lite::Runtime::set_cpu_threads_number(network, thread_num);
}
if (enable_multithread_default) {
LITE_WARN("using multithread default device\n");
lite::Runtime::set_cpu_inplace_mode(network);
lite::Runtime::set_cpu_threads_number(network, thread_num);
}
if (enable_set_core_ids) {
std::string core_str;
for (auto id : core_ids) {
core_str += std::to_string(id) + ",";
}
LITE_WARN("multi thread core ids: %s\n", core_str.c_str());
lite::ThreadAffinityCallback affinity_callback = [&](size_t thread_id) {
mgb::sys::set_cpu_affinity({core_ids[thread_id]});
};
lite::Runtime::set_runtime_thread_affinity(network, affinity_callback);
}
}
}

template <>
void XPUDeviceOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (enable_cpu) {
mgb_log_warn("using cpu device\n");
model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) {
loc.type = mgb::CompNode::DeviceType::CPU;
};
}
#if MGE_WITH_CUDA
if (enable_cuda) {
mgb_log_warn("using cuda device\n");
model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) {
loc.type = mgb::CompNode::DeviceType::CUDA;
};
}
#endif
if (enable_cpu_default) {
mgb_log_warn("using cpu default device\n");
model->get_mdl_config().comp_node_mapper = [](mgb::CompNode::Locator& loc) {
loc.type = mgb::CompNode::DeviceType::CPU;
loc.device = mgb::CompNode::Locator::DEVICE_CPU_DEFAULT;
};
}
if (enable_multithread) {
mgb_log_warn("using multithread device\n");
model->get_mdl_config().comp_node_mapper =
[&](mgb::CompNode::Locator& loc) {
loc.type = mgb::CompNode::DeviceType::MULTITHREAD;
loc.device = 0;
loc.stream = thread_num;
};
}
if (enable_multithread_default) {
mgb_log_warn("using multithread default device\n");
model->get_mdl_config().comp_node_mapper =
[&](mgb::CompNode::Locator& loc) {
loc.type = mgb::CompNode::DeviceType::MULTITHREAD;
loc.device = mgb::CompNode::Locator::DEVICE_MULTITHREAD_DEFAULT;
loc.stream = thread_num;
};
}
if (enable_set_core_ids) {
std::string core_str;
for (auto id : core_ids) {
core_str += std::to_string(id) + ",";
}
mgb_log_warn("set multi thread core ids:%s\n", core_str.c_str());
auto affinity_callback = [&](size_t thread_id) {
mgb::sys::set_cpu_affinity({core_ids[thread_id]});
};
mgb::CompNode::Locator loc;
model->get_mdl_config().comp_node_mapper(loc);
auto comp_node = mgb::CompNode::load(loc);
mgb::CompNodeEnv::from_comp_node(comp_node).cpu_env().set_affinity(
affinity_callback);
}
}
}
} // namespace lar

XPUDeviceOption::XPUDeviceOption() {
m_option_name = "xpu_device";
enable_cpu = FLAGS_cpu;
#if MGE_WITH_CUDA
enable_cuda = FLAGS_cuda;
#endif
enable_cpu_default = FLAGS_cpu_default;

if (FLAGS_multithread >= 0) {
thread_num = FLAGS_multithread;
enable_multithread = true;
}

if (FLAGS_multithread_default >= 0) {
thread_num = FLAGS_multithread_default;
enable_multithread_default = true;
}

if (!FLAGS_multi_thread_core_ids.empty()) {
mgb_assert(enable_multithread, "core ids should be set after --multithread");
std::stringstream id_stream(FLAGS_multi_thread_core_ids);
std::string id;
size_t thread_cnt = 0;
while (getline(id_stream, id, ',')) {
thread_cnt++;
core_ids.push_back(atoi(id.c_str()));
}
mgb_assert(
thread_cnt == thread_num,
"core ids number should be same with thread number set before");
enable_set_core_ids = true;
}
}

bool XPUDeviceOption::is_valid() {
bool ret = FLAGS_cpu || FLAGS_cpu_default;
#if MGE_WITH_CUDA
ret = ret || FLAGS_cuda;
#endif
ret = ret || FLAGS_multithread >= 0;
ret = ret || FLAGS_multithread_default >= 0;
ret = ret || !FLAGS_multi_thread_core_ids.empty();

return ret;
}

std::shared_ptr<OptionBase> XPUDeviceOption::create_option() {
static std::shared_ptr<lar::XPUDeviceOption> option(new XPUDeviceOption);
if (XPUDeviceOption::is_valid()) {
return std::static_pointer_cast<lar::OptionBase>(option);
} else {
return nullptr;
}
}

void XPUDeviceOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}
///////////////////////// xpu gflags ////////////////////////////
DEFINE_bool(cpu, false, "set CPU device as running device");
#if MGE_WITH_CUDA
DEFINE_bool(cuda, false, "set CUDA device as running device");
#endif
DEFINE_bool(cpu_default, false, "set running device as CPU device with inplace mode");
DEFINE_int32(multithread, -1, "set multithread device as running device");
DEFINE_int32(
multithread_default, -1,
"set multithread device as running device with inplace mode");
DEFINE_string(multi_thread_core_ids, "", "set multithread core id");
REGIST_OPTION_CREATOR(xpu_device, lar::XPUDeviceOption::create_option);

+ 49
- 0
lite/load_and_run/src/options/device_options.h View File

@@ -0,0 +1,49 @@
/**
* \file lite/load_and_run/src/options/device_options.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#include "models/model.h"
#include "option_base.h"

DECLARE_bool(cpu);
#if MGE_WITH_CUDA
DECLARE_bool(cuda);
#endif
DECLARE_bool(cpu_default);
DECLARE_int32(multithread);
DECLARE_int32(multithread_default);
DECLARE_string(multi_thread_core_ids);
namespace lar {

class XPUDeviceOption final : public OptionBase {
public:
static bool is_valid();
static std::shared_ptr<OptionBase> create_option();
void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;
std::string option_name() const override { return m_option_name; };

private:
XPUDeviceOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};
bool enable_cpu;
#if MGE_WITH_CUDA
bool enable_cuda;
#endif
bool enable_cpu_default;
bool enable_multithread;
bool enable_multithread_default;
bool enable_set_core_ids;
size_t thread_num;
std::vector<int> core_ids;
std::string m_option_name;
};
} // namespace lar
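
Every option follows the same pattern: is_valid() checks whether any of its gflags were set, create_option() returns a singleton (or nullptr), and config_model() is invoked once per RunStage so each config_model_internel specialization can hook only the stages it cares about. Assuming RuntimeParam and RunStage come from helpers/common.h and `model` is the std::shared_ptr<ModelBase> created earlier, a strategy uses it roughly like this:

lar::RuntimeParam runtime_param;
auto option = lar::XPUDeviceOption::create_option();   // nullptr unless a device flag was given
if (option) {
    runtime_param.stage = lar::RunStage::BEFORE_MODEL_LOAD;
    option->config_model(runtime_param, model);        // choose comp node / device type
}
model->load_model();
if (option) {
    runtime_param.stage = lar::RunStage::AFTER_MODEL_LOAD;
    option->config_model(runtime_param, model);        // thread count, affinity, inplace mode
}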

+ 216
- 0
lite/load_and_run/src/options/extern_c_opr_options.cpp View File

@@ -0,0 +1,216 @@
/**
* \file lite/load_and_run/src/options/extern_c_opr_options.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include "extern_c_opr_options.h"
#include "megbrain/utils/debug.h"
#include "misc.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"

namespace lar {
template <>
void COprLibOption::config_model_internel(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
MGB_MARK_USED_VAR(model);
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (!lib_path.empty()) {
lite::set_loader_lib_path(lib_path);
}
if (c_opr_args.is_run_c_opr_with_param) {
LITE_THROW(
"lite model dont't support run with external c opr "
"parmeter");
}
}
}
template <>
void COprLibOption::config_model_internel(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (!lib_path.empty()) {
load_lib();
}
if (c_opr_args.is_run_c_opr_with_param) {
mgb_assert(
c_opr_args.is_run_c_opr &&
c_opr_args.copr_param_device_ptr_malloc &&
c_opr_args.copr_param_device_ptr_free &&
c_opr_args.copr_param_device_ptr_h2d,
"--c-opr-lib-with-param need config with --c-opr-lib, also "
"extern c opr loader need implemente "
"copr_param_device_ptr_malloc, copr_param_device_ptr_free "
"and copr_param_device_ptr_h2d symbols");
}
} else if (runtime_param.stage == RunStage::MODEL_RUNNING) {
if (model->get_testcase_num() && c_opr_args.is_run_c_opr_with_param) {
init_extern_param(model);
set_Copr_IO(model);
}
} else if (runtime_param.stage == RunStage::AFTER_RUNNING_ITER) {
if (model->get_testcase_num() && c_opr_args.is_run_c_opr_with_param) {
c_opr_args.copr_param_device_ptr_free(c_opr_param.get());
free(c_opr_param->input);
}
}
}
} // namespace lar

using namespace lar;

MGBDType COprLibOption::dtype_cpp2c(megdnn::DType dtype) {
switch (dtype.enumv()) {
case megdnn::DTypeEnum::Float32:
return MGB_DTYPE_FLOAT32;
case megdnn::DTypeEnum::Int32:
return MGB_DTYPE_INT32;
case megdnn::DTypeEnum::Int16:
return MGB_DTYPE_INT16;
case megdnn::DTypeEnum::Uint8:
return MGB_DTYPE_UINT8;
#if !MEGDNN_DISABLE_FLOAT16
case megdnn::DTypeEnum::Float16:
return MGB_DTYPE_FLOAT16;
#endif
default:
mgb_throw(
mgb::InternalError, "unsupported dtype for extern C API: %s",
dtype.name());
}
}

void COprLibOption::tensor_shape_to_c(
const megdnn::TensorShape& shape, MGBTensorShape& mgb_shape) {
mgb_assert(
shape.ndim <= MGB_TENSOR_MAX_NDIM, "shape ndim too large: %zu", shape.ndim);
mgb_shape.ndim = shape.ndim;
for (size_t i = 0; i < shape.ndim; ++i) {
mgb_shape.shape[i] = shape[i];
}
}

void COprLibOption::init_extern_param(std::shared_ptr<ModelBase> model_ptr) {
auto model = std::static_pointer_cast<ModelMdl>(model_ptr);
auto inp_tensors = model->get_test_input();

c_opr_param = std::make_shared<ExternCOprParam>();
memset(c_opr_param.get(), 0, sizeof(ExternCOprParam));

//! we just test input on npu case, do not test output on
//! npu case, so we just init input shape and type

c_opr_param->nr_input = inp_tensors.size();
c_opr_param->input = (ExternDeviceTensor*)malloc(
sizeof(ExternDeviceTensor) * inp_tensors.size());
memset(c_opr_param->input, 0, sizeof(ExternDeviceTensor) * inp_tensors.size());

//! init input ExternDeviceTensor shape and dtype
for (size_t input_idx = 0; input_idx < inp_tensors.size(); input_idx++) {
auto& mgb_tensor_layout = c_opr_param->input[input_idx].layout;
auto host_tensor_nd_p = inp_tensors[input_idx].second;
mgb_tensor_layout.dtype = dtype_cpp2c(host_tensor_nd_p->dtype());
tensor_shape_to_c(
inp_tensors[input_idx].second->shape(), mgb_tensor_layout.shape);
}
c_opr_param->nr_output = 0;

//! now call copr_param_device_ptr_malloc to malloc
//! device_ptr
c_opr_args.copr_param_device_ptr_malloc(c_opr_param.get());
}

void COprLibOption::load_lib() {
auto handle = dlopen(lib_path.c_str(), RTLD_LAZY);
mgb_assert(handle, "failed to open c opr lib %s: %s", lib_path.c_str(), dlerror());

const char* entry = MGB_C_OPR_INIT_FUNC_STR;
auto func = dlsym(handle, entry);
mgb_assert(func, "can not resolve %s: %s", entry, dlerror());
typedef void (*entry_f_t)(void*);
reinterpret_cast<entry_f_t>(func)(
reinterpret_cast<void*>(&mgb_get_extern_c_opr_api_versioned));
printf("loaded C opr library: %s\n", lib_path.c_str());
entry = "copr_param_device_ptr_malloc";
func = dlsym(handle, entry);
if (func) {
printf("get %s from: %s\n", entry, lib_path.c_str());
c_opr_args.copr_param_device_ptr_malloc =
reinterpret_cast<COprArgs::COPR_PARAM_DEVICE_PTR_MEM_T>(func);
}

entry = "copr_param_device_ptr_free";
func = dlsym(handle, entry);
if (func) {
printf("get %s from: %s\n", entry, lib_path.c_str());
c_opr_args.copr_param_device_ptr_free =
reinterpret_cast<COprArgs::COPR_PARAM_DEVICE_PTR_MEM_T>(func);
}

entry = "copr_param_device_ptr_h2d";
func = dlsym(handle, entry);
if (func) {
printf("get %s from: %s\n", entry, lib_path.c_str());
c_opr_args.copr_param_device_ptr_h2d =
reinterpret_cast<COprArgs::COPR_PARAM_DEVICE_PTR_H2D_T>(func);
}
}

void COprLibOption::set_Copr_IO(std::shared_ptr<ModelBase> model_ptr) {
auto model = std::static_pointer_cast<ModelMdl>(model_ptr);
auto inp_tensors = model->get_test_input();
auto loader = model->reset_loader();
auto testcase = loader->load(model->get_mdl_config(), false);
mgb_assert(testcase.output_var_list.size() == inp_tensors.size());
for (size_t i = 0; i < inp_tensors.size(); ++i) {
auto&& opr = testcase.output_var_list[i]
.node()
->owner_opr()
->cast_final_safe<mgb::opr::SharedDeviceTensor>();
c_opr_args.copr_param_device_ptr_h2d(
c_opr_param.get(), opr.dev_data()->raw_ptr(), i);
}

//! now config c opr dynamic param
config_extern_c_opr_dynamic_param(model->get_async_func(), c_opr_param);
}

COprLibOption::COprLibOption() {
m_option_name = "c_opr_lib";
lib_path = FLAGS_c_opr_lib;
c_opr_args.is_run_c_opr = !lib_path.empty();
c_opr_args.is_run_c_opr_with_param = FLAGS_c_opr_lib_with_param;
}

bool COprLibOption::is_valid() {
return !FLAGS_c_opr_lib.empty() || FLAGS_c_opr_lib_with_param;
}

std::shared_ptr<OptionBase> COprLibOption::create_option() {
static std::shared_ptr<COprLibOption> option(new COprLibOption);
if (COprLibOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void COprLibOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}
DEFINE_string(
c_opr_lib, "",
"Load external operator library. It must implement "
"MGB_C_OPR_INIT_FUNC_STR as the entry point");
DEFINE_bool(
c_opr_lib_with_param, false,
"Run c opr lib with param, use to benchmark speed and check result, "
"need c opr loader implemente `copr_param_device_ptr_malloc, "
"copr_param_device_ptr_free and copr_param_device_ptr_h2d' symbols");

REGIST_OPTION_CREATOR(c_opr_lib, lar::COprLibOption::create_option);

+ 64
- 0
lite/load_and_run/src/options/extern_c_opr_options.h View File

@@ -0,0 +1,64 @@
/**
* \file lite/load_and_run/src/options/extern_c_opr_options.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#include "megbrain/graph/extern_copr_api.h"
#include "models/model.h"
#include "option_base.h"

DECLARE_bool(c_opr_lib_with_param);
DECLARE_string(c_opr_lib);

namespace lar {

struct COprArgs {
//! for run c opr
bool is_run_c_opr = false;
bool is_run_c_opr_with_param = false;
typedef void (*COPR_PARAM_DEVICE_PTR_MEM_T)(ExternCOprParam* param);
typedef void (*COPR_PARAM_DEVICE_PTR_H2D_T)(
ExternCOprParam* param, void* host_ptr, size_t extern_device_tensor_id);
COPR_PARAM_DEVICE_PTR_MEM_T copr_param_device_ptr_malloc = nullptr;
COPR_PARAM_DEVICE_PTR_MEM_T copr_param_device_ptr_free = nullptr;
COPR_PARAM_DEVICE_PTR_H2D_T copr_param_device_ptr_h2d = nullptr;
};

class COprLibOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
COprLibOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

void load_lib();

MGBDType dtype_cpp2c(megdnn::DType dtype);

void tensor_shape_to_c(const megdnn::TensorShape& shape, MGBTensorShape& mgb_shape);

void init_extern_param(std::shared_ptr<ModelBase> model);

void set_Copr_IO(std::shared_ptr<ModelBase> model);

std::string m_option_name;
COprArgs c_opr_args;
std::string lib_path;
std::shared_ptr<ExternCOprParam> c_opr_param;
};
} // namespace lar
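
For reference, an external loader library passed via --c-opr-lib can optionally export the three symbols that load_lib() above looks up with dlsym(). The stubs below only show the expected signatures; the bodies are placeholders, not a working loader.

#include "megbrain/serialization/extern_c_opr.h"

extern "C" void copr_param_device_ptr_malloc(ExternCOprParam* param) {
    // allocate device_ptr for each param->input[i]
}

extern "C" void copr_param_device_ptr_free(ExternCOprParam* param) {
    // release whatever copr_param_device_ptr_malloc allocated
}

extern "C" void copr_param_device_ptr_h2d(
        ExternCOprParam* param, void* host_ptr, size_t extern_device_tensor_id) {
    // copy host_ptr into param->input[extern_device_tensor_id].device_ptr
}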

+ 231
- 0
lite/load_and_run/src/options/fastrun_options.cpp View File

@@ -0,0 +1,231 @@
/**
* \file lite/load_and_run/src/options/fastrun_options.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include <gflags/gflags.h>

#if defined(_WIN32)
#include <io.h>
#define F_OK 0
#define access(a, b) _access(a, b)
#elif __linux__ || __unix__ || __APPLE__
#include <unistd.h>
#endif
#include "fastrun_options.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/utils/infile_persistent_cache.h"
#include "misc.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"

namespace lar {

template <>
void FastRunOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
//! set the algo policy before model load
using Strategy = ModelLite::Strategy;
uint32_t strategy = 0;
#if MGB_ENABLE_FASTRUN
if (enable_full_run) {
LITE_WARN("enable full-run strategy for algo profile");
strategy = static_cast<uint32_t>(Strategy::LITE_ALGO_PROFILE) | strategy;
} else if (enable_fast_run) {
LITE_WARN("enable fast-run strategy for algo profile");
strategy = static_cast<uint32_t>(Strategy::LITE_ALGO_PROFILE) |
static_cast<uint32_t>(Strategy::LITE_ALGO_OPTIMIZED) | strategy;
} else {
strategy = static_cast<uint32_t>(Strategy::LITE_ALGO_HEURISTIC) | strategy;
}
#else
strategy = static_cast<uint32_t>(Strategy::LITE_ALGO_HEURISTIC) | strategy;
#endif
if (batch_binary_equal || enable_reproducible) {
LITE_WARN("enable reproducible strategy for algo profile");
if (batch_binary_equal)
strategy = static_cast<uint32_t>(Strategy::LITE_ALGO_REPRODUCIBLE) |
strategy;
}
auto lite_strategy = static_cast<Strategy>(strategy);
model->set_lite_strategy(lite_strategy);
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
auto lite_network = model->get_lite_network();
auto lite_strategy = model->get_lite_strategy();
//! set algo policy for model
lite::Runtime::set_network_algo_policy(
lite_network, lite_strategy, share_batch_size, batch_binary_equal);
if (!m_fast_run_cache.empty()) {
if (!access(m_fast_run_cache.c_str(), F_OK)) {
lite::set_persistent_cache(m_fast_run_cache);
} else {
lite::set_persistent_cache(m_fast_run_cache, true);
}
//! TODO: this comes from the mdl model settings but has no matching
//! setting in the lite model
// if (!enable_full_run && !enable_fast_run)
// mgb::gopt::enable_opr_use_profiling_cache_inplace(vars);
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_RUNNING) {
#if MGB_ENABLE_FASTRUN
//! dump algo cache
if (!m_fast_run_cache.empty()) {
lite::dump_persistent_cache(m_fast_run_cache);
}
#endif
}
}

template <>
void FastRunOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
//! set the algo policy before model load
using Strategy = ModelMdl::Strategy;
auto strategy = static_cast<Strategy>(0);
#if MGB_ENABLE_FASTRUN
if (enable_full_run) {
mgb_log_warn("enable full-run strategy for algo profile");
strategy = Strategy::PROFILE | strategy;
} else if (enable_fast_run) {
mgb_log_warn("enable fast-run strategy for algo profile");
strategy = Strategy::PROFILE | Strategy::OPTIMIZED | strategy;
} else {
strategy = Strategy::HEURISTIC | strategy;
}
#else
strategy = Strategy::HEURISTIC | strategy;
#endif
if (batch_binary_equal || enable_reproducible) {
mgb_log_warn("enable reproducible strategy for algo profile");
strategy = Strategy::REPRODUCIBLE | strategy;
}
model->set_mdl_strategy(strategy);

//! set binary_equal_between_batch and shared_batch_size
if (batch_binary_equal) {
mgb_log_warn("enable batch binary equal");
model->get_mdl_config()
.comp_graph->options()
.fast_run_config.binary_equal_between_batch = true;
}
if (share_batch_size > 0) {
mgb_log_warn("set shared shared batch");
model->get_mdl_config()
.comp_graph->options()
.fast_run_config.shared_batch_size = share_batch_size;
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
auto vars = model->get_mdl_load_result().output_var_list;
auto strategy = model->get_mdl_strategy();
mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy);
// set algo cache path
if (!m_fast_run_cache.empty()) {
if (!access(m_fast_run_cache.c_str(), F_OK)) {
mgb::PersistentCache::set_impl(
std::make_shared<mgb::InFilePersistentCache>(
m_fast_run_cache.c_str()));
} else {
mgb::PersistentCache::set_impl(
std::make_shared<mgb::InFilePersistentCache>());
}
#if MGB_ENABLE_FASTRUN
if (!enable_full_run && !enable_fast_run)
#endif
mgb::gopt::enable_opr_use_profiling_cache_inplace(vars);
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_RUNNING) {
#if MGB_ENABLE_FASTRUN
//! dump algo cache
if (!m_fast_run_cache.empty()) {
static_cast<mgb::InFilePersistentCache&>(mgb::PersistentCache::inst())
.dump_cache(m_fast_run_cache.c_str());
}
#endif
}
}

} // namespace lar

using namespace lar;

FastRunOption::FastRunOption() {
m_option_name = "fastrun";
#if MGB_ENABLE_FASTRUN
enable_fast_run = FLAGS_fast_run;
enable_full_run = FLAGS_full_run;
#endif
batch_binary_equal = FLAGS_binary_equal_between_batch;
enable_reproducible = FLAGS_reproducible;
m_fast_run_cache = FLAGS_fast_run_algo_policy;
share_batch_size = FLAGS_fast_run_shared_batch_size;
#if MGB_ENABLE_FASTRUN
    //! when the fastrun cache file path is not empty but cannot be accessed
if (!m_fast_run_cache.empty() && access(m_fast_run_cache.c_str(), F_OK)) {
mgb_assert(
enable_full_run || enable_fast_run,
"--fast-run or --full-run should be enabled");
}
if (share_batch_size) {
mgb_assert(
enable_full_run || enable_fast_run || !m_fast_run_cache.empty(),
"--fast-run-shared-batch-size should be used with "
"--fast-run|--full-run|--fast-run-algo-policy");
}
#endif
}

bool FastRunOption::is_valid() {
bool ret = false;
#if MGB_ENABLE_FASTRUN
ret = ret || FLAGS_fast_run;
ret = ret || FLAGS_full_run;
#endif
ret = ret || FLAGS_binary_equal_between_batch;
ret = ret || FLAGS_fast_run_shared_batch_size > 0;
ret = ret || FLAGS_reproducible;
ret = ret || FLAGS_fast_run_algo_policy.size() > 0;

return ret;
}

std::shared_ptr<OptionBase> FastRunOption::create_option() {
static std::shared_ptr<FastRunOption> option(new FastRunOption);
if (FastRunOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void FastRunOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

#if MGB_ENABLE_FASTRUN
DEFINE_bool(fast_run, false, "whether to use fast-run in model run");
DEFINE_bool(full_run, false, "whether to use full-run in model run");
#endif

DEFINE_bool(
binary_equal_between_batch, false,
"Each batch of output is promised binary equal if each batch of "
"input is binary equal\n Note that if this option is turned on, "
"`--reproducible` will also be turned on.");
DEFINE_bool(
        reproducible, false,
        "Enable choosing algos which are reproducible. It is mainly used for "
        "cudnn algos. See "
        "https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/"
        "index.html#reproducibility "
        "for more details.");
DEFINE_uint32(fast_run_shared_batch_size, 0, "Set the batch size used during fastrun");
DEFINE_string(fast_run_algo_policy, "", "fast-run cache path.");
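// Example invocation (hypothetical binary and file names, for illustration only):
//   load_and_run model.mge --full-run --fast-run-algo-policy algo_cache.bin
// profiles algorithms during the run and dumps the cache in the
// AFTER_MODEL_RUNNING stage handled above.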

REGIST_OPTION_CREATOR(fastrun, lar::FastRunOption::create_option);

+ 57
- 0
lite/load_and_run/src/options/fastrun_options.h View File

@@ -0,0 +1,57 @@
/**
* \file lite/load_and_run/src/options/fastrun_options.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once

#include <gflags/gflags.h>
#include "models/model.h"
#include "option_base.h"

#if MGB_ENABLE_FASTRUN
DECLARE_bool(fast_run);
DECLARE_bool(full_run);
#endif
DECLARE_bool(reproducible);
DECLARE_bool(binary_equal_between_batch);
DECLARE_uint32(fast_run_shared_batch_size);
DECLARE_string(fast_run_algo_policy);

namespace lar {
class FastRunOption final : public OptionBase {
public:
    //! check the condition for constructing FastRunOption
static bool is_valid();

    //! create the option from cmdline args when the condition is met
static std::shared_ptr<OptionBase> create_option();

//! configure model for different runtime_param
void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

    //! get option name for quick lookup
std::string option_name() const override { return m_option_name; }

private:
FastRunOption();
//! config template for different model
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>) {}

#if MGB_ENABLE_FASTRUN
bool enable_fast_run; //! fast run strategy flag
bool enable_full_run; //! full run strategy flag
#endif
    bool batch_binary_equal;      //! fast run strategy setting
bool enable_reproducible; //! enable reproducible strategy
size_t share_batch_size; //! fast run strategy share batch size setting
std::string m_fast_run_cache; //! fast run cache file path
std::string m_option_name; //! option name
};
} // namespace lar

+ 295
- 0
lite/load_and_run/src/options/io_options.cpp View File

@@ -0,0 +1,295 @@
/**
* \file lite/load_and_run/src/options/io_options.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include <map>

#include "helpers/data_parser.h"
#include "misc.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"

#include "io_options.h"
namespace lar {
template <>
void InputOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto parser = model->get_input_parser();
auto io = model->get_networkIO();
for (size_t idx = 0; idx < data_path.size(); ++idx) {
parser.feed(data_path[idx].c_str());
}

auto inputs = parser.inputs;
bool is_host = true;
for (auto& i : inputs) {
io.inputs.push_back({i.first, is_host});
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
auto config = model->get_config();
auto parser = model->get_input_parser();
auto network = model->get_lite_network();

        //! data type map from mgb data type to lite data type
std::map<megdnn::DTypeEnum, LiteDataType> type_map = {
{megdnn::DTypeEnum::Float32, LiteDataType::LITE_FLOAT},
{megdnn::DTypeEnum::Int32, LiteDataType::LITE_INT},
{megdnn::DTypeEnum::Int8, LiteDataType::LITE_INT8},
{megdnn::DTypeEnum::Uint8, LiteDataType::LITE_UINT8}};

for (auto& i : parser.inputs) {
//! get tensor information from data parser
auto tensor = i.second;
auto data_type = tensor.dtype();
auto tensor_shape = tensor.shape();
mgb::dt_byte* src = tensor.raw_ptr();

//! set lite layout
lite::Layout layout;
layout.ndim = tensor_shape.ndim;
for (size_t idx = 0; idx < tensor_shape.ndim; idx++) {
layout.shapes[idx] = tensor_shape[idx];
}
layout.data_type = type_map[data_type.enumv()];

//! set network input tensor
std::shared_ptr<lite::Tensor> input_tensor =
network->get_io_tensor(i.first);
input_tensor->reset(src, layout);
}
}
}

template <>
void InputOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto parser = model->get_input_parser();
for (size_t idx = 0; idx < data_path.size(); ++idx) {
parser.feed(data_path[idx].c_str());
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
auto parser = model->get_input_parser();
auto network = model->get_mdl_load_result();
auto tensormap = network.tensor_map;
for (auto& i : parser.inputs) {
mgb_assert(
tensormap.find(i.first) != tensormap.end(),
"can't find tesnor named %s", i.first.c_str());
auto& in = tensormap.find(i.first)->second;
in->copy_from(i.second);
}
}
}

template <>
void IOdumpOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
if (enable_io_dump) {
LITE_WARN("enable text io dump");
lite::Runtime::enable_io_txt_dump(model->get_lite_network(), dump_path);
}
if (enable_bin_io_dump) {
LITE_WARN("enable binary io dump");
lite::Runtime::enable_io_bin_dump(model->get_lite_network(), dump_path);
}
        //! FIXME: complete this when the corresponding API is added in lite
        if (enable_io_dump_stdout || enable_io_dump_stderr) {
            LITE_THROW("lite model doesn't support stdout or stderr io dump");
        }
        if (enable_bin_out_dump) {
            LITE_THROW("lite model doesn't support binary output dump");
        }
        if (enable_copy_to_host) {
            LITE_WARN("lite model copies outputs to host by default");
        }
}
}

template <>
void IOdumpOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (enable_io_dump) {
mgb_log_warn("enable text io dump");
auto iodump = std::make_unique<mgb::TextOprIODump>(
model->get_mdl_config().comp_graph.get(), dump_path.c_str());
iodump->print_addr(false);
io_dumper = std::move(iodump);
}

if (enable_io_dump_stdout) {
mgb_log_warn("enable text io dump to stdout");
std::shared_ptr<FILE> std_out(stdout, [](FILE*) {});
auto iodump = std::make_unique<mgb::TextOprIODump>(
model->get_mdl_config().comp_graph.get(), std_out);
iodump->print_addr(false);
io_dumper = std::move(iodump);
}

if (enable_io_dump_stderr) {
mgb_log_warn("enable text io dump to stderr");
std::shared_ptr<FILE> std_err(stderr, [](FILE*) {});
auto iodump = std::make_unique<mgb::TextOprIODump>(
model->get_mdl_config().comp_graph.get(), std_err);
iodump->print_addr(false);
io_dumper = std::move(iodump);
}

if (enable_bin_io_dump) {
mgb_log_warn("enable binary io dump");
auto iodump = std::make_unique<mgb::BinaryOprIODump>(
model->get_mdl_config().comp_graph.get(), dump_path);
io_dumper = std::move(iodump);
}

if (enable_bin_out_dump) {
mgb_log_warn("enable binary output dump");
out_dumper = std::make_unique<OutputDumper>(dump_path.c_str());
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
if (enable_bin_out_dump) {
auto load_result = model->get_mdl_load_result();
out_dumper->set(load_result.output_var_list);

std::vector<mgb::ComputingGraph::Callback> cb;
for (size_t i = 0; i < load_result.output_var_list.size(); i++) {
cb.push_back(out_dumper->bind());
}
model->set_output_callback(cb);
}
if (enable_copy_to_host) {
auto load_result = model->get_mdl_load_result();

std::vector<mgb::ComputingGraph::Callback> cb;
for (size_t i = 0; i < load_result.output_var_list.size(); i++) {
mgb::HostTensorND val;
auto callback = [val](const mgb::DeviceTensorND& dv) mutable {
val.copy_from(dv);
};
cb.push_back(callback);
}
model->set_output_callback(cb);
}
} else if (runtime_param.stage == RunStage::AFTER_RUNNING_WAIT) {
if (enable_bin_out_dump) {
out_dumper->write_to_file();
}
}
}

} // namespace lar

////////////////////// Input options ////////////////////////
using namespace lar;

InputOption::InputOption() {
m_option_name = "input";
size_t start = 0;
auto end = FLAGS_input.find(";", start);
while (end != std::string::npos) {
std::string path = FLAGS_input.substr(start, end - start);
data_path.emplace_back(path);
start = end + 1;
end = FLAGS_input.find(";", start);
}
data_path.emplace_back(FLAGS_input.substr(start));
}
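// FLAGS_input is split on ';', so several inputs can be passed in a single flag,
// e.g. --input "data0.npy;data1.json" (file names here are purely illustrative).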

std::shared_ptr<lar::OptionBase> lar::InputOption::create_option() {
static std::shared_ptr<InputOption> m_option(new InputOption);
if (InputOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(m_option);
} else {
return nullptr;
}
}

void InputOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

////////////////////// OprIOdump options ////////////////////////

IOdumpOption::IOdumpOption() {
m_option_name = "iodump";
size_t valid_flag = 0;
if (!FLAGS_io_dump.empty()) {
dump_path = FLAGS_io_dump;
enable_io_dump = true;
valid_flag = valid_flag | (1 << 0);
}
if (!FLAGS_bin_io_dump.empty()) {
dump_path = FLAGS_bin_io_dump;
enable_bin_io_dump = true;
valid_flag = valid_flag | (1 << 1);
}
if (!FLAGS_bin_out_dump.empty()) {
dump_path = FLAGS_bin_out_dump;
enable_bin_out_dump = true;
valid_flag = valid_flag | (1 << 2);
}
if (FLAGS_io_dump_stdout) {
enable_io_dump_stdout = FLAGS_io_dump_stdout;
valid_flag = valid_flag | (1 << 3);
}
if (FLAGS_io_dump_stderr) {
enable_io_dump_stderr = FLAGS_io_dump_stderr;
valid_flag = valid_flag | (1 << 4);
}
    // warn when more than one dump option is set
    if (valid_flag && (valid_flag & (valid_flag - 1))) {
        mgb_log_warn(
                "ONLY the last io dump option is valid and the others are "
                "skipped!!!");
}

enable_copy_to_host = FLAGS_copy_to_host;
}

bool IOdumpOption::is_valid() {
bool ret = !FLAGS_io_dump.empty();
ret = ret || FLAGS_io_dump_stdout;
ret = ret || FLAGS_io_dump_stderr;
ret = ret || !FLAGS_bin_io_dump.empty();
ret = ret || !FLAGS_bin_out_dump.empty();
ret = ret || FLAGS_copy_to_host;
return ret;
}

std::shared_ptr<OptionBase> IOdumpOption::create_option() {
static std::shared_ptr<IOdumpOption> option(new IOdumpOption);
if (IOdumpOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void IOdumpOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}
////////////////////// Input gflags ////////////////////////
DEFINE_string(
input, "", "Set up inputs data for model --input [ file_path | data_string]");

////////////////////// OprIOdump gflags ////////////////////////

DEFINE_string(io_dump, "", "set the io dump file path in text format");
DEFINE_bool(io_dump_stdout, false, "dump io opr to stdout in text format");
DEFINE_bool(io_dump_stderr, false, "dump io opr to stderr in text format");
DEFINE_string(bin_io_dump, "", "set the io dump file path in binary format");
DEFINE_string(bin_out_dump, "", "set the out dump file path in binary format");
DEFINE_bool(copy_to_host, false, "copy device data to host");

REGIST_OPTION_CREATOR(input, lar::InputOption::create_option);
REGIST_OPTION_CREATOR(iodump, lar::IOdumpOption::create_option);

+ 78
- 0
lite/load_and_run/src/options/io_options.h View File

@@ -0,0 +1,78 @@
/**
* \file lite/load_and_run/src/options/io_options.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#include "helpers/outdumper.h"
#include "megbrain/plugin/opr_io_dump.h"
#include "models/model.h"
#include "option_base.h"

DECLARE_string(input);

DECLARE_string(io_dump);
DECLARE_bool(io_dump_stdout);
DECLARE_bool(io_dump_stderr);
DECLARE_string(bin_io_dump);
DECLARE_string(bin_out_dump);
DECLARE_bool(copy_to_host);

namespace lar {

/*!
* \brief: input option for --input set
*/
class InputOption final : public OptionBase {
public:
    //! static function for registering the option
static bool is_valid() { return !FLAGS_input.empty(); };
static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;
    //! interface implemented from OptionBase
std::string option_name() const override { return m_option_name; };

private:
InputOption();

template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

std::string m_option_name;
std::vector<std::string> data_path; // data string or data file path
};

class IOdumpOption : public OptionBase {
public:
static bool is_valid();
static std::shared_ptr<OptionBase> create_option();
    //! config the model; dispatch to the configure code of the concrete model
    //! type
void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;
std::string option_name() const override { return m_option_name; };

private:
IOdumpOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

bool enable_io_dump;
bool enable_io_dump_stdout;
bool enable_io_dump_stderr;
bool enable_bin_io_dump;
bool enable_bin_out_dump;
bool enable_copy_to_host;
std::string m_option_name;
std::string dump_path;
std::unique_ptr<mgb::OprIODumpBase> io_dumper;
std::unique_ptr<OutputDumper> out_dumper;
};
} // namespace lar

+ 171
- 0
lite/load_and_run/src/options/layout_options.cpp View File

@@ -0,0 +1,171 @@
/**
* \file lite/load_and_run/src/options/layout_options.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include <gflags/gflags.h>

#include "misc.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"

#include "layout_options.h"
namespace lar {
template <>
void LayoutOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
#define ENABLE_LAYOUT(layout) \
LITE_WARN("enable " #layout " optimization"); \
model->get_config().options.enable_##layout = true; \
break;

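        // each ENABLE_LAYOUT expansion ends with a break, so the cases below do
        // not fall through even though no explicit break follows them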
switch (option_flag) {
case OptLayoutType::NCHW4:
ENABLE_LAYOUT(nchw4)

case OptLayoutType::CHWN4:
LITE_THROW("lite model unsupport chwn4 layout");
break;
case OptLayoutType::NCHW44:
ENABLE_LAYOUT(nchw44)

case OptLayoutType::NCHW88:
ENABLE_LAYOUT(nchw88)

case OptLayoutType::NCHW32:
ENABLE_LAYOUT(nchw32)

case OptLayoutType::NCHW64:
ENABLE_LAYOUT(nchw64)

case OptLayoutType::NHWCD4:
ENABLE_LAYOUT(nhwcd4)

case OptLayoutType::NCHW44_DOT:
ENABLE_LAYOUT(nchw44_dot)
default:
break;
}
#undef ENABLE_LAYOUT
}
}

template <>
void lar::LayoutOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
mgb_log_debug("mdl layout config start");
#define ENABLE_LAYOUT(layout) \
mgb_log_warn("enable " #layout " optimization"); \
model->get_mdl_config().comp_graph->options().graph_opt.enable_##layout(); \
break;

switch (option_flag) {
case OptLayoutType::NCHW4:
ENABLE_LAYOUT(nchw4)

case OptLayoutType::CHWN4:
ENABLE_LAYOUT(chwn4)

case OptLayoutType::NCHW44:
ENABLE_LAYOUT(nchw44)

case OptLayoutType::NCHW88:
ENABLE_LAYOUT(nchw88)

case OptLayoutType::NCHW32:
ENABLE_LAYOUT(nchw32)

case OptLayoutType::NCHW64:
ENABLE_LAYOUT(nchw64)

case OptLayoutType::NHWCD4:
ENABLE_LAYOUT(nhwcd4)

case OptLayoutType::NCHW44_DOT:
ENABLE_LAYOUT(nchw44_dot)

default:
break;
}
mgb_log_debug("mdl layout config end");

#undef ENABLE_LAYOUT
}
}
} // namespace lar

using namespace lar;

OptLayoutType LayoutOption::option_flag;

LayoutOption::LayoutOption() {
m_option_name = "layout";
}

bool LayoutOption::is_valid() {
size_t valid_flag = 0;
if (FLAGS_enable_nchw4) {
valid_flag = valid_flag | (1 << 0);
}
if (FLAGS_enable_chwn4) {
valid_flag = valid_flag | (1 << 1);
}
if (FLAGS_enable_nchw44) {
valid_flag = valid_flag | (1 << 2);
}
if (FLAGS_enable_nchw88) {
valid_flag = valid_flag | (1 << 3);
}
if (FLAGS_enable_nchw32) {
valid_flag = valid_flag | (1 << 4);
}
if (FLAGS_enable_nchw64) {
valid_flag = valid_flag | (1 << 5);
}
if (FLAGS_enable_nhwcd4) {
valid_flag = valid_flag | (1 << 6);
}
if (FLAGS_enable_nchw44_dot) {
valid_flag = valid_flag | (1 << 7);
}

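    // exactly one layout flag may be set: valid_flag must be a non-zero power of
    // two, i.e. (valid_flag & (valid_flag - 1)) == 0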
bool ret = valid_flag && !(valid_flag & (valid_flag - 1));
if (ret) {
option_flag = static_cast<OptLayoutType>(valid_flag);
} else {
option_flag = static_cast<OptLayoutType>(0);
}

return ret;
};

std::shared_ptr<OptionBase> LayoutOption::create_option() {
static std::shared_ptr<LayoutOption> option(new LayoutOption);
if (LayoutOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void LayoutOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

DEFINE_bool(enable_nchw4, false, "enable nchw4 layout optimization!!");
DEFINE_bool(enable_chwn4, false, "enable chwn4 layout optimization!!");
DEFINE_bool(enable_nchw44, false, "enable nchw44 layout optimization!!");
DEFINE_bool(enable_nchw88, false, "enable nchw88 layout optimization!!");
DEFINE_bool(enable_nchw32, false, "enable nchw32 layout optimization!!");
DEFINE_bool(enable_nchw64, false, "enable nchw64 layout optimization!!");
DEFINE_bool(enable_nhwcd4, false, "enable nhwcd4 layout optimization!!");
DEFINE_bool(enable_nchw44_dot, false, "enable nchw44-dot layout optimization!!");
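// Only one of the layout flags above may be passed per run; otherwise
// LayoutOption::is_valid() returns false and the option is not created
// (hypothetical invocation: load_and_run model.mge --enable-nchw44).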

REGIST_OPTION_CREATOR(layout, lar::LayoutOption::create_option);

+ 56
- 0
lite/load_and_run/src/options/layout_options.h View File

@@ -0,0 +1,56 @@
/**
* \file lite/load_and_run/src/options/layout_options.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once

#include <gflags/gflags.h>
#include "helpers/common.h"
#include "models/model.h"
#include "option_base.h"

DECLARE_bool(enable_nchw4);
DECLARE_bool(enable_chwn4);
DECLARE_bool(enable_nchw44);
DECLARE_bool(enable_nchw88);
DECLARE_bool(enable_nchw32);
DECLARE_bool(enable_nchw64);
DECLARE_bool(enable_nhwcd4);
DECLARE_bool(enable_nchw44_dot);

namespace lar {
/*!
* \brief: layout option for optimization
*/
class LayoutOption final : public OptionBase {
public:
    //! check the validity of the layout option flags
    static bool is_valid();

    //! create the option when it is used
static std::shared_ptr<OptionBase> create_option();

    //! config the model, dispatch configuration for different model implementations
void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

//! get option name
std::string option_name() const override { return m_option_name; };

private:
//! Constructor
LayoutOption();

    //! configuration for different model implementations
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

static OptLayoutType option_flag;
std::string m_option_name;
};
} // namespace lar

+ 600
- 0
lite/load_and_run/src/options/optimize_options.cpp View File

@@ -0,0 +1,600 @@
/**
* \file lite/load_and_run/src/options/optimize_options.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include "megbrain/gopt/inference.h"
#if MGB_ENABLE_TENSOR_RT
#include "megbrain/tensorrt/tensorrt_engine_cache.h"
#endif
#include "lite/global.h"
#include "misc.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"
#include "optimize_options.h"

///////////////////////// fuse and preprocess optimize options ///////////////
namespace lar {
template <>
void FusePreprocessOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (enable_fuse_preprocess) {
LITE_WARN("enable fuse-preprocess optimization");
model->get_config().options.fuse_preprocess = true;
}
}
}

template <>
void FusePreprocessOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& graph_option = model->get_mdl_config().comp_graph->options();
if (enable_fuse_preprocess) {
mgb_log_warn("enable fuse-preprocess optimization");
graph_option.graph_opt.enable_fuse_preprocess();
}
}
}
} // namespace lar
using namespace lar;

FusePreprocessOption::FusePreprocessOption() {
m_option_name = "fuse_preprocess";
enable_fuse_preprocess = FLAGS_enable_fuse_preprocess;
}

bool FusePreprocessOption::is_valid() {
bool ret = FLAGS_enable_fuse_preprocess;
return ret;
}

std::shared_ptr<OptionBase> FusePreprocessOption::create_option() {
static std::shared_ptr<FusePreprocessOption> option(new FusePreprocessOption);
if (FusePreprocessOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void FusePreprocessOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

///////////////////////// weight preprocess optimize options ///////////////
namespace lar {
template <>
void WeightPreprocessOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (weight_preprocess) {
LITE_WARN("enable weight-preprocess optimization");
model->get_config().options.weight_preprocess = true;
            //! FIXME: enable weight preprocess for the opencl algo searcher
            //! (the implementation below has some problems)
// #if MGB_OPENCL
// megdnn::opencl::algo_searcher::AlgoSearcherBase::
// enable_weight_preprocess();
// #endif
}
}
}

template <>
void WeightPreprocessOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& graph_option = model->get_mdl_config().comp_graph->options();
if (weight_preprocess) {
mgb_log_warn("enable weight-preprocess optimization");
graph_option.graph_opt.enable_weight_preprocess();
            //! FIXME: this implementation is not right
// #if MGB_OPENCL
// megdnn::opencl::algo_searcher::AlgoSearcherBase::
// enable_weight_preprocess();
// #endif
}
}
}
} // namespace lar

WeightPreprocessOption::WeightPreprocessOption() {
m_option_name = "weight_preprocess";
weight_preprocess = FLAGS_weight_preprocess;
}

bool WeightPreprocessOption::is_valid() {
bool ret = FLAGS_weight_preprocess;
return ret;
}

std::shared_ptr<OptionBase> WeightPreprocessOption::create_option() {
static std::shared_ptr<WeightPreprocessOption> option(new WeightPreprocessOption);
if (WeightPreprocessOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void WeightPreprocessOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

///// fuse conv bias and nonlinear activation opr optimize options ////////
namespace lar {
template <>
void FuseConvBiasNonlinearOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
LITE_MARK_USED_VAR(model);
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (enable_fuse_conv_bias_nonlinearity) {
LITE_THROW("fuse conv+bias+nonlinearity not supported in lite model");
}
}
}

template <>
void FuseConvBiasNonlinearOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& graph_option = model->get_mdl_config().comp_graph->options();
if (enable_fuse_conv_bias_nonlinearity) {
mgb_log_warn("enable fuse conv+bias+nonlinearity optimization");
graph_option.graph_opt.enable_fuse_conv_bias_nonlinearity();
}
}
}
} // namespace lar

FuseConvBiasNonlinearOption::FuseConvBiasNonlinearOption() {
m_option_name = "fuse_conv_bias_nonlinear";
enable_fuse_conv_bias_nonlinearity = FLAGS_enable_fuse_conv_bias_nonlinearity;
}

bool FuseConvBiasNonlinearOption::is_valid() {
bool ret = FLAGS_enable_fuse_conv_bias_nonlinearity;
return ret;
}

std::shared_ptr<OptionBase> FuseConvBiasNonlinearOption::create_option() {
static std::shared_ptr<FuseConvBiasNonlinearOption> option(
new FuseConvBiasNonlinearOption);
if (FuseConvBiasNonlinearOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void FuseConvBiasNonlinearOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

///////////////////////// fuse conv bias with z optimize options ///////////////
namespace lar {
template <>
void FuseConvBiasElemwiseAddOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
LITE_MARK_USED_VAR(model);
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (enable_fuse_conv_bias_with_z) {
LITE_THROW(
"fuse conv+bias+z optimization not supported in lite "
"model");
}
}
}

template <>
void FuseConvBiasElemwiseAddOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& graph_option = model->get_mdl_config().comp_graph->options();
if (enable_fuse_conv_bias_with_z) {
mgb_log_warn("enable fuse conv+bias+z optimization");
graph_option.graph_opt.enable_fuse_conv_bias_with_z();
}
}
}
} // namespace lar

FuseConvBiasElemwiseAddOption::FuseConvBiasElemwiseAddOption() {
m_option_name = "fuse_conv_bias_z";
enable_fuse_conv_bias_with_z = FLAGS_enable_fuse_conv_bias_with_z;
}

bool FuseConvBiasElemwiseAddOption::is_valid() {
bool ret = FLAGS_enable_fuse_conv_bias_with_z;
return ret;
}

std::shared_ptr<OptionBase> FuseConvBiasElemwiseAddOption::create_option() {
static std::shared_ptr<FuseConvBiasElemwiseAddOption> option(
new FuseConvBiasElemwiseAddOption);
if (FuseConvBiasElemwiseAddOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void FuseConvBiasElemwiseAddOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

///////////////////////// graph record options /////////////////////////
namespace lar {
template <>
void GraphRecordOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& config_option = model->get_config().options;
if (const_shape) {
LITE_WARN("enable const var shape");
config_option.const_shape = true;
}
if (fake_first) {
LITE_WARN("enable fake-first optimization");
config_option.fake_next_exec = true;
}
if (no_sanity_check) {
LITE_WARN("disable var sanity check optimization");
config_option.var_sanity_check_first_run = false;
}
if (m_record_comp_seq == 1) {
LITE_WARN("set record_comp_seq_level to 1");
}
if (m_record_comp_seq == 2) {
mgb_assert(
no_sanity_check,
"--no-sanity-check should be set before "
"--record-comp-seq2");
LITE_WARN("set record_comp_seq_level to 2");
}
config_option.comp_node_seq_record_level = m_record_comp_seq;
}
}

template <>
void GraphRecordOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& graph_option = model->get_mdl_config().comp_graph->options();
if (const_shape) {
mgb_log_warn("enable const var shape");
model->get_mdl_config().const_var_shape = true;
}
if (fake_first) {
mgb_log_warn("enable fake-first optimization");
graph_option.fake_next_exec = true;
}
if (no_sanity_check) {
mgb_log_warn("disable var sanity check optimization");
graph_option.var_sanity_check_first_run = false;
}
if (m_record_comp_seq == 1) {
mgb_log_warn("set record_comp_seq_level to 1");
}
if (m_record_comp_seq == 2) {
mgb_assert(
no_sanity_check && !fake_first,
"--no-sanity-check should be set before "
"--record-comp-seq2 and --fake-first should not be set");
mgb_log_warn("set record_comp_seq_level to 2");
}
graph_option.comp_node_seq_record_level = m_record_comp_seq;
}
}
} // namespace lar

GraphRecordOption::GraphRecordOption() {
m_option_name = "graph_record";
m_record_comp_seq = 0;
const_shape = FLAGS_const_shape;
fake_first = FLAGS_fake_first;
no_sanity_check = FLAGS_no_sanity_check;
if (FLAGS_record_comp_seq) {
m_record_comp_seq = 1;
}
if (FLAGS_record_comp_seq2) {
m_record_comp_seq = 2;
}
}

bool GraphRecordOption::is_valid() {
bool ret = FLAGS_const_shape;
ret = ret || FLAGS_fake_first;
ret = ret || FLAGS_no_sanity_check;
ret = ret || FLAGS_record_comp_seq;
ret = ret || FLAGS_record_comp_seq2;
return ret;
}

std::shared_ptr<OptionBase> GraphRecordOption::create_option() {
static std::shared_ptr<GraphRecordOption> option(new GraphRecordOption);
if (GraphRecordOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void GraphRecordOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}
///////////////////////// memory optimize options /////////////////////////
namespace lar {
template <>
void MemoryOptimizeOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
LITE_MARK_USED_VAR(model);
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (disable_mem_opt) {
LITE_THROW("lite model don't support disable memory optimization");
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
if (workspace_limit != SIZE_MAX) {
LITE_WARN("set workspace limit to %ld", workspace_limit);
lite::Runtime::set_network_algo_workspace_limit(
model->get_lite_network(), workspace_limit);
}
}
}

template <>
void MemoryOptimizeOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& graph_option = model->get_mdl_config().comp_graph->options();
if (disable_mem_opt) {
mgb_log_warn("disable memory optimization");
graph_option.seq_opt.enable_mem_plan_opt = false;
graph_option.seq_opt.enable_mem_reuse_alloc = false;
}
if (workspace_limit < SIZE_MAX) {
mgb_log_warn("set workspace limit to %ld", workspace_limit);
auto output_spec = model->get_output_spec();
mgb::SymbolVarArray vars;
for (auto i : output_spec) {
vars.push_back(i.first);
}
mgb::gopt::set_opr_algo_workspace_limit_inplace(vars, workspace_limit);
}
}
}
} // namespace lar

MemoryOptimizeOption::MemoryOptimizeOption() {
m_option_name = "memory_optimize";
disable_mem_opt = FLAGS_disable_mem_opt;
workspace_limit = FLAGS_workspace_limit;
}

bool MemoryOptimizeOption::is_valid() {
bool ret = FLAGS_disable_mem_opt;
ret = ret || FLAGS_workspace_limit < SIZE_MAX;
return ret;
}

std::shared_ptr<OptionBase> MemoryOptimizeOption::create_option() {
static std::shared_ptr<MemoryOptimizeOption> option(new MemoryOptimizeOption);
if (MemoryOptimizeOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void MemoryOptimizeOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

///////////////////////// other options for optimization /////////////////
namespace lar {
template <>
void JITOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& config_option = model->get_config().options;
if (enable_jit) {
LITE_WARN("enable JIT (level 1)");
config_option.jit_level = 1;
}
}
}

template <>
void JITOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& graph_option = model->get_mdl_config().comp_graph->options();
if (enable_jit) {
mgb_log_warn("enable JIT (level 1)");
graph_option.graph_opt.jit = 1;
}
}
}
} // namespace lar
JITOption::JITOption() {
m_option_name = "JIT";
enable_jit = FLAGS_enable_jit;
}

bool JITOption::is_valid() {
bool ret = FLAGS_enable_jit;
return ret;
}

std::shared_ptr<OptionBase> JITOption::create_option() {
static std::shared_ptr<JITOption> option(new JITOption);
if (JITOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void JITOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}
///////////////////////// TensorRT options for optimization /////////////////
#if MGB_ENABLE_TENSOR_RT
namespace lar {
template <>
void TensorRTOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
if (!tensorrt_cache.empty()) {
LITE_WARN("set tensorrt cache as %s", tensorrt_cache.c_str());
lite::set_tensor_rt_cache(tensorrt_cache);
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
if (enable_tensorrt) {
LITE_WARN("enable TensorRT");
lite::Runtime::use_tensorrt(model->get_lite_network());
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_RUNNING) {
if (!tensorrt_cache.empty()) {
lite::dump_tensor_rt_cache();
}
}
}

template <>
void TensorRTOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto&& graph_option = model->get_mdl_config().comp_graph->options();
if (enable_tensorrt) {
mgb_log_warn("using tensorRT");
graph_option.graph_opt.tensorrt = true;
}
if (!tensorrt_cache.empty()) {
mgb_log_warn("use tensorrt cache: %s", tensorrt_cache.c_str());
mgb::TensorRTEngineCache::enable_engine_cache(true);
mgb::TensorRTEngineCache::set_impl(
std::make_shared<mgb::TensorRTEngineCacheIO>(
tensorrt_cache.c_str()));
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_RUNNING) {
if (!tensorrt_cache.empty()) {
if (mgb::TensorRTEngineCache::enable_engine_cache()) {
mgb::TensorRTEngineCache::inst().dump_cache();
}
}
}
}
} // namespace lar

TensorRTOption::TensorRTOption() {
m_option_name = "tensorRT";
enable_tensorrt = FLAGS_tensorrt;
tensorrt_cache = FLAGS_tensorrt_cache;
}

bool TensorRTOption::is_valid() {
bool ret = FLAGS_tensorrt;
ret = ret || !FLAGS_tensorrt_cache.empty();
return ret;
}

std::shared_ptr<OptionBase> TensorRTOption::create_option() {
static std::shared_ptr<TensorRTOption> option(new TensorRTOption);
if (TensorRTOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void TensorRTOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}
#endif
///////////////////////// fuse and preprocess optimize options ///////////////
DEFINE_bool(
enable_fuse_preprocess, false,
"Fusion astype | pad_channel | dimshuffle and etc opr from h2d opr");
DEFINE_bool(
weight_preprocess, false,
"Execute operators with weight preprocess, which can optimize the "
"operator execution time with algo of winograd, im2col ,etc., but "
"it may consume more memory.");
DEFINE_bool(
enable_fuse_conv_bias_nonlinearity, false,
"whether to fuse conv+bias+nonlinearity");
DEFINE_bool(
enable_fuse_conv_bias_with_z, false,
"fuse conv,bias (elemwise add),z(elemwise add) into one opr "
"(only support on GPU)");

///////////////////////// graph record and memory optimize options /////////////////////////
DEFINE_bool(
const_shape, false,
"set const_var_shape to reduce memory usage, since some static "
"inference data structures can be omitted");
DEFINE_bool(
fake_first, false,
"Enable fake exec for the first run. In fake exec mode, some "
"initialization job would be done, but no actual computing is "
"performed.");
DEFINE_bool(no_sanity_check, false, "Disable var sanity check on the first run");
DEFINE_bool(
record_comp_seq, false,
"Record the computing sequence, in level 1 . It reduces overhead of API"
"calls of some asynchronous computing devices");
DEFINE_bool(
record_comp_seq2, false,
"Record the computing sequence, in level 2, the computing graph can be"
"destructed to reduce memory usage");
DEFINE_bool(disable_mem_opt, false, "disable memory optimization!!");
DEFINE_uint64(workspace_limit, SIZE_MAX, "set the workspace upper-bound limit");
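// As asserted in GraphRecordOption::config_model_internel above, level-2 recording
// requires --no-sanity-check and, for the mdl model, must not be combined with
// --fake-first (hypothetical invocation: load_and_run model.mge --no-sanity-check
// --record-comp-seq2).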

///////////////////////// other options for optimization /////////////////
DEFINE_bool(
enable_jit, false,
" Execute supported operators with JIT(now only support NVRTC). "
"Can only be used on Nvidia GPUs");
#if MGB_ENABLE_ANDROID_NN
DEFINE_bool(
android_nn, false,
"Execute supported operators with Android NN. Can only be used "
"with --cpu.");
#endif
#if MGB_ENABLE_TENSOR_RT
DEFINE_bool(
tensorrt, false,
" Execute supported operators with TensorRT. Can only be used on "
"Nvidia GPUs,i.e. comp node is xpu or gpu.");
DEFINE_string(
tensorrt_cache, "",
"Set the TensorRT engine cache path for serialized prebuilt "
"ICudaEngine");
#endif
REGIST_OPTION_CREATOR(fuse_preprocess, lar::FusePreprocessOption::create_option);
REGIST_OPTION_CREATOR(weight_preprocess, lar::WeightPreprocessOption::create_option);
REGIST_OPTION_CREATOR(
fuse_conv_bias_nonlinear, lar::FuseConvBiasNonlinearOption::create_option);
REGIST_OPTION_CREATOR(
fuse_conv_bias_z, lar::FuseConvBiasElemwiseAddOption::create_option);
REGIST_OPTION_CREATOR(graph_record, lar::GraphRecordOption::create_option);
REGIST_OPTION_CREATOR(memory_optimize, lar::MemoryOptimizeOption::create_option);
REGIST_OPTION_CREATOR(JIT, lar::JITOption::create_option);
#if MGB_ENABLE_TENSOR_RT
REGIST_OPTION_CREATOR(tensorRT, lar::TensorRTOption::create_option);
#endif

+ 207
- 0
lite/load_and_run/src/options/optimize_options.h View File

@@ -0,0 +1,207 @@
/**
* \file lite/load_and_run/src/options/optimize_options.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#include "helpers/common.h"
#include "models/model.h"
#include "option_base.h"

DECLARE_bool(enable_fuse_preprocess);
DECLARE_bool(weight_preprocess);
DECLARE_bool(enable_fuse_conv_bias_nonlinearity);
DECLARE_bool(enable_fuse_conv_bias_with_z);

DECLARE_bool(const_shape);
DECLARE_bool(fake_first);
DECLARE_bool(no_sanity_check);
DECLARE_bool(record_comp_seq);
DECLARE_bool(record_comp_seq2);
DECLARE_bool(disable_mem_opt);
DECLARE_uint64(workspace_limit);

DECLARE_bool(enable_jit);
#if MGB_ENABLE_TENSOR_RT
DECLARE_bool(tensorrt);
DECLARE_string(tensorrt_cache);
#endif
namespace lar {
///////////////////////// fuse_preprocess optimize options //////////////
class FusePreprocessOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
FusePreprocessOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

std::string m_option_name;
bool enable_fuse_preprocess;
};

///////////////////////// weight preprocess optimize options //////////////
class WeightPreprocessOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
WeightPreprocessOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

std::string m_option_name;
bool weight_preprocess;
};

/////////////// fuse_conv_bias_nonlinearity optimize options ///////////////
class FuseConvBiasNonlinearOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
FuseConvBiasNonlinearOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

std::string m_option_name;
bool enable_fuse_conv_bias_nonlinearity;
};

///////////////////////// fuse_conv_bias_with_z optimize options //////////////
class FuseConvBiasElemwiseAddOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
FuseConvBiasElemwiseAddOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};
std::string m_option_name;
bool enable_fuse_conv_bias_with_z;
};

///////////////////////// graph record options ///////////////////////////
class GraphRecordOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
GraphRecordOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

std::string m_option_name;
size_t m_record_comp_seq;
bool const_shape;
bool fake_first;
bool no_sanity_check;
};

///////////////////////// memory optimize options /////////////////////////
class MemoryOptimizeOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
MemoryOptimizeOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

std::string m_option_name;
bool disable_mem_opt;
uint64_t workspace_limit;
};

///////////////////////// other options for optimization /////////////////
class JITOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
JITOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

std::string m_option_name;
bool enable_jit;
};
///////////////////////// TensorRT options for optimization /////////////////
#if MGB_ENABLE_TENSOR_RT
class TensorRTOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
TensorRTOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};

std::string m_option_name;
bool enable_tensorrt;
std::string tensorrt_cache;
};
#endif
} // namespace lar

+ 87
- 0
lite/load_and_run/src/options/option_base.h View File

@@ -0,0 +1,87 @@
/**
* \file lite/load_and_run/src/options/option_base.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "megbrain/common.h"

#include "helpers/common.h"
#include "models/model.h"

namespace lar {
/*!
* \brief: base class of options
*/
class OptionBase {
public:
//! configure model in different runtime state
virtual void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) = 0;
//! get depend options
virtual std::vector<std::string> depend_option() const { return {}; };

//! get option name
virtual std::string option_name() const = 0;

virtual ~OptionBase() = default;
};

/*!
 * \brief: Singleton option factory for registering options before the main function
*/
class OptionFactory {
public:
using OptionCreator = std::function<std::shared_ptr<OptionBase>()>;
using OptionMap = std::unordered_map<std::string, OptionCreator>;

//! get Singleton option factory
static OptionFactory& get_Instance() {
static OptionFactory instance;
return instance;
}

    //! register an option creator into the option map
void registe_options(std::string name, OptionCreator creator) {
if (option_creator_map.count(name) == 0) {
option_creator_map[name] = creator;
}
}

//! get creator map
OptionMap* get_option_creator_map() { return &option_creator_map; }

private:
OptionFactory(){};
OptionMap option_creator_map;
};

} // namespace lar

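// Each REGIST_OPTION_CREATOR expansion below defines a namespace-scope registrar
// object whose constructor inserts the creator into the OptionFactory singleton,
// so every option is registered during static initialization, before main() runs.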
#define REGIST_OPTION_CREATOR(name_, creator_) \
struct OptionRegister_##name_ { \
OptionRegister_##name_() { \
lar::OptionFactory::get_Instance().registe_options(#name_, creator_); \
} \
}; \
OptionRegister_##name_ name_;

#define CONFIG_MODEL_FUN \
if (model->type() == ModelType::LITE_MODEL) { \
config_model_internel<ModelLite>( \
runtime_param, std::static_pointer_cast<ModelLite>(model)); \
} else if (model->type() == ModelType::MEGDL_MODEL) { \
config_model_internel<ModelMdl>( \
runtime_param, std::static_pointer_cast<ModelMdl>(model)); \
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 401
- 0
lite/load_and_run/src/options/plugin_options.cpp View File

@@ -0,0 +1,401 @@
/**
* \file lite/load_and_run/src/options/plugin_options.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include "plugin_options.h"
#include "misc.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"

///////////////////// Plugin options///////////////////////////
namespace lar {

template <>
void PluginOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
        LITE_ASSERT(range == 0, "lite model doesn't support NumRangeChecker plugin");
        LITE_ASSERT(
                !enable_check_dispatch,
                "lite model doesn't support CPUDispatchChecker plugin");
        LITE_ASSERT(
                var_value_check_str.empty(),
                "lite model doesn't support VarValueChecker plugin");
}
#if MGB_ENABLE_JSON
else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
if (!profile_path.empty()) {
if (!enable_profile_host) {
LITE_WARN("enable profiling");
model->get_lite_network()->enable_profile_performance(profile_path);
} else {
LITE_WARN("enable profiling for host");
model->get_lite_network()->enable_profile_performance(profile_path);
}
}
}
#endif
}

template <>
void PluginOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto config = model->get_mdl_config();
if (range > 0) {
mgb_log_warn("enable number range check");
model->set_num_range_checker(float(range));
}

if (enable_check_dispatch) {
mgb_log_warn("enable cpu dispatch check");
cpu_dispatch_checker =
std::make_unique<mgb::CPUDispatchChecker>(config.comp_graph.get());
}

if (!var_value_check_str.empty()) {
mgb_log_warn("enable variable value check");
size_t init_idx = 0, switch_interval;
auto sep = var_value_check_str.find(':');
if (sep != std::string::npos) {
switch_interval = std::stoul(var_value_check_str.substr(0, sep));
init_idx = std::stoul(var_value_check_str.substr(sep + 1));
} else {
switch_interval = std::stoul(var_value_check_str);
}
var_value_checker = std::make_unique<mgb::VarValueChecker>(
config.comp_graph.get(), switch_interval, init_idx);
}

#if MGB_ENABLE_JSON

if (!profile_path.empty()) {
if (!enable_profile_host) {
mgb_log_warn("enable profiling");
} else {
mgb_log_warn("enable profiling for host");
}
model->set_profiler();
}
#endif
}

else if (runtime_param.stage == RunStage::AFTER_MODEL_RUNNING) {
#if MGB_ENABLE_JSON
if (!profile_path.empty()) {
mgb_log_warn("filename %s", profile_path.c_str());
if (model->get_profiler()) {
model->get_profiler()
->to_json_full(model->get_async_func().get())
->writeto_fpath(profile_path);
mgb_log_warn("profiling result written to %s", profile_path.c_str());
}
}
#endif
}
}

} // namespace lar

using namespace lar;
PluginOption::PluginOption() {
m_option_name = "plugin";
range = FLAGS_range;
enable_check_dispatch = FLAGS_check_dispatch;
var_value_check_str = FLAGS_check_var_value;
#if MGB_ENABLE_JSON
enable_profile_host = false;
if (!FLAGS_profile.empty()) {
profile_path = FLAGS_profile;
}
if (!FLAGS_profile_host.empty()) {
enable_profile_host = !FLAGS_profile_host.empty();
profile_path = FLAGS_profile_host;
}
#endif
}

bool PluginOption::is_valid() {
bool ret = FLAGS_check_dispatch;
ret = ret || FLAGS_range > 0;
ret = ret || !FLAGS_check_var_value.empty();
#if MGB_ENABLE_JSON
ret = ret || !FLAGS_profile.empty();
ret = ret || !FLAGS_profile_host.empty();
#endif
return ret;
}

std::shared_ptr<OptionBase> PluginOption::create_option() {
static std::shared_ptr<PluginOption> option(new PluginOption);
if (PluginOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void PluginOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}

///////////////////// Debug options///////////////////////////
namespace lar {
template <>
void DebugOption::format_and_print(
const std::string& tablename, std::shared_ptr<ModelLite> model) {
auto table = mgb::TextTable(tablename);
auto network = model->get_lite_network();
table.padding(1);
table.align(mgb::TextTable::Align::Mid).add("type").add("name").add("shape").eor();

auto to_string = [&](lite::Layout& layout) {
std::string shape("{");
for (size_t i = 0; i < layout.ndim; i++) {
if (i)
shape.append(",");
shape.append(std::to_string(layout.shapes[i]));
}
shape.append("}");
return shape;
};

auto input_name = network->get_all_input_name();
for (auto& i : input_name) {
auto layout = network->get_io_tensor(i)->get_layout();
table.align(mgb::TextTable::Align::Mid)
.add("INPUT")
.add(i)
.add(to_string(layout))
.eor();
}

auto output_name = network->get_all_output_name();
for (auto& i : output_name) {
auto layout = network->get_io_tensor(i)->get_layout();
table.align(mgb::TextTable::Align::Mid)
.add("OUTPUT")
.add(i)
.add(to_string(layout))
.eor();
}

std::stringstream ss;
ss << table;
printf("%s\n\n", ss.str().c_str());
}

template <>
void DebugOption::format_and_print(
const std::string& tablename, std::shared_ptr<ModelMdl> model) {
auto table = mgb::TextTable(tablename);
table.padding(1);
table.align(mgb::TextTable::Align::Mid).add("type").add("name").add("shape").eor();

for (auto&& i : model->get_mdl_load_result().tensor_map) {
table.align(mgb::TextTable::Align::Mid)
.add("INPUT")
.add(i.first)
.add(i.second->shape().to_string())
.eor();
}

for (auto&& i : model->get_mdl_load_result().output_var_list) {
table.align(mgb::TextTable::Align::Mid)
.add("OUTPUT")
.add(i.node()->name())
.add(i.shape().to_string())
.eor();
}

std::stringstream ss;
ss << table;
printf("%s\n\n", ss.str().c_str());
}

template <>
void DebugOption::config_model_internel<ModelLite>(
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
LITE_ASSERT(
                !disable_assert_throw, "lite model doesn't support disabling assert throw");
#ifndef __IN_TEE_ENV__
#if MGB_ENABLE_JSON
LITE_ASSERT(
static_mem_log_dir_path.empty(),
"lite model don't support static memory information export");
#endif
#endif
if (enable_verbose) {
LITE_WARN("enable verbose");
lite::set_log_level(LiteLogLevel::DEBUG);
}

#if __linux__ || __unix__
if (enable_wait_gdb) {
printf("wait for gdb attach (pid=%d): ", getpid());
getchar();
}
#endif
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
if (enable_display_model_info) {
LITE_WARN("enable display model information");
format_and_print<ModelLite>("Runtime Model Info", model);
}
} else if (runtime_param.stage == RunStage::AFTER_MODEL_RUNNING) {
if (enable_display_model_info) {
format_and_print<ModelLite>("Runtime Model Info", model);
}
}
}

template <>
void DebugOption::config_model_internel<ModelMdl>(
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
auto config = model->get_mdl_config();
if (enable_verbose) {
mgb_log_warn("enable verbose");
mgb::set_log_level(mgb::LogLevel::DEBUG);
}

#if __linux__ || __unix__
if (enable_wait_gdb) {
printf("wait for gdb attach (pid=%d): ", getpid());
getchar();
}
#endif
} else if (runtime_param.stage == RunStage::AFTER_OUTSPEC_SET) {
if (enable_display_model_info) {
mgb_log_warn("enable display model information");
format_and_print<ModelMdl>("Runtime Model Info", model);
}

if (disable_assert_throw) {
mgb_log_warn("disable assert throw");
auto on_opr = [](mgb::cg::OperatorNodeBase* opr) {
if (opr->same_type<mgb::opr::AssertEqual>()) {
opr->cast_final<mgb::opr::AssertEqual>().disable_throw_on_error();
}
};
mgb::cg::DepOprIter iter{on_opr};
for (auto&& i : model->get_output_spec()) {
iter.add(i.first.node()->owner_opr());
}
}
//! FIXME: this doesn't work for CPU builds (nothing is dumped); the
//! original megbrain/sdk code will assert(m_recorded) in
//! EventImplHelper::finished();

#ifndef __IN_TEE_ENV__
#if MGB_ENABLE_JSON
if (!static_mem_log_dir_path.empty()) {
mgb_log_warn("enable get static memeory information");
model->get_async_func()->get_static_memory_alloc_info(
static_mem_log_dir_path);
}
#endif
#endif
} else if (runtime_param.stage == RunStage::AFTER_MODEL_RUNNING) {
if (enable_display_model_info) {
format_and_print<ModelMdl>("Runtime Model Info", model);
}
}
}

} // namespace lar

DebugOption::DebugOption() {
m_option_name = "debug";
enable_display_model_info = FLAGS_model_info;
enable_verbose = FLAGS_verbose;
disable_assert_throw = FLAGS_disable_assert_throw;
#if __linux__ || __unix__
enable_wait_gdb = FLAGS_wait_gdb;
#endif
#ifndef __IN_TEE_ENV__
#if MGB_ENABLE_JSON
static_mem_log_dir_path = FLAGS_get_static_mem_info;
#endif
#endif
}

bool DebugOption::is_valid() {
bool ret = FLAGS_model_info;
ret = ret || FLAGS_verbose;
ret = ret || FLAGS_disable_assert_throw;

#if __linux__ || __unix__
ret = ret || FLAGS_wait_gdb;
#endif
#ifndef __IN_TEE_ENV__
#if MGB_ENABLE_JSON
ret = ret || !FLAGS_get_static_mem_info.empty();
#endif
#endif
return ret;
}

std::shared_ptr<OptionBase> DebugOption::create_option() {
static std::shared_ptr<DebugOption> option(new DebugOption);
if (DebugOption::is_valid()) {
return std::static_pointer_cast<OptionBase>(option);
} else {
return nullptr;
}
}

void DebugOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
CONFIG_MODEL_FUN;
}
///////////////////// Plugin gflags///////////////////////////
DEFINE_double(
range, 0,
"check whether absolute value of all numbers in computing graph "
"is in the given range");

DEFINE_bool(
check_dispatch, false,
"check whether an operator call dispatch on cpu comp nodes");

DEFINE_string(
check_var_value, "",
"--check-var-value [interval]|[interval:init_idx], Enable "
"VarValueChecker plugin. Refer to its doc for more details");
#if MGB_ENABLE_JSON
DEFINE_string(
profile, "",
"Write profiling result to given file. The output file is in "
"JSON format");
DEFINE_string(profile_host, "", "focus on host-time profiling for some backends");
#endif
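// Illustrative invocation only (binary and model names are hypothetical);
// the plugin flags above map to command-line switches such as:
//   ./load_and_run model.mge --check_dispatch --range 10
//   ./load_and_run model.mge --check_var_value 10:0 --profile profile.json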

///////////////////// Debug gflags///////////////////////////
DEFINE_bool(
model_info, false,
" Format and display model input/output tensor inforamtion");

DEFINE_bool(verbose, false, "print more information for debugging");

DEFINE_bool(disable_assert_throw, false, "disable assert throw on error check");
#if __linux__ || __unix__
DEFINE_bool(wait_gdb, false, "print current process PID and wait for gdb attach");
#endif
#ifndef __IN_TEE_ENV__
#if MGB_ENABLE_JSON
DEFINE_string(
get_static_mem_info, "",
"Record the static computing graph's static memory information");
#endif
#endif
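// Illustrative invocation only (binary and model names are hypothetical):
//   ./load_and_run model.mge --model_info --verbose --wait_gdb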
REGIST_OPTION_CREATOR(plugin, lar::PluginOption::create_option);

REGIST_OPTION_CREATOR(debug, lar::DebugOption::create_option);

+ 105
- 0
lite/load_and_run/src/options/plugin_options.h View File

@@ -0,0 +1,105 @@
/**
* \file lite/load_and_run/src/options/plugin_options.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#if __linux__ || __unix__
#include <unistd.h>
#endif
#include "megbrain/plugin/cpu_dispatch_checker.h"
#include "megbrain/plugin/var_value_checker.h"

#include "helpers/common.h"
#include "helpers/text_table.h"
#include "models/model.h"

#include "option_base.h"

DECLARE_bool(check_dispatch);
DECLARE_double(range);
DECLARE_string(check_var_value);
#if MGB_ENABLE_JSON
DECLARE_string(profile);
DECLARE_string(profile_host);
#endif

DECLARE_bool(model_info);
DECLARE_bool(verbose);
DECLARE_bool(disable_assert_throw);
#if __linux__ || __unix__
DECLARE_bool(wait_gdb);
#endif
#ifndef __IN_TEE_ENV__
#if MGB_ENABLE_JSON
DECLARE_string(get_static_mem_info);
#endif
#endif

namespace lar {
class PluginOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
PluginOption();
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};
double range;
bool enable_check_dispatch;
#if MGB_ENABLE_JSON
bool enable_profile_host;
std::string profile_path;
#endif

std::string var_value_check_str;

std::string m_option_name;

std::unique_ptr<mgb::VarValueChecker> var_value_checker;
std::unique_ptr<mgb::CPUDispatchChecker> cpu_dispatch_checker;
};

class DebugOption final : public OptionBase {
public:
static bool is_valid();

static std::shared_ptr<OptionBase> create_option();

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

std::string option_name() const override { return m_option_name; };

private:
DebugOption();
template <typename ModelImpl>
void format_and_print(const std::string&, std::shared_ptr<ModelImpl>){};
template <typename ModelImpl>
void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};
bool enable_display_model_info;
bool enable_verbose;
bool disable_assert_throw;
#if __linux__ || __unix__
bool enable_wait_gdb;
#endif
#ifndef __IN_TEE_ENV__
#if MGB_ENABLE_JSON
std::string static_mem_log_dir_path;
#endif
#endif
std::string m_option_name;
};
} // namespace lar

+ 96
- 0
lite/load_and_run/src/options/strategy_options.cpp View File

@@ -0,0 +1,96 @@
/**
* \file lite/load_and_run/src/options/strategy_options.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include "strategy_options.h"
#include "models/model_mdl.h"

using namespace lar;

DECLARE_bool(c_opr_lib_with_param);

StrategyOption::StrategyOption() {
m_option_name = "run_strategy";
warmup_iter = FLAGS_warmup_iter;
run_iter = FLAGS_iter;
threads = FLAGS_thread;
}

std::shared_ptr<OptionBase> StrategyOption::create_option() {
static std::shared_ptr<StrategyOption> option(new StrategyOption);
return std::static_pointer_cast<OptionBase>(option);
}

void StrategyOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
model->set_shared_mem(FLAGS_share_param_mem);
runtime_param.warmup_iter = warmup_iter;
runtime_param.run_iter = run_iter;
runtime_param.threads = threads;
runtime_param.testcase_num = 1;
} else if (runtime_param.stage == RunStage::BEFORE_OUTSPEC_SET) {
if (model->type() == ModelType::MEGDL_MODEL) {
auto model_ptr = std::static_pointer_cast<ModelMdl>(model);
auto num = model_ptr->get_testcase_num();
if (num != 0)
runtime_param.testcase_num = num;

model_ptr->make_output_spec();
}
}
}

TestcaseOption::TestcaseOption() {
m_option_name = "run_testcase";
}

std::shared_ptr<OptionBase> TestcaseOption::create_option() {
static std::shared_ptr<TestcaseOption> option(new TestcaseOption);
return std::static_pointer_cast<OptionBase>(option);
}

void TestcaseOption::config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
if (model->type() == ModelType::MEGDL_MODEL) {
auto model_ptr = std::static_pointer_cast<ModelMdl>(model);
if (model_ptr->get_testcase_num() && !FLAGS_c_opr_lib_with_param) {
if (runtime_param.stage == RunStage::MODEL_RUNNING) {
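// A model packed with testcases stores each testcase's input data as
// SharedDeviceTensor oprs; reload the packed file here and copy that
// data into the runtime input tensors before this iteration runs.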
auto load_result = model_ptr->get_mdl_load_result();
auto input_tensor = model_ptr->get_test_input();
auto loader = model_ptr->reset_loader();
auto testcase = loader->load(model_ptr->get_mdl_config(), false);
mgb_assert(testcase.output_var_list.size() == input_tensor.size());
for (size_t i = 0; i < input_tensor.size(); ++i) {
auto&& opr =
testcase.output_var_list[i]
.node()
->owner_opr()
->cast_final_safe<mgb::opr::SharedDeviceTensor>();
input_tensor[i].second->copy_from(
mgb::HostTensorND::make_proxy(*opr.dev_data()));
}
}
}
}
}

DEFINE_int32(iter, 10, "number of iterations for running the model");

DEFINE_int32(warmup_iter, 1, "number of warm-up iterations before the timed run");

DEFINE_int32(
thread, 1,
"thread number for run model while <thread> is supported( NOTE: "
"this is not a mapper device setting just for load and run)");

DEFINE_bool(share_param_mem, false, "load model from shared memory");
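// Illustrative invocation only (binary and model names are hypothetical):
//   ./load_and_run model.mge --warmup_iter 2 --iter 20 --thread 4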

REGIST_OPTION_CREATOR(run_strategy, lar::StrategyOption::create_option);

REGIST_OPTION_CREATOR(run_testcase, lar::TestcaseOption::create_option);

+ 68
- 0
lite/load_and_run/src/options/strategy_options.h View File

@@ -0,0 +1,68 @@
/**
* \file lite/load_and_run/src/options/strategy_options.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#include "models/model.h"
#include "option_base.h"
DECLARE_int32(iter);
DECLARE_int32(warmup_iter);
DECLARE_int32(thread);
DECLARE_bool(share_param_mem);

namespace lar {
/*!
* \brief: strategy option for running model
*/
class StrategyOption final : public OptionBase {
public:
//! create the option when it is used
static std::shared_ptr<OptionBase> create_option();

//! config the model, dispatching configuration to the concrete model implementation

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

//! get option name
std::string option_name() const override { return m_option_name; };

private:
//! Constructor
StrategyOption();

//! configuration for different model implementations
std::string m_option_name;

size_t warmup_iter; //! number of warm-up iterations before the timed run
size_t run_iter;    //! number of iterations for running the model
size_t threads;     //! number of threads for running the model (NOTE: it's
                    //! different from the multithread device setting)
};

class TestcaseOption final : public OptionBase {
public:
//! create the option when it is used
static std::shared_ptr<OptionBase> create_option();

//! config the model, dispatching configuration to the concrete model implementation

void config_model(
RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;

//! get option name
std::string option_name() const override { return m_option_name; };

private:
//! Constructor
TestcaseOption();

//! configuration for different model implementations
std::string m_option_name;
};
} // namespace lar

+ 24
- 0
lite/load_and_run/src/strategys/strategy.cpp View File

@@ -0,0 +1,24 @@

/**
* \file lite/load_and_run/src/strategys/strategy.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include "strategy.h"
#include <iostream>

using namespace lar;

std::shared_ptr<StrategyBase> StrategyBase::create_strategy(std::string model_path) {
if (FLAGS_fitting) {
return std::make_shared<FittingStrategy>(model_path);
} else {
return std::make_shared<NormalStrategy>(model_path);
}
}
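// Sketch of the intended call site (the actual entry point lives in
// lite/load_and_run/src/main.cpp; exact code there may differ):
//   auto strategy = lar::StrategyBase::create_strategy(model_path);
//   strategy->run();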

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 63
- 0
lite/load_and_run/src/strategys/strategy.h View File

@@ -0,0 +1,63 @@
/**
* \file lite/load_and_run/src/strategys/strategy.h
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#pragma once
#include <gflags/gflags.h>
#include <string>
#include <unordered_map>
#include "helpers/common.h"
#include "models/model.h"
#include "options/option_base.h"

DECLARE_bool(fitting);

namespace lar {
/*!
* \brief: load and run strategy base class
*/
class StrategyBase {
public:
static std::shared_ptr<StrategyBase> create_strategy(std::string model_path);

virtual void run() = 0;

virtual ~StrategyBase() = default;

RuntimeParam m_runtime_param;
std::unordered_map<std::string, std::shared_ptr<OptionBase>> m_options;
};

/*!
* \brief: normal strategy for running
*/
class NormalStrategy : public StrategyBase {
public:
NormalStrategy(std::string model_path);

//! run model with runtime parameter
void run() override;

private:
//! run the model pipeline; invoked once per thread in multi-thread mode
void run_subline();

std::string m_model_path;
};

/*!
* \brief: Fitting strategy for running
*/
class FittingStrategy : public StrategyBase {
public:
FittingStrategy(std::string model_path);
void run() override;
};
} // namespace lar

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 24
- 0
lite/load_and_run/src/strategys/strategy_fitting.cpp View File

@@ -0,0 +1,24 @@
/**
* \file lite/load_and_run/src/strategys/strategy_fitting.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/

#include "strategy.h"
using namespace lar;

FittingStrategy::FittingStrategy(std::string) {
mgb_assert(false, "this version doesn't support the fitting strategy");
}

void FittingStrategy::run() {
mgb_assert(false, "this version doesn't support the fitting strategy");
}

DEFINE_bool(
fitting, false,
"whether to use the fitting model, which will auto profile and get "
"the best option set!");

+ 167
- 0
lite/load_and_run/src/strategys/strategy_normal.cpp View File

@@ -0,0 +1,167 @@
/**
* \file lite/load_and_run/src/strategys/strategy_normal.cpp
*
* This file is part of MegEngine, a deep learning framework developed by
* Megvii.
*
* \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
*/
#include <iostream>
#include <thread>
#include "megbrain/common.h"
#include "megbrain/utils/timer.h"
#include "megbrain/version.h"
#include "megdnn/version.h"
#include "misc.h"
#include "strategy.h"

using namespace lar;

NormalStrategy::NormalStrategy(std::string model_path) {
mgb::set_log_level(mgb::LogLevel::WARN);
lite::set_log_level(LiteLogLevel::WARN);
m_model_path = model_path;
auto option_creator_map = OptionFactory::get_Instance().get_option_creator_map();
mgb_log_debug("option map size: %lu", option_creator_map->size());
auto construct_option = [&](std::string name) -> void {
auto& creator = (*option_creator_map)[name];
auto option = creator();
if (option) {
m_options.insert({name, option});
}
};
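// instantiate every registered option; create_option() returns nullptr for
// options whose gflags were not set, so only active options are kept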

for (auto& creator : *option_creator_map) {
auto name = creator.first;
if (m_options.count(name) == 0) {
construct_option(name);
}
}
}

void NormalStrategy::run_subline() {
auto model = ModelBase::create_model(m_model_path);
mgb_assert(model != nullptr, "create model failed!!");

auto stage_config_model = [&]() {
for (auto& option : m_options) {
option.second->config_model(m_runtime_param, model);
}
};
//! apply configuration before model load
m_runtime_param.stage = RunStage::BEFORE_MODEL_LOAD;
stage_config_model();

mgb::RealTimer timer;
model->load_model();
printf("load model: %.3fms\n", timer.get_msecs_reset());

//! apply configuration after model load
m_runtime_param.stage = RunStage::AFTER_MODEL_LOAD;
stage_config_model();

m_runtime_param.stage = RunStage::BEFORE_OUTSPEC_SET;
stage_config_model();

// for options that export static memory information
m_runtime_param.stage = RunStage::AFTER_OUTSPEC_SET;
stage_config_model();

auto warm_up = [&]() {
auto warmup_num = m_runtime_param.warmup_iter;
for (size_t i = 0; i < warmup_num; i++) {
printf("=== prepare: %.3fms; going to warmup\n\n", timer.get_msecs_reset());
model->run_model();
model->wait();
printf("warm up %lu %.3fms\n", i, timer.get_msecs_reset());
m_runtime_param.stage = RunStage::AFTER_RUNNING_WAIT;
stage_config_model();
}
};

auto run_iter = [&](int idx) {
double time_sqrsum = 0, time_sum = 0,
min_time = std::numeric_limits<double>::max(), max_time = 0;
auto run_num = m_runtime_param.run_iter;
for (size_t i = 0; i < run_num; i++) {
timer.reset();
model->run_model();
auto exec_time = timer.get_msecs();
model->wait();
m_runtime_param.stage = RunStage::AFTER_RUNNING_WAIT;
stage_config_model();
auto cur = timer.get_msecs();
printf("iter %lu/%lu: %.3fms (exec=%.3fms)\n", i, run_num, cur, exec_time);
time_sum += cur;
time_sqrsum += cur * cur;
fflush(stdout);
min_time = std::min(min_time, cur);
max_time = std::max(max_time, cur);
}
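// "sexec" below is the sample standard deviation of the per-iteration time:
// sqrt((n * sum(x^2) - sum(x)^2) / (n * (n - 1)))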
printf("\n=== finished test #%u: time=%.3fms avg_time=%.3fms "
"sexec=%.3fms min=%.3fms max=%.3fms\n\n",
idx, time_sum, time_sum / run_num,
std::sqrt(
(time_sqrsum * run_num - time_sum * time_sum) /
(run_num * (run_num - 1))),
min_time, max_time);
return time_sum;
};

//! model with testcase
size_t iter_num = m_runtime_param.testcase_num;

double tot_time = 0;
for (size_t idx = 0; idx < iter_num; idx++) {
//! config when running model
mgb_log_warn("run testcase: %zu ", idx);
m_runtime_param.stage = RunStage::MODEL_RUNNING;
stage_config_model();

if (!idx) {
warm_up();
}
tot_time += run_iter(idx);

m_runtime_param.stage = RunStage::AFTER_RUNNING_ITER;
stage_config_model();
}

printf("=== total time: %.3fms\n", tot_time);
//! execute after run
m_runtime_param.stage = RunStage::AFTER_MODEL_RUNNING;
stage_config_model();
};

void NormalStrategy::run() {
auto v0 = mgb::get_version();
auto v1 = megdnn::get_version();
printf("megbrain/lite/load_and_run:\nusing MegBrain "
"%d.%d.%d(%d) and MegDNN %d.%d.%d\n",
v0.major, v0.minor, v0.patch, v0.is_dev, v1.major, v1.minor, v1.patch);

size_t thread_num = m_runtime_param.threads;
auto run_sub = [&]() { run_subline(); };
if (thread_num == 1) {
run_sub();
} else if (thread_num > 1) {
#if MGB_HAVE_THREAD
std::vector<std::thread> threads;

for (size_t i = 0; i < thread_num; ++i) {
threads.emplace_back(run_sub);
}
for (auto&& i : threads) {
i.join();
}
#else
mgb_log_error(
"%d threads requested, but load_and_run was compiled "
"without <thread> support.",
thread_num);
#endif
} else {
mgb_assert(false, "--thread must input a positive number!!");
}
//! execute before run
}
