GitOrigin-RevId: 45d81c9a96
HuaHua404-patch-4
@@ -23,3 +23,10 @@ imperative/python/test/unit/module/MagicMindRuntimeOprTest.GraphShapeMutable.mlu | |||
lite/test/resource/lite/ax_data_input.npy filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/ax_data_output.npy filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/ax_model.mge filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/add_demo_input.json filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/add_demo.mge filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/resnet50_b10.mdl filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/resnet50_input.npy filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/resnet50.mge filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/resnet50_uint8.mge filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/cat.ppm filter=lfs diff=lfs merge=lfs -text |
@@ -112,7 +112,7 @@ function prepare_model_and_data(){ | |||
#prepare mge model | |||
python3 script/resnet50_mge.py --dir model_source | |||
python3 script/resnet50_mge.py --dir model_source -d int8 | |||
python3 script/resnet50_mge.py --dir model_source -d uint8 | |||
python3 script/resnet50_mge.py --dir model_source --inputs "#rand(0,255)" | |||
#make input_data | |||
@@ -43,7 +43,7 @@ if __name__ == "__main__": | |||
@jit.trace(symbolic=True, capture_as_const=True) | |||
def fun(data): | |||
return net(data) | |||
data = tensor([3,4,5]) | |||
data = tensor([3.0,4.0,5.0]) | |||
fun(data) | |||
if args.inputs == "": | |||
fun.dump( | |||
@@ -26,7 +26,7 @@ cv2.imwrite("input_data/cat.ppm",processed_img) | |||
#json | |||
data_obj = { | |||
"shape": [1,3], | |||
"type": "int32", | |||
"type": "float32", | |||
"raw": [2,3,4] | |||
} | |||
with open("input_data/add_demo_input.json", "w") as f: |
@@ -348,4 +348,4 @@ if __name__ == "__main__": | |||
) | |||
else: | |||
raise TypeError("dtype should be float32") | |||
raise TypeError("dtype should be float32 or uint8") |
@@ -18,6 +18,8 @@ struct DataParser { | |||
}; | |||
void feed(const std::string& path); | |||
~DataParser() { inputs.clear(); }; | |||
std::unordered_map<std::string, mgb::HostTensorND> inputs; | |||
private: | |||
@@ -321,10 +321,10 @@ std::unique_ptr<JsonLoader::Value> JsonLoader::load(const char* path) { | |||
const size_t size = ftell(fin.get()); | |||
std::fseek(fin.get(), 0, SEEK_SET); | |||
std::unique_ptr<char> buf(static_cast<char*>(malloc(size))); | |||
std::vector<char> buf(size + 1); | |||
auto nr = std::fread(buf.get(), 1, size, fin.get()); | |||
auto nr = std::fread(buf.data(), 1, size, fin.get()); | |||
mgb_assert(nr == size); | |||
return load(buf.get(), size); | |||
return load(buf.data(), size); | |||
} |
@@ -179,4 +179,41 @@ std::vector<uint8_t> ModelMdl::get_model_data() { | |||
mgb::serialization::GraphDumper::make(std::move(out_file), m_format.val()); | |||
dumper->dump(m_load_result.output_var_list, config); | |||
return out_data; | |||
} | |||
void ModelMdl::update_io() { | |||
//! update output varlist when input shape maybe change(some pass excution | |||
//! time depends on the shape of init input) | |||
mgb::thin_hash_table::ThinHashMap<mgb::cg::SymbolVar, mgb::cg::SymbolVar> varmap; | |||
auto&& network = m_load_result; | |||
std::unordered_map<void*, std::string> tensor_name_map; | |||
for (auto& input : network.tensor_map) { | |||
tensor_name_map.insert({input.second->raw_ptr(), input.first}); | |||
} | |||
mgb::cg::DepOprIter dep([&](mgb::cg::OperatorNodeBase* opr) { | |||
if (auto h2d = opr->try_cast_final<mgb::opr::Host2DeviceCopy>()) { | |||
if (tensor_name_map.find(h2d->host_data()->raw_ptr()) != | |||
tensor_name_map.end()) { | |||
//! make new h2d opr with new host tensor shape | |||
std::string name = tensor_name_map[h2d->host_data()->raw_ptr()]; | |||
std::shared_ptr<mgb::HostTensorND> new_tensor = | |||
std::make_shared<mgb::HostTensorND>(); | |||
new_tensor->copy_from(*h2d->host_data()); | |||
auto h2d_opr = mgb::opr::Host2DeviceCopy::make( | |||
*h2d->owner_graph(), new_tensor, h2d->param(), h2d->config()); | |||
//! rename new h2d with given name | |||
h2d_opr.node()->owner_opr()->name(name); | |||
varmap[h2d->output(0)] = h2d_opr; | |||
} | |||
} | |||
}); | |||
//! get replace var map | |||
for (auto&& i : network.output_var_list) | |||
dep.add(i); | |||
//! replace new h2d and update related var shape | |||
if (!varmap.empty()) { | |||
auto output_vars = mgb::cg::replace_vars(network.output_var_list, varmap); | |||
network.output_var_list = output_vars; | |||
} | |||
} |
@@ -108,6 +108,8 @@ public: | |||
std::vector<uint8_t> get_model_data() override; | |||
void update_io(); | |||
private: | |||
bool share_model_mem; | |||
std::string model_path; | |||
@@ -18,6 +18,11 @@ void COprLibOption::config_model_internel( | |||
"lite model dont't support run with external c opr " | |||
"parmeter"); | |||
} | |||
if (m_c_opr_init_func != MGB_C_OPR_INIT_FUNC_STR) { | |||
LITE_THROW( | |||
"lite model dont't support to set the c_opr_init_func to another " | |||
"API"); | |||
} | |||
} | |||
} | |||
template <> | |||
@@ -26,32 +26,89 @@ void InputOption::config_model_internel<ModelLite>( | |||
auto&& parser = model->get_input_parser(); | |||
auto&& network = model->get_lite_network(); | |||
//! datd type map from mgb data type to lite data type | |||
std::map<megdnn::DTypeEnum, LiteDataType> type_map = { | |||
{megdnn::DTypeEnum::Float32, LiteDataType::LITE_FLOAT}, | |||
{megdnn::DTypeEnum::Int32, LiteDataType::LITE_INT}, | |||
{megdnn::DTypeEnum::Int8, LiteDataType::LITE_INT8}, | |||
{megdnn::DTypeEnum::Uint8, LiteDataType::LITE_UINT8}}; | |||
for (auto& i : parser.inputs) { | |||
//! get tensor information from data parser | |||
auto tensor = i.second; | |||
auto data_type = tensor.dtype(); | |||
auto tensor_shape = tensor.shape(); | |||
mgb::dt_byte* src = tensor.raw_ptr(); | |||
//! set lite layout | |||
lite::Layout layout; | |||
layout.ndim = tensor_shape.ndim; | |||
for (size_t idx = 0; idx < tensor_shape.ndim; idx++) { | |||
layout.shapes[idx] = tensor_shape[idx]; | |||
//! datd type map from lite data type to mgb data type | |||
std::map<LiteDataType, megdnn::DTypeEnum> type_map = { | |||
{LiteDataType::LITE_FLOAT, megdnn::DTypeEnum::Float32}, | |||
{LiteDataType::LITE_INT, megdnn::DTypeEnum::Int32}, | |||
{LiteDataType::LITE_INT8, megdnn::DTypeEnum::Int8}, | |||
{LiteDataType::LITE_UINT8, megdnn::DTypeEnum::Uint8}}; | |||
if (m_force_batch_size > 0) { | |||
LITE_WARN("force set batch size to %d", m_force_batch_size); | |||
auto all_inputs_name = network->get_all_input_name(); | |||
for (auto& name : all_inputs_name) { | |||
std::shared_ptr<lite::Tensor> input_tensor = | |||
network->get_io_tensor(name); | |||
//! set lite layout | |||
lite::Layout layout; | |||
mgb::TensorShape new_shape; | |||
new_shape.ndim = input_tensor->get_layout().ndim; | |||
layout.ndim = input_tensor->get_layout().ndim; | |||
for (size_t idx = 0; idx < new_shape.ndim; idx++) { | |||
new_shape.shape[idx] = input_tensor->get_layout().shapes[idx]; | |||
layout.shapes[idx] = new_shape.shape[idx]; | |||
} | |||
new_shape.shape[0] = m_force_batch_size; | |||
layout.shapes[0] = m_force_batch_size; | |||
//! gengrate tesnor copy from origin tensor | |||
mgb::HostTensorND hv; | |||
hv.comp_node(mgb::CompNode::default_cpu(), true) | |||
.dtype(megdnn::DType::from_enum( | |||
type_map[input_tensor->get_layout().data_type])) | |||
.resize(new_shape); | |||
mgb::dt_byte* raw_ptr = hv.raw_ptr(); | |||
//! single batch input size | |||
size_t batch_stride = hv.dtype().size() * hv.layout().total_nr_elems() / | |||
m_force_batch_size; | |||
size_t curr_batch_size = m_force_batch_size; | |||
//! copy data from origin input_tensor | |||
size_t init_batch = input_tensor->get_layout().shapes[0]; | |||
while (curr_batch_size > init_batch) { | |||
memcpy((char*)raw_ptr, (char*)(input_tensor->get_memory_ptr()), | |||
batch_stride * init_batch); | |||
curr_batch_size -= init_batch; | |||
raw_ptr += batch_stride * init_batch; | |||
} | |||
memcpy((char*)raw_ptr, (char*)(input_tensor->get_memory_ptr()), | |||
batch_stride * curr_batch_size); | |||
input_tensor->reset(hv.raw_ptr(), layout); | |||
parser.inputs[name] = std::move(hv); | |||
} | |||
layout.data_type = type_map[data_type.enumv()]; | |||
} else { | |||
for (auto& i : parser.inputs) { | |||
//! get tensor information from data parser | |||
auto tensor = i.second; | |||
auto tensor_shape = tensor.shape(); | |||
mgb::dt_byte* src = tensor.raw_ptr(); | |||
std::shared_ptr<lite::Tensor> input_tensor = | |||
network->get_io_tensor(i.first); | |||
//! set lite layout | |||
lite::Layout layout; | |||
layout.ndim = tensor_shape.ndim; | |||
for (size_t idx = 0; idx < tensor_shape.ndim; idx++) { | |||
layout.shapes[idx] = tensor_shape[idx]; | |||
} | |||
layout.data_type = input_tensor->get_layout().data_type; | |||
//! set network input tensor | |||
std::shared_ptr<lite::Tensor> input_tensor = | |||
network->get_io_tensor(i.first); | |||
input_tensor->reset(src, layout); | |||
//! set data for only given shape | |||
if (tensor.storage().empty()) { | |||
mgb::HostTensorND hv; | |||
hv.comp_node(mgb::CompNode::default_cpu(), true) | |||
.dtype(megdnn::DType::from_enum(type_map[layout.data_type])) | |||
.resize(tensor.shape()); | |||
mgb::dt_byte* raw_ptr = hv.raw_ptr(); | |||
//! set all value in tesnor to 1 | |||
memset((char*)raw_ptr, 1, | |||
hv.layout().total_nr_elems() * hv.dtype().size()); | |||
parser.inputs[i.first] = std::move(hv); | |||
input_tensor->reset(raw_ptr, layout); | |||
} else { | |||
//! set network input tensor | |||
input_tensor->reset(src, layout); | |||
} | |||
} | |||
} | |||
} | |||
} | |||
@@ -67,22 +124,58 @@ void InputOption::config_model_internel<ModelMdl>( | |||
} else if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) { | |||
auto&& parser = model->get_input_parser(); | |||
auto&& network = model->get_mdl_load_result(); | |||
auto tensormap = network.tensor_map; | |||
for (auto& i : parser.inputs) { | |||
mgb_assert( | |||
tensormap.find(i.first) != tensormap.end(), | |||
"can't find tesnor named %s", i.first.c_str()); | |||
auto& in = tensormap.find(i.first)->second; | |||
if (i.second.storage().empty()) { | |||
auto&& tensormap = network.tensor_map; | |||
if (m_force_batch_size > 0) { | |||
mgb_log_warn("force set batch size to %d", m_force_batch_size); | |||
for (auto& iter : tensormap) { | |||
auto& in = iter.second; | |||
mgb::HostTensorND hv; | |||
mgb::TensorShape new_shape = in->shape(); | |||
new_shape[0] = m_force_batch_size; | |||
hv.comp_node(mgb::CompNode::default_cpu(), true) | |||
.dtype(in->dtype()) | |||
.resize(i.second.shape()); | |||
.resize(new_shape); | |||
mgb::dt_byte* raw_ptr = hv.raw_ptr(); | |||
memset((char*)raw_ptr, 1, hv.layout().total_nr_elems()); | |||
//! copy given batch data into new tensor | |||
size_t batch_stride = in->dtype().size() * | |||
in->layout().total_nr_elems() / (in->shape()[0]); | |||
size_t curr_batch_size = m_force_batch_size; | |||
//! copy data from origin input_tensor | |||
size_t init_batch = in->shape()[0]; | |||
while (curr_batch_size > init_batch) { | |||
memcpy((char*)raw_ptr, (char*)(in->raw_ptr()), | |||
batch_stride * init_batch); | |||
curr_batch_size -= init_batch; | |||
raw_ptr += batch_stride * init_batch; | |||
} | |||
memcpy((char*)raw_ptr, (char*)(in->raw_ptr()), | |||
batch_stride * curr_batch_size); | |||
//! set input tensor | |||
in->copy_from(hv); | |||
} else { | |||
in->copy_from(i.second); | |||
parser.inputs[iter.first] = std::move(hv); | |||
} | |||
} else { | |||
for (auto& i : parser.inputs) { | |||
mgb_assert( | |||
tensormap.find(i.first) != tensormap.end(), | |||
"can't find tesnor named %s", i.first.c_str()); | |||
auto& in = tensormap.find(i.first)->second; | |||
if (i.second.storage().empty()) { | |||
mgb::HostTensorND hv; | |||
hv.comp_node(mgb::CompNode::default_cpu(), true) | |||
.dtype(in->dtype()) | |||
.resize(i.second.shape()); | |||
mgb::dt_byte* raw_ptr = hv.raw_ptr(); | |||
memset((char*)raw_ptr, 1, | |||
hv.layout().total_nr_elems() * hv.dtype().size()); | |||
in->copy_from(hv); | |||
parser.inputs[i.first] = std::move(hv); | |||
} else { | |||
in->copy_from(i.second); | |||
} | |||
} | |||
} | |||
} | |||
@@ -191,6 +284,7 @@ void IOdumpOption::config_model_internel<ModelMdl>( | |||
using namespace lar; | |||
void InputOption::update() { | |||
data_path.clear(); | |||
m_option_name = "input"; | |||
size_t start = 0; | |||
auto end = FLAGS_input.find(";", start); | |||
@@ -201,6 +295,7 @@ void InputOption::update() { | |||
end = FLAGS_input.find(";", start); | |||
} | |||
data_path.emplace_back(FLAGS_input.substr(start)); | |||
m_force_batch_size = FLAGS_batch_size; | |||
} | |||
std::shared_ptr<lar::OptionBase> lar::InputOption::create_option() { | |||
@@ -283,7 +378,10 @@ void IOdumpOption::config_model( | |||
////////////////////// Input gflags //////////////////////// | |||
//! --input: input data for the model, given as a file path or an inline data
//! string; empty by default.
DEFINE_string( | |||
input, "", "Set up inputs data for model --input [ file_path | data_string]"); | |||
//! --batch_size: defaults to -1. InputOption::update() stores it in
//! m_force_batch_size, and the force-batch code paths only run when it is > 0.
DEFINE_int32( | |||
batch_size, -1, | |||
"set the batch size of input(especially for global layout transform " | |||
"optimization working on)"); | |||
////////////////////// OprIOdump gflags //////////////////////// | |||
DEFINE_string(io_dump, "", "set the io dump file path in text format"); | |||
@@ -299,4 +397,5 @@ DEFINE_string( | |||
DEFINE_bool(copy_to_host, false, "copy device data to host"); | |||
REGIST_OPTION_CREATOR(input, lar::InputOption::create_option); | |||
REGIST_OPTION_CREATOR(iodump, lar::IOdumpOption::create_option); |
@@ -13,7 +13,7 @@ DECLARE_bool(io_dump_stderr); | |||
DECLARE_string(bin_io_dump); | |||
DECLARE_string(bin_out_dump); | |||
DECLARE_bool(copy_to_host); | |||
DECLARE_int32(batch_size); | |||
namespace lar { | |||
/*! | |||
@@ -22,7 +22,7 @@ namespace lar { | |||
class InputOption final : public OptionBase { | |||
public: | |||
//! static function for registe options | |||
static bool is_valid() { return !FLAGS_input.empty(); }; | |||
static bool is_valid() { return !FLAGS_input.empty() || FLAGS_batch_size > 0; }; | |||
static std::shared_ptr<OptionBase> create_option(); | |||
void config_model( | |||
@@ -40,6 +40,7 @@ private: | |||
std::string m_option_name; | |||
std::vector<std::string> data_path; // data string or data file path | |||
int32_t m_force_batch_size; | |||
}; | |||
class IOdumpOption : public OptionBase { | |||
@@ -11,7 +11,7 @@ void GoptLayoutOption::config_model_internel<ModelLite>( | |||
RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) { | |||
if (runtime_param.stage == RunStage::AFTER_NETWORK_CREATED) { | |||
if (m_layout_transform) { | |||
LITE_LOG("using global layout transform optimization\n"); | |||
LITE_LOG("using global layout transform optimization"); | |||
if (m_layout_transform_target == | |||
mgb::gopt::GraphTuningOptions::Target::CPU) { | |||
model->get_config().device_type = LiteDeviceType::LITE_CPU; | |||
@@ -43,67 +43,25 @@ void GoptLayoutOption::config_model_internel<ModelMdl>( | |||
RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) { | |||
if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) { | |||
if (m_layout_transform) { | |||
mgb_log_debug("update input shape for global layout transform\n"); | |||
auto&& load_result = model->get_mdl_load_result(); | |||
if (m_force_batch_size > 0) { | |||
for (auto&& i : load_result.tensor_map) { | |||
auto& in = i.second; | |||
mgb::TensorShape new_shape = in->shape(); | |||
new_shape[0] = m_force_batch_size; | |||
mgb::HostTensorND new_tensor; | |||
new_tensor.comp_node(mgb::CompNode::default_cpu(), true) | |||
.dtype(in->dtype()) | |||
.resize(new_shape); | |||
mgb::dt_byte* raw_ptr = new_tensor.raw_ptr(); | |||
memset((char*)raw_ptr, 1, new_tensor.layout().total_nr_elems()); | |||
in->copy_from(new_tensor); | |||
} | |||
} | |||
for (auto&& item : load_result.output_var_list) { | |||
if (item.shape()[0] > 1) { | |||
mgb_log_warn( | |||
" model may be dumped with multi batch and will cost lots " | |||
"of time to profile during global layout transform!!!\n"); | |||
} | |||
} | |||
//! update output varlist when input shape maybe change(some pass excution | |||
//! time depends on the shape of init input) | |||
mgb::thin_hash_table::ThinHashMap<mgb::cg::SymbolVar, mgb::cg::SymbolVar> | |||
varmap; | |||
mgb::cg::DepOprIter dep([&](mgb::cg::OperatorNodeBase* opr) { | |||
if (auto h2d = opr->try_cast_final<mgb::opr::Host2DeviceCopy>()) { | |||
auto param = h2d->param(); | |||
mgb::TensorShape new_shape = h2d->host_data()->shape(); | |||
std::shared_ptr<mgb::HostTensorND> new_tensor = | |||
std::make_shared<mgb::HostTensorND>( | |||
h2d->host_data()->comp_node(), new_shape, | |||
h2d->host_data()->dtype()); | |||
new_tensor->only_reset_raw_storage(h2d->host_data()->storage()); | |||
auto h2d_opr = mgb::opr::Host2DeviceCopy::make( | |||
*h2d->owner_graph(), new_tensor, param, h2d->config()); | |||
varmap[h2d->output(0)] = h2d_opr; | |||
} | |||
}); | |||
for (auto&& i : load_result.output_var_list) | |||
dep.add(i); | |||
if (!varmap.empty()) { | |||
auto output_vars = | |||
mgb::cg::replace_vars(load_result.output_var_list, varmap); | |||
for (size_t i = 0; i < load_result.output_var_list.size(); ++i) { | |||
output_vars[i].rename( | |||
load_result.output_var_list[i].node()->name()); | |||
"of time to profile during global layout transform!!!"); | |||
} | |||
load_result.output_var_list = output_vars; | |||
} | |||
} | |||
} else if (runtime_param.stage == RunStage::GLOBAL_OPTIMIZATION) { | |||
if (m_layout_transform) { | |||
mgb_log("using global layout transform optimization\n"); | |||
mgb_log("using global layout transform optimization"); | |||
auto&& load_result = model->get_mdl_load_result(); | |||
load_result.output_var_list = mgb::gopt::layout_transform( | |||
auto output_vars = mgb::gopt::layout_transform( | |||
load_result.output_var_list, m_layout_transform_target); | |||
for (size_t i = 0; i < load_result.output_var_list.size(); ++i) { | |||
output_vars[i].rename(load_result.output_var_list[i].node()->name()); | |||
} | |||
load_result.output_var_list = output_vars; | |||
if (!m_layout_transform_dump_file.empty()) { | |||
auto out_file = mgb::serialization::OutputFile::make_fs( | |||
@@ -176,8 +134,6 @@ void GoptLayoutOption::update() { | |||
} | |||
m_layout_transform_dump_file = FLAGS_layout_transform_dump; | |||
m_force_batch_size = FLAGS_layout_transform_batch_size; | |||
m_option = { | |||
{"layout_transform", lar::String::make("")}, | |||
}; | |||
@@ -204,14 +160,6 @@ bool GoptLayoutOption::is_valid() { | |||
} | |||
} | |||
ret = ret || !FLAGS_layout_transform_dump.empty(); | |||
if (FLAGS_layout_transform_batch_size > 0) { | |||
mgb_assert( | |||
FLAGS_layout_transform_batch_size > 0 && | |||
!FLAGS_layout_transform.empty(), | |||
"\"layout-transform-batch-size\" should be set with " | |||
"\"layout-transform\""); | |||
ret = ret || FLAGS_layout_transform_batch_size > 0; | |||
} | |||
return ret || m_valid; | |||
} | |||
@@ -264,8 +212,5 @@ DEFINE_string( | |||
"The computing graph after global layout transform will be dumped to the given " | |||
"file path."); | |||
DEFINE_int32( | |||
layout_transform_batch_size, -1, | |||
"the batch size of input for global layout transform optimization working on"); | |||
REGIST_OPTION_CREATOR(gopt_layout, lar::GoptLayoutOption::create_option); | |||
REGIST_OPTION_VALIDATER(gopt_layout, lar::GoptLayoutOption::set_valid); |
@@ -5,7 +5,6 @@ | |||
#include "models/model.h" | |||
#include "option_base.h" | |||
DECLARE_string(layout_transform); | |||
DECLARE_int32(layout_transform_batch_size); | |||
DECLARE_string(layout_transform_dump); | |||
namespace lar { | |||
@@ -41,6 +40,5 @@ private: | |||
mgb::gopt::GraphTuningOptions::Target m_layout_transform_target; | |||
static bool m_valid; | |||
OptionValMap m_option; | |||
int32_t m_force_batch_size; | |||
}; | |||
} // namespace lar |
@@ -199,7 +199,7 @@ void DebugOption::format_and_print( | |||
std::stringstream ss; | |||
ss << table; | |||
LITE_LOG("%s\n\n", ss.str().c_str()); | |||
LITE_LOG("\n%s\n", ss.str().c_str()); | |||
} | |||
template <> | |||
@@ -243,7 +243,7 @@ void DebugOption::format_and_print( | |||
std::stringstream ss; | |||
ss << table; | |||
mgb_log("%s\n\n", ss.str().c_str()); | |||
mgb_log("\n%s\n", ss.str().c_str()); | |||
} | |||
template <> | |||
@@ -32,13 +32,19 @@ void StrategyOption::config_model( | |||
runtime_param.run_iter = run_iter; | |||
runtime_param.threads = threads; | |||
runtime_param.testcase_num = 1; | |||
} else if (runtime_param.stage == RunStage::UPDATE_IO) { | |||
if (model->type() == ModelType::MEGDL_MODEL) { | |||
auto model_ptr = std::static_pointer_cast<ModelMdl>(model); | |||
//! update input and output related varnode | |||
model_ptr->update_io(); | |||
} | |||
} else if (runtime_param.stage == RunStage::BEFORE_OUTSPEC_SET) { | |||
if (model->type() == ModelType::MEGDL_MODEL) { | |||
auto model_ptr = std::static_pointer_cast<ModelMdl>(model); | |||
auto num = model_ptr->get_testcase_num(); | |||
if (num != 0) | |||
runtime_param.testcase_num = num; | |||
//! make output specification | |||
model_ptr->make_output_spec(); | |||
} | |||
} | |||
@@ -205,9 +205,9 @@ void OptionsTimeProfiler::profile_with_given_options( | |||
//! after load configure | |||
auto config_model_before_runing = [&]() { | |||
for (auto stage : | |||
{RunStage::AFTER_MODEL_LOAD, RunStage::GLOBAL_OPTIMIZATION, | |||
RunStage::BEFORE_OUTSPEC_SET, RunStage::AFTER_OUTSPEC_SET, | |||
RunStage::MODEL_RUNNING}) { | |||
{RunStage::AFTER_MODEL_LOAD, RunStage::UPDATE_IO, | |||
RunStage::GLOBAL_OPTIMIZATION, RunStage::BEFORE_OUTSPEC_SET, | |||
RunStage::AFTER_OUTSPEC_SET, RunStage::MODEL_RUNNING}) { | |||
runtime_param.stage = stage; | |||
stage_config_model(); | |||
} | |||
@@ -453,9 +453,9 @@ void FittingStrategy::dump_best_options_with_model() { | |||
//! get model binary data after optimized | |||
for (auto stage : | |||
{RunStage::AFTER_MODEL_LOAD, RunStage::GLOBAL_OPTIMIZATION, | |||
RunStage::BEFORE_OUTSPEC_SET, RunStage::AFTER_OUTSPEC_SET, | |||
RunStage::MODEL_RUNNING}) { | |||
{RunStage::AFTER_MODEL_LOAD, RunStage::UPDATE_IO, | |||
RunStage::GLOBAL_OPTIMIZATION, RunStage::BEFORE_OUTSPEC_SET, | |||
RunStage::AFTER_OUTSPEC_SET, RunStage::MODEL_RUNNING}) { | |||
runtime_param.stage = stage; | |||
stage_config_model(); | |||
} | |||
@@ -502,9 +502,9 @@ void FittingStrategy::AutoCleanFile::dump_model() { | |||
model->load_model(); | |||
//! get model binary data after optimized | |||
for (auto stage : | |||
{RunStage::AFTER_MODEL_LOAD, RunStage::GLOBAL_OPTIMIZATION, | |||
RunStage::BEFORE_OUTSPEC_SET, RunStage::AFTER_OUTSPEC_SET, | |||
RunStage::MODEL_RUNNING}) { | |||
{RunStage::AFTER_MODEL_LOAD, RunStage::UPDATE_IO, | |||
RunStage::GLOBAL_OPTIMIZATION, RunStage::BEFORE_OUTSPEC_SET, | |||
RunStage::AFTER_OUTSPEC_SET, RunStage::MODEL_RUNNING}) { | |||
runtime_param.stage = stage; | |||
stage_config_model(); | |||
} | |||
@@ -53,8 +53,9 @@ void NormalStrategy::run_subline() { | |||
//! after load configure | |||
auto config_after_load = [&]() { | |||
for (auto stage : | |||
{RunStage::AFTER_MODEL_LOAD, RunStage::GLOBAL_OPTIMIZATION, | |||
RunStage::BEFORE_OUTSPEC_SET, RunStage::AFTER_OUTSPEC_SET}) { | |||
{RunStage::AFTER_MODEL_LOAD, RunStage::UPDATE_IO, | |||
RunStage::GLOBAL_OPTIMIZATION, RunStage::BEFORE_OUTSPEC_SET, | |||
RunStage::AFTER_OUTSPEC_SET}) { | |||
m_runtime_param.stage = stage; | |||
stage_config_model(); | |||
} | |||
@@ -0,0 +1,77 @@ | |||
#include <gtest/gtest.h> | |||
#include <string.h> | |||
#include <memory> | |||
#include "test_options.h" | |||
using namespace lar; | |||
DECLARE_bool(lite); | |||
DECLARE_string(input); | |||
DECLARE_int32(batch_size); | |||
DECLARE_int32(iter); | |||
namespace { | |||
STRING_OPTION_WRAP(input, ""); | |||
INT32_OPTION_WRAP(batch_size, -1); | |||
BOOL_OPTION_WRAP(lite); | |||
INT32_OPTION_WRAP(iter, 10); | |||
} // anonymous namespace | |||
TEST(TestLarIO, INPUT) { | |||
DEFINE_INT32_WRAP(iter, 1); | |||
{ | |||
std::string model_path = "./resnet50.mge"; | |||
TEST_STRING_OPTION(input, "data:./resnet50_input.npy"); | |||
} | |||
{ | |||
std::string model_path = "./add_demo.mge"; | |||
TEST_STRING_OPTION(input, "data:add_demo_input.json"); | |||
} | |||
{ | |||
std::string model_path = "./resnet50_uint8.mge"; | |||
TEST_STRING_OPTION(input, "data:./cat.ppm"); | |||
} | |||
{ | |||
std::string model_path = "./add_demo.mge"; | |||
TEST_STRING_OPTION(input, "data:[2.0,3.0,4.0]"); | |||
} | |||
{ | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_STRING_OPTION(input, "data:{2,3,224,224}"); | |||
} | |||
{ | |||
std::string model_path = "./resnet50_b10.mdl"; | |||
TEST_INT32_OPTION(batch_size, 1); | |||
TEST_INT32_OPTION(batch_size, 5); | |||
TEST_INT32_OPTION(batch_size, 11); | |||
} | |||
} | |||
TEST(TestLarIO, INPUT_LITE) { | |||
DEFINE_INT32_WRAP(iter, 1); | |||
DEFINE_BOOL_WRAP(lite); | |||
{ | |||
std::string model_path = "./resnet50.mge"; | |||
TEST_STRING_OPTION(input, "data:./resnet50_input.npy"); | |||
} | |||
{ | |||
std::string model_path = "./add_demo.mge"; | |||
TEST_STRING_OPTION(input, "data:add_demo_input.json"); | |||
} | |||
{ | |||
std::string model_path = "./resnet50_uint8.mge"; | |||
TEST_STRING_OPTION(input, "data:./cat.ppm"); | |||
} | |||
{ | |||
std::string model_path = "./add_demo.mge"; | |||
TEST_STRING_OPTION(input, "data:[2.0,3.0,4.0]"); | |||
} | |||
{ | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_STRING_OPTION(input, "data:{2,3,224,224}"); | |||
} | |||
{ | |||
std::string model_path = "./resnet50_b10.mdl"; | |||
TEST_INT32_OPTION(batch_size, 1); | |||
TEST_INT32_OPTION(batch_size, 5); | |||
TEST_INT32_OPTION(batch_size, 11); | |||
} | |||
} |
@@ -24,7 +24,7 @@ BOOL_OPTION_WRAP(cuda); | |||
} // anonymous namespace | |||
TEST(TestLarOption, OPTIMIZE_FOR_INFERENCE) { | |||
DEFINE_WRAP(cpu); | |||
DEFINE_BOOL_WRAP(cpu); | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_BOOL_OPTION(optimize_for_inference); | |||
@@ -33,7 +33,7 @@ TEST(TestLarOption, OPTIMIZE_FOR_INFERENCE) { | |||
#if LITE_WITH_OPENCL | |||
TEST(TestLarOption, OPTIMIZE_FOR_INFERENCE_OPENCL) { | |||
REQUIRE_OPENCL(); | |||
DEFINE_WRAP(opencl); | |||
DEFINE_BOOL_WRAP(opencl); | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_BOOL_OPTION(optimize_for_inference); | |||
@@ -43,7 +43,7 @@ TEST(TestLarOption, OPTIMIZE_FOR_INFERENCE_OPENCL) { | |||
#if LITE_WITH_CUDA | |||
TEST(TestLarOption, OPTIMIZE_FOR_INFERENCE_CUDA) { | |||
REQUIRE_CUDA(); | |||
DEFINE_WRAP(cuda); | |||
DEFINE_BOOL_WRAP(cuda); | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_BOOL_OPTION(optimize_for_inference); | |||
@@ -20,6 +20,7 @@ DECLARE_bool(enable_nchw64); | |||
DECLARE_bool(enable_nhwcd4); | |||
DECLARE_bool(enable_nchw44_dot); | |||
DECLARE_bool(fast_run); | |||
DECLARE_int32(iter); | |||
namespace { | |||
BOOL_OPTION_WRAP(enable_nchw4); | |||
BOOL_OPTION_WRAP(enable_chwn4); | |||
@@ -30,6 +31,7 @@ BOOL_OPTION_WRAP(enable_nchw64); | |||
BOOL_OPTION_WRAP(enable_nhwcd4); | |||
BOOL_OPTION_WRAP(enable_nchw44_dot); | |||
BOOL_OPTION_WRAP(fast_run); | |||
INT32_OPTION_WRAP(iter, 10); | |||
BOOL_OPTION_WRAP(lite); | |||
BOOL_OPTION_WRAP(cpu); | |||
@@ -39,7 +41,8 @@ BOOL_OPTION_WRAP(cuda); | |||
} // anonymous namespace | |||
TEST(TestLarLayout, X86_CPU) { | |||
DEFINE_WRAP(cpu); | |||
DEFINE_INT32_WRAP(iter, 1); | |||
DEFINE_BOOL_WRAP(cpu); | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_BOOL_OPTION(enable_nchw4); | |||
@@ -52,8 +55,9 @@ TEST(TestLarLayout, X86_CPU) { | |||
} | |||
TEST(TestLarLayout, X86_CPU_LITE) { | |||
DEFINE_WRAP(cpu); | |||
DEFINE_WRAP(lite); | |||
DEFINE_INT32_WRAP(iter, 1); | |||
DEFINE_BOOL_WRAP(cpu); | |||
DEFINE_BOOL_WRAP(lite); | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_BOOL_OPTION(enable_nchw4); | |||
@@ -65,18 +69,20 @@ TEST(TestLarLayout, X86_CPU_LITE) { | |||
} | |||
TEST(TestLarLayoutFastRun, CPU_LITE) { | |||
DEFINE_WRAP(cpu); | |||
DEFINE_WRAP(lite); | |||
DEFINE_INT32_WRAP(iter, 1); | |||
DEFINE_BOOL_WRAP(cpu); | |||
DEFINE_BOOL_WRAP(lite); | |||
std::string model_path = "./shufflenet.mge"; | |||
{ | |||
DEFINE_WRAP(enable_nchw44); | |||
DEFINE_WRAP(fast_run); | |||
DEFINE_BOOL_WRAP(enable_nchw44); | |||
DEFINE_BOOL_WRAP(fast_run); | |||
run_NormalStrategy(model_path); | |||
} | |||
} | |||
#if LITE_WITH_CUDA | |||
TEST(TestLarLayout, CUDA) { | |||
DEFINE_WRAP(cuda); | |||
DEFINE_INT32_WRAP(iter, 1); | |||
DEFINE_BOOL_WRAP(cuda); | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_BOOL_OPTION(enable_nchw4); | |||
TEST_BOOL_OPTION(enable_chwn4); | |||
@@ -87,8 +93,9 @@ TEST(TestLarLayout, CUDA) { | |||
} | |||
TEST(TestLarLayout, CUDA_LITE) { | |||
DEFINE_WRAP(cuda); | |||
DEFINE_WRAP(lite); | |||
DEFINE_INT32_WRAP(iter, 1); | |||
DEFINE_BOOL_WRAP(cuda); | |||
DEFINE_BOOL_WRAP(lite); | |||
std::string model_path = "./shufflenet.mge"; | |||
TEST_BOOL_OPTION(enable_nchw4); | |||
@@ -23,11 +23,35 @@ void run_NormalStrategy(std::string model_path); | |||
~BoolOptionWrap_##option() { FLAGS_##option = false; } \ | |||
}; | |||
#define DEFINE_WRAP(option) BoolOptionWrap_##option flags_##option; | |||
//! Declares StringOptionWrap_<option>: a scope guard that assigns `val` to
//! FLAGS_<option> on construction and assigns `default_val` back on
//! destruction. The constructor is explicit and the guard is non-copyable, so
//! a stray copy can never reset the flag while the original guard is alive.
#define STRING_OPTION_WRAP(option, default_val) \
    struct StringOptionWrap_##option { \
        explicit StringOptionWrap_##option(const char* val) { \
            FLAGS_##option = val; \
        } \
        ~StringOptionWrap_##option() { FLAGS_##option = default_val; } \
        StringOptionWrap_##option(const StringOptionWrap_##option&) = delete; \
        StringOptionWrap_##option& operator=(const StringOptionWrap_##option&) = \
                delete; \
    };
//! Declares Int32OptionWrap_<option>: same scope guard as above for an int32
//! flag.
#define INT32_OPTION_WRAP(option, default_val) \
    struct Int32OptionWrap_##option { \
        explicit Int32OptionWrap_##option(int32_t val) { FLAGS_##option = val; } \
        ~Int32OptionWrap_##option() { FLAGS_##option = default_val; } \
        Int32OptionWrap_##option(const Int32OptionWrap_##option&) = delete; \
        Int32OptionWrap_##option& operator=(const Int32OptionWrap_##option&) = \
                delete; \
    };
//! Instantiate a guard named flags_<option> in the current scope; the flag
//! keeps the given value until that scope ends.
#define DEFINE_BOOL_WRAP(option) BoolOptionWrap_##option flags_##option;
#define DEFINE_STRING_WRAP(option, value) \
    StringOptionWrap_##option flags_##option(value);
#define DEFINE_INT32_WRAP(option, value) Int32OptionWrap_##option flags_##option(value);
#define TEST_BOOL_OPTION(option) \ | |||
{ \ | |||
DEFINE_WRAP(option); \ | |||
DEFINE_BOOL_WRAP(option); \ | |||
run_NormalStrategy(model_path); \ | |||
} | |||
//! Runs run_NormalStrategy(model_path) once while FLAGS_<option> holds `value`.
//! The flag is set through DEFINE_STRING_WRAP inside the braces, so the guard's
//! destructor runs when the block closes. A variable named model_path must be
//! visible at the point of use.
#define TEST_STRING_OPTION(option, value) \ | |||
{ \ | |||
DEFINE_STRING_WRAP(option, value); \ | |||
run_NormalStrategy(model_path); \ | |||
} | |||
//! Same as TEST_STRING_OPTION for an int32 flag, using DEFINE_INT32_WRAP.
#define TEST_INT32_OPTION(option, value) \ | |||
{ \ | |||
DEFINE_INT32_WRAP(option, value); \ | |||
run_NormalStrategy(model_path); \ | |||
} | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -64,7 +64,8 @@ OperatorNodeBase* SubGraph::Rewriter::auto_replace_outputs(OperatorNodeBase* opr | |||
bool v0 = out0[i]->contain_flag(VarNode::Flag::VOLATILE_CONTENT), | |||
v1 = out1[i]->contain_flag(VarNode::Flag::VOLATILE_CONTENT); | |||
mgb_assert(v0 == v1, "%s", err_msg().c_str()); | |||
//! rename new var | |||
out1[i]->name(out0[i]->cname()); | |||
auto&& ins = m_varmap.insert({out0[i], {true, nullptr}}); | |||
mgb_assert( | |||
ins.second || ins.first->second.first, | |||