GitOrigin-RevId: 65c2430ec2
release-1.7
@@ -138,7 +138,7 @@ void ConvBiasForwardImpl::AlgoCUDNNConv::exec(const ExecArgs& args) const {
     if (args.z_layout->ndim > 0) {
         auto z_tensor = *args.z_tensor;
         if (args.z_layout->dtype.enumv() != args.bias_layout->dtype.enumv()) {
-            z_tensor.raw_ptr = bundle.get(2);
+            z_tensor = TensorND{bundle.get(2), args.z_tensor->layout};
             z_tensor.layout.dtype = DType();
             args.opr->check_or_deduce_dtype_fwd(
                     args.src_layout->dtype, args.filter_layout->dtype,
@@ -36,6 +36,8 @@ enum class RunStage {
     AFTER_RUNNING_ITER = 6,
     AFTER_MODEL_RUNNING = 7,
+
+    GLOBAL_OPTIMIZATION = 8,
 };

 /*!
  * \brief: type of different model
@@ -52,15 +52,15 @@ void ModelMdl::load_model() {
         m_model_file->read(&testcase_num, sizeof(testcase_num));
     }

-    auto format =
+    m_format =
             mgb::serialization::GraphLoader::identify_graph_dump_format(*m_model_file);
     mgb_assert(
-            format.valid(),
+            m_format.valid(),
             "invalid format, please make sure model is dumped by GraphDumper");

     //! load computing graph of model
     m_loader = mgb::serialization::GraphLoader::make(
-            std::move(m_model_file), format.val());
+            std::move(m_model_file), m_format.val());
     m_load_result = m_loader->load(m_load_config, false);
     m_load_config.comp_graph.reset();
@@ -87,9 +87,15 @@ void ModelMdl::make_output_spec() {
     m_asyc_exec = m_load_result.graph_compile(m_output_spec);
 }

-std::shared_ptr<mgb::serialization::GraphLoader>& ModelMdl::reset_loader() {
-    m_loader = mgb::serialization::GraphLoader::make(
-            m_loader->reset_file(), m_loader->format());
+std::shared_ptr<mgb::serialization::GraphLoader>& ModelMdl::reset_loader(
+        std::unique_ptr<mgb::serialization::InputFile> input_file) {
+    if (input_file) {
+        m_loader = mgb::serialization::GraphLoader::make(
+                std::move(input_file), m_loader->format());
+    } else {
+        m_loader = mgb::serialization::GraphLoader::make(
+                m_loader->reset_file(), m_loader->format());
+    }
     return m_loader;
 }
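A hedged usage sketch of the two reset_loader() call forms (caller-side code
for illustration only, not part of this patch; model is a
std::shared_ptr<ModelMdl> as declared in the header below):

    // form 1: pass an explicitly repositioned input file; the loader is
    // rebuilt on that file while keeping the format it already identified
    auto file = model->get_loader()->reset_file();
    auto offset = file->tell();            // remember the current position
    file->rewind();                        // seek back to the start ...
    file->skip(offset);                    // ... and restore the position
    model->reset_loader(std::move(file));

    // form 2: no argument; recycle the loader's current file in place, so
    // the next load() continues reading from the current position
    model->reset_loader();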
@@ -50,8 +50,16 @@ public:
     //! get load config for megDL model
     mgb::serialization::GraphLoadConfig& get_mdl_config() { return m_load_config; }

-    //! reset the graph loader for dump_with_testcase model
-    std::shared_ptr<mgb::serialization::GraphLoader>& reset_loader();
+    /*! reset the underlying graph loader from which further load() would read
+     *
+     * \param input_file the new input file; may be null
+     * \return the new loader
+     */
+    std::shared_ptr<mgb::serialization::GraphLoader>& reset_loader(
+            std::unique_ptr<mgb::serialization::InputFile> input_file = {});
+
+    //! get the underlying graph loader
+    std::shared_ptr<mgb::serialization::GraphLoader>& get_loader() { return m_loader; }

     //! algo strategy for running model
     void set_mdl_strategy(Strategy& u_strategy) { m_strategy = u_strategy; }
@@ -88,11 +96,18 @@ public:
                 m_load_config.comp_graph.get(), range);
     }

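+    //! build a dumper writing to out_file in the same dump format that was
+    //! identified for the input model at load time (kept in m_format below)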
+    std::unique_ptr<mgb::serialization::GraphDumper> get_dumper(
+            std::unique_ptr<mgb::serialization::OutputFile> out_file) {
+        return mgb::serialization::GraphDumper::make(
+                std::move(out_file), m_format.val());
+    }
+
 private:
     bool share_model_mem;
     std::string model_path;

     std::unique_ptr<mgb::serialization::InputFile> m_model_file;
     mgb::serialization::GraphLoadConfig m_load_config;
+    mgb::Maybe<mgb::serialization::GraphDumpFormat> m_format;
     mgb::serialization::GraphLoader::LoadResult m_load_result;
     std::shared_ptr<mgb::serialization::GraphLoader> m_loader;
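Taken together, m_format and get_dumper() let callers re-serialize a graph in
whatever format the input model was loaded from. A hedged sketch of the
intended round trip (model construction and the transform step are assumed
for illustration):

    model->load_model();      // identify_graph_dump_format() fills m_format
    auto&& result = model->get_mdl_load_result();
    // ... rewrite result.output_var_list, e.g. with gopt::layout_transform ...
    auto out = mgb::serialization::OutputFile::make_fs("model_opt.mge", 'w');
    auto dumper = model->get_dumper(std::move(out));  // same format as input
    dumper->dump(result.output_var_list, {1, false, false});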
@@ -0,0 +1,148 @@
+/**
+ * \file lite/load_and_run/src/options/layout_trans_options.cpp
+ *
+ * This file is part of MegEngine, a deep learning framework developed by
+ * Megvii.
+ *
+ * \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
+ */
+
+#include "layout_trans_options.h"
+#include <gflags/gflags.h>
+#include "megbrain/serialization/serializer.h"
+#include "misc.h"
+#include "models/model_lite.h"
+#include "models/model_mdl.h"
+
+namespace lar {
+
+template <>
+void GoptLayoutOption::config_model_internel<ModelLite>(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelLite> /* model */) {
+    if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
+        LITE_THROW("lite model doesn't support global graph optimization");
+    }
+}
+
+template <>
+void GoptLayoutOption::config_model_internel<ModelMdl>(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
+    if (runtime_param.stage == RunStage::GLOBAL_OPTIMIZATION) {
+        if (layout_transform) {
+            auto&& load_result = model->get_mdl_load_result();
+            load_result.output_var_list = mgb::gopt::layout_transform(
+                    load_result.output_var_list, layout_transform_target);
+
+            if (!layout_transform_dump_file.empty()) {
+                auto out_file = mgb::serialization::OutputFile::make_fs(
+                        layout_transform_dump_file.c_str(), 'w');
+                auto testcase_num = model->get_testcase_num();
+                if (testcase_num) {
+                    //! "mgbtest0" is 8 bytes; take sizeof the array minus the
+                    //! trailing '\0' so the written width is explicit
+                    constexpr char magic[] = "mgbtest0";
+                    constexpr size_t len = sizeof(magic) - 1;
+                    out_file->write(magic, len);
+                    out_file->write(&testcase_num, sizeof(testcase_num));
+                }
+
+                using DumpConfig = mgb::serialization::GraphDumper::DumpConfig;
+                DumpConfig config{1, false, false};
+                auto dumper = model->get_dumper(std::move(out_file));
+                dumper->dump(load_result.output_var_list, config);
+
+                if (testcase_num) {
+                    auto input_file = model->get_loader()->reset_file();
+                    auto current_offset = input_file->tell();
+                    auto loader = model->reset_loader(std::move(input_file));
+                    auto testcase = loader->load(model->get_mdl_config(), false);
+                    //! reuse the same DumpConfig for every testcase dump
+                    for (size_t i = 0; i < testcase_num; ++i) {
+                        auto casefile = mgb::serialization::OutputFile::make_fs(
+                                layout_transform_dump_file.c_str(), 'a');
+                        auto casedumper = model->get_dumper(std::move(casefile));
+                        casedumper->dump(testcase.output_var_list, config);
+                        if (i != testcase_num - 1) {
+                            loader = model->reset_loader();
+                            testcase = loader->load(model->get_mdl_config(), false);
+                        }
+                    }
+                    input_file = model->get_loader()->reset_file();
+                    input_file->rewind();
+                    input_file->skip(current_offset);
+                    model->reset_loader(std::move(input_file));
+                }
+            }
+        }
+    }
+}
+
+}  // namespace lar
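For reference, the loop above reproduces the dump_with_testcase layout: the
"mgbtest0" magic, then the testcase count, then the transformed model graph,
then one appended graph dump per testcase. A minimal header check as a sketch
(the width of the count field is assumed to be 32 bits here, which this
sketch does not verify against ModelMdl):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    bool has_testcases(const char* path, int32_t* num) {
        std::FILE* fp = std::fopen(path, "rb");
        if (!fp)
            return false;
        char magic[8];
        bool ok = std::fread(magic, 1, 8, fp) == 8 &&
                  std::memcmp(magic, "mgbtest0", 8) == 0 &&
                  std::fread(num, sizeof(*num), 1, fp) == 1;
        std::fclose(fp);
        return ok;
    }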
+
+using namespace lar;
+
+GoptLayoutOption::GoptLayoutOption() {
+    m_option_name = "gopt_layout";
+    if (FLAGS_layout_transform != "cuda" && FLAGS_layout_transform != "cpu" &&
+        FLAGS_layout_transform != "opencl") {
+        layout_transform = false;
+        layout_transform_target = mgb::gopt::GraphTuningOptions::Target::UNSPEC;
+    } else {
+        layout_transform = true;
+        if (FLAGS_layout_transform == "cuda") {
+            layout_transform_target = mgb::gopt::GraphTuningOptions::Target::CUDA;
+        } else if (FLAGS_layout_transform == "cpu") {
+            layout_transform_target = mgb::gopt::GraphTuningOptions::Target::CPU;
+        } else if (FLAGS_layout_transform == "opencl") {
+            layout_transform_target = mgb::gopt::GraphTuningOptions::Target::OPENCL;
+        }
+    }
+    layout_transform_dump_file = FLAGS_layout_transform_dump;
+}
+
+bool GoptLayoutOption::is_valid() {
+    bool ret = false;
+    if (!FLAGS_layout_transform.empty()) {
+        if (FLAGS_layout_transform != "cuda" && FLAGS_layout_transform != "cpu" &&
+            FLAGS_layout_transform != "opencl") {
+            mgb_assert(
+                    false,
+                    "unsupported target (got: %s) for global layout transform",
+                    FLAGS_layout_transform.c_str());
+            ret = false;
+        } else {
+            ret = true;
+        }
+    }
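+    //! with no transform target given, the option is still considered valid
+    //! as long as no dump file was requested; asking for a dump file without
+    //! a target is the only rejected combination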
+    ret = ret || FLAGS_layout_transform_dump.empty();
+    return ret;
+}
+
+std::shared_ptr<OptionBase> GoptLayoutOption::create_option() {
+    static std::shared_ptr<GoptLayoutOption> option(new GoptLayoutOption);
+    if (GoptLayoutOption::is_valid()) {
+        return std::static_pointer_cast<OptionBase>(option);
+    } else {
+        return nullptr;
+    }
+}
+
+void GoptLayoutOption::config_model(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
+    CONFIG_MODEL_FUN;
+}
+
+DEFINE_string(
+        layout_transform, "",
+        "Enable global layout transform optimization for the computing graph. The "
+        "user should specify a device target for the optimization; a series of "
+        "passes will then be applied to the computing graph. These passes benchmark "
+        "the elapsed time of operators on different tensor layouts and select the "
+        "fastest implementation for each operator, so the optimization process "
+        "takes some time. The default target is unspec, in which case all available "
+        "algorithms for the operators are profiled and the optimization takes even "
+        "longer.");
+
+DEFINE_string(
+        layout_transform_dump, "",
+        "The file path that the computing graph after global layout transform will "
+        "be dumped to.");
+
+REGIST_OPTION_CREATOR(gopt_layout, lar::GoptLayoutOption::create_option);
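A hedged usage note (binary and file names here are illustrative): on the
command line the pair of flags reads as
load_and_run model.mge --layout_transform=cuda
--layout_transform_dump=model_after_gopt.mge, and tests can set the same
gflags storage programmatically:

    #include <gflags/gflags.h>

    DECLARE_string(layout_transform);
    DECLARE_string(layout_transform_dump);

    void enable_gopt_for_test() {
        FLAGS_layout_transform = "cuda";          // pick a profiling target
        FLAGS_layout_transform_dump = "opt.mge";  // where to dump the result
    }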
@@ -0,0 +1,45 @@
+/**
+ * \file lite/load_and_run/src/options/layout_trans_options.h
+ *
+ * This file is part of MegEngine, a deep learning framework developed by
+ * Megvii.
+ *
+ * \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
+ */
+
+#pragma once
+
+#include <gflags/gflags.h>
+#include "megbrain/gopt/inference.h"
+#include "models/model.h"
+#include "option_base.h"
+
+DECLARE_string(layout_transform);
+DECLARE_string(layout_transform_dump);
+
+namespace lar {
+
+class GoptLayoutOption final : public OptionBase {
+public:
+    //! get the condition for constructing GoptLayoutOption
+    static bool is_valid();
+
+    //! create the option using conditions from cmdline args
+    static std::shared_ptr<OptionBase> create_option();
+
+    //! configure the model for different runtime_param stages
+    void config_model(
+            RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;
+
+    //! get the option name for quick lookup
+    std::string option_name() const override { return m_option_name; }
+
+private:
+    GoptLayoutOption();
+
+    //! config template for different models
+    template <typename ModelImpl>
+    void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>) {}
+
+    bool layout_transform;
+    std::string m_option_name;
+    std::string layout_transform_dump_file;
+    mgb::gopt::GraphTuningOptions::Target layout_transform_target;
+};
+
+}  // namespace lar
@@ -93,4 +93,4 @@ DEFINE_bool(share_param_mem, false, "load model from shared memory");

 REGIST_OPTION_CREATOR(run_strategy, lar::StrategyOption::create_option);

-REGIST_OPTION_CREATOR(run_testcase, lar::TestcaseOption::create_option);
\ No newline at end of file
+REGIST_OPTION_CREATOR(run_testcase, lar::TestcaseOption::create_option);
@@ -60,6 +60,9 @@ void NormalStrategy::run_subline() {
     m_runtime_param.stage = RunStage::AFTER_MODEL_LOAD;
     stage_config_model();

+    m_runtime_param.stage = RunStage::GLOBAL_OPTIMIZATION;
+    stage_config_model();
+
     m_runtime_param.stage = RunStage::BEFORE_OUTSPEC_SET;
     stage_config_model();
@@ -164,4 +167,4 @@ void NormalStrategy::run() {
         mgb_assert(false, "--thread must be a positive number!!");
     }
     //! execute before run
-}
\ No newline at end of file
+}