Browse Source

feat(externcopr): add config extern c opr dynamic param

GitOrigin-RevId: 0fa3a534af
release-1.2
Megvii Engine Team 4 years ago
parent
commit
9ec8d375f1
7 changed files with 434 additions and 27 deletions
  1. +2
    -2
      sdk/load-and-run/src/mgblar.cpp
  2. +29
    -0
      src/core/include/megbrain/graph/extern_copr_api.h
  3. +106
    -12
      src/serialization/impl/extern_c_opr.cpp
  4. +52
    -0
      src/serialization/include/megbrain/serialization/extern_c_opr.h
  5. +18
    -4
      src/serialization/include/megbrain/serialization/extern_c_opr_io.h
  6. +220
    -9
      src/serialization/test/extern_c_opr.cpp
  7. +7
    -0
      src/serialization/test/extern_c_opr_v23.h

+ 2
- 2
sdk/load-and-run/src/mgblar.cpp View File

@@ -153,7 +153,7 @@ R"__usage__(
Print PID and wait for a line from stdin before starting execution. Useful Print PID and wait for a line from stdin before starting execution. Useful
for waiting for gdb attach. for waiting for gdb attach.
--c-opr-lib <path> --c-opr-lib <path>
Load external operator library. It must implement `mgb_c_opr_init` as the
Load external operator library. It must implement MGB_C_OPR_INIT_FUNC_STR as the
entry point. entry point.
--thread <num> --thread <num>
Number of threads to run concurrently. All threads perform the same work of Number of threads to run concurrently. All threads perform the same work of
@@ -1223,7 +1223,7 @@ Args Args::from_argv(int argc, char **argv) {
auto handle = dlopen(argv[i], RTLD_LAZY); auto handle = dlopen(argv[i], RTLD_LAZY);
mgb_assert(handle, "failed to open c opr lib %s: %s", mgb_assert(handle, "failed to open c opr lib %s: %s",
argv[i], dlerror()); argv[i], dlerror());
const char* entry = "mgb_c_opr_init";
const char* entry = MGB_C_OPR_INIT_FUNC_STR;
auto func = dlsym(handle, entry); auto func = dlsym(handle, entry);
mgb_assert(func, "can not resolve %s: %s", entry, dlerror()); mgb_assert(func, "can not resolve %s: %s", entry, dlerror());
typedef void (*entry_f_t)(void*); typedef void (*entry_f_t)(void*);


+ 29
- 0
src/core/include/megbrain/graph/extern_copr_api.h View File

@@ -0,0 +1,29 @@
/**
* \file src/core/include/megbrain/graph/extern_copr_api.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#pragma once

#include "megbrain/graph/bases.h"
#include "megbrain/serialization/extern_c_opr.h"

namespace mgb {

/*!
* \brief attach a dynamic ExternCOprParam to an ExternCOprRunner operator of a
*      compiled function
*
* The operator sequence of \p func is walked and \p param is handed to the
* first ExternCOprRunner whose dump name matches
* param->extern_c_opr_dump_name; when that field is null, the first
* ExternCOprRunner encountered is configured.
*
* \param func compiled function whose operator sequence is searched
* \param param dynamic param to attach; must not be null
* \throw MegBrainError if \p param is null, or if no ExternCOprRunner in
*      \p func was configured
*/
void config_extern_c_opr_dynamic_param(
std::unique_ptr<cg::AsyncExecutable>& func,
std::shared_ptr<ExternCOprParam> param);

} // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 106
- 12
src/serialization/impl/extern_c_opr.cpp View File

@@ -9,8 +9,9 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ */


#include "megbrain/comp_node_env.h"
#include "megbrain/serialization/extern_c_opr.h" #include "megbrain/serialization/extern_c_opr.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/graph/extern_copr_api.h"
#include "megbrain/serialization/extern_c_opr_io.h" #include "megbrain/serialization/extern_c_opr_io.h"
#include "megbrain/serialization/opr_load_dump.h" #include "megbrain/serialization/opr_load_dump.h"


@@ -280,11 +281,14 @@ void PlaceholderMGBOprDesc::dump(OprDumpContext& ctx, MGBOprDesc* desc) {


/* ===================== ExternCOprRunner ===================== */ /* ===================== ExternCOprRunner ===================== */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ExternCOprRunner); MGB_DYN_TYPE_OBJ_FINAL_IMPL(ExternCOprRunner);
ExternCOprRunner::ExternCOprRunner(const VarNodeArray& inputs,
ExternCOprRunner::ExternCOprRunner(std::string& name,
const VarNodeArray& inputs,
std::shared_ptr<MGBOprDesc> desc, std::shared_ptr<MGBOprDesc> desc,
const OperatorNodeConfig& config) const OperatorNodeConfig& config)
: Super{inputs[0]->owner_graph(), config, desc->type_name, inputs}, : Super{inputs[0]->owner_graph(), config, desc->type_name, inputs},
m_desc{std::move(desc)} {
m_desc{std::move(desc)},
m_dump_name{name},
m_param{nullptr} {
mgb_assert(m_desc->size == sizeof(MGBOprDesc), mgb_assert(m_desc->size == sizeof(MGBOprDesc),
"invalid MGBOprDesc size: expect=%zu got=%u", sizeof(MGBOprDesc), "invalid MGBOprDesc size: expect=%zu got=%u", sizeof(MGBOprDesc),
m_desc->size); m_desc->size);
@@ -332,10 +336,61 @@ void ExternCOprRunner::init_output_dtype() {
output(i)->dtype(dtype_c2cpp(out_dtypes[i])); output(i)->dtype(dtype_c2cpp(out_dtypes[i]));
} }
} }
/*!
 * \brief validate the dynamic param (m_param) against this operator's vars
 *
 * Nothing is checked when no param has been set. A side (input/output) is
 * also skipped when its nr_input/nr_output is 0: an ExternCOprParam may only
 * carry device_id, extra_info, etc. without any ExternDeviceTensor.
 * For a side that is provided, the tensor count, every device_ptr, the dtype,
 * the ndim and every shape extent must agree with the matching var nodes;
 * any mismatch fails an mgb_assert.
 */
void ExternCOprRunner::check_param() {
    if (!m_param) {
        return;
    }

    auto check = [](size_t nr_config_tensor, size_t var_node_size,
                    ExternDeviceTensor* e_tensor,
                    const VarNodeArray& var_node_array, const char* msg) {
        mgb_assert(e_tensor, "%s ExternDeviceTensor should not be null!!", msg);
        mgb_assert(
                nr_config_tensor == var_node_size,
                "param %s size provided by `config_extern_c_opr_dynamic_param` "
                "mismatch with the number of %s, got %zu, expected %zu",
                msg, msg, nr_config_tensor, var_node_size);
        for (size_t i = 0; i < nr_config_tensor; i++) {
            mgb_assert(e_tensor[i].device_ptr,
                       "%s ExternDeviceTensor(index: %zu) device_ptr should "
                       "not be null!!",
                       msg, i);
            auto param_shape = e_tensor[i].layout.shape;
            auto shape = var_node_array.at(i)->shape();
            auto param_dtype = e_tensor[i].layout.dtype;
            auto dtype = dtype_cpp2c(var_node_array.at(i)->dtype());
            //! "expected" is the var node dtype, "got" is the value supplied
            //! in the param; cast the enums so they match the %d specifier
            mgb_assert(param_dtype == dtype,
                       "%s dtype provided mismatch, expected: %d, got: %d of "
                       "index: %zu",
                       msg, static_cast<int>(dtype),
                       static_cast<int>(param_dtype), i);
            mgb_assert(shape.ndim == param_shape.ndim,
                       "%s ndim provided mismatch got: %u, expect: %zu of "
                       "index: %zu",
                       msg, param_shape.ndim, shape.ndim, i);
            for (size_t j = 0; j < shape.ndim; j++) {
                mgb_assert(param_shape.shape[j] == shape.shape[j],
                           "config %s shape should same with c opr %s shape: "
                           "(got: %u expect: %zu) of index: %zu",
                           msg, msg, param_shape.shape[j], shape.shape[j], j);
            }
        }
    };

    if (m_param->nr_input > 0) {
        check(m_param->nr_input, input().size(), m_param->input, input(),
              "input");
    }

    if (m_param->nr_output > 0) {
        check(m_param->nr_output, output().size(), m_param->output, output(),
              "output");
    }
}


void ExternCOprRunner::scn_do_execute() { void ExternCOprRunner::scn_do_execute() {
SmallVector<MGBTensor> c_inp(input().size()), c_out(output().size()); SmallVector<MGBTensor> c_inp(input().size()), c_out(output().size());
SmallVector<HostTensorND> cpu_inp, cpu_out; SmallVector<HostTensorND> cpu_inp, cpu_out;
check_param();


bool need_copy = false; bool need_copy = false;
if (comp_node().device_type() == CompNode::DeviceType::CPU) { if (comp_node().device_type() == CompNode::DeviceType::CPU) {
@@ -399,27 +454,31 @@ cg::OperatorNodeBase* ExternCOprRunner::make_placeholder(
var_inp[i] = inputs[i].node(); var_inp[i] = inputs[i].node();
} }


return make_from_desc(var_inp, desc, config);
auto dump_name = std::string{name};
return make_from_desc(dump_name, var_inp, desc, config);
} }


cg::OperatorNodeBase* ExternCOprRunner::make_from_desc( cg::OperatorNodeBase* ExternCOprRunner::make_from_desc(
const VarNodeArray& inputs, MGBOprDesc* desc,
std::string& name, const VarNodeArray& inputs, MGBOprDesc* desc,
const OperatorNodeConfig& config) { const OperatorNodeConfig& config) {
auto desc_del = [](MGBOprDesc* ptr) { ptr->release(ptr); }; auto desc_del = [](MGBOprDesc* ptr) { ptr->release(ptr); };
return make_from_desc_shared(inputs, {desc, desc_del}, config);
return make_from_desc_shared(name, inputs, {desc, desc_del}, config);
} }


cg::OperatorNodeBase* ExternCOprRunner::make_from_desc_shared( cg::OperatorNodeBase* ExternCOprRunner::make_from_desc_shared(
const VarNodeArray& inputs, std::shared_ptr<MGBOprDesc> desc,
const OperatorNodeConfig& config) {
std::string& name, const VarNodeArray& inputs,
std::shared_ptr<MGBOprDesc> desc, const OperatorNodeConfig& config) {
mgb_assert(!inputs.empty() && desc->nr_output); mgb_assert(!inputs.empty() && desc->nr_output);


#define CHECK(name) mgb_assert(desc->name, #name " is not given"); #define CHECK(name) mgb_assert(desc->name, #name " is not given");
MGB_OPR_DESC_FOREACH_MEM_FN(CHECK); MGB_OPR_DESC_FOREACH_MEM_FN(CHECK);
#undef CHECK #undef CHECK


if (!config.name().valid())
const_cast<OperatorNodeConfig&>(config).name(name);

auto opr = inputs[0]->owner_graph()->insert_opr( auto opr = inputs[0]->owner_graph()->insert_opr(
std::make_unique<ExternCOprRunner>(inputs, std::move(desc),
std::make_unique<ExternCOprRunner>(name, inputs, std::move(desc),
config)); config));
return &opr->cast_final_safe<ExternCOprRunner>(); return &opr->cast_final_safe<ExternCOprRunner>();
} }
@@ -437,7 +496,11 @@ void ExternCOprRunner::dump(OprDumpContext& ctx,
cg::OperatorNodeBase* ExternCOprRunner::load(OprLoadContext& ctx, cg::OperatorNodeBase* ExternCOprRunner::load(OprLoadContext& ctx,
const cg::VarNodeArray& inputs, const cg::VarNodeArray& inputs,
const OperatorNodeConfig& config) { const OperatorNodeConfig& config) {
auto name = ctx.load_buf_with_len();
auto dump_name = ctx.load_buf_with_len();
auto name = dump_name;
//! use to compat dump ExternCOprRunner with more info
if (auto index = name.find(":"))
name = name.substr(0, index);
auto&& map = loader_map(); auto&& map = loader_map();
auto iter = map.find(name); auto iter = map.find(name);
mgb_assert(iter != map.end(), mgb_assert(iter != map.end(),
@@ -448,7 +511,7 @@ cg::OperatorNodeBase* ExternCOprRunner::load(OprLoadContext& ctx,
if (auto trans = iter->second.second) { if (auto trans = iter->second.second) {
desc = trans(desc); desc = trans(desc);
} }
return make_from_desc(inputs, desc, config);
return make_from_desc(dump_name, inputs, desc, config);
} }


cg::OperatorNodeBase* ExternCOprRunner::shallow_copy( cg::OperatorNodeBase* ExternCOprRunner::shallow_copy(
@@ -456,7 +519,8 @@ cg::OperatorNodeBase* ExternCOprRunner::shallow_copy(
const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs, const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs,
const OperatorNodeConfig& config) { const OperatorNodeConfig& config) {
auto&& opr = opr_.cast_final_safe<ExternCOprRunner>(); auto&& opr = opr_.cast_final_safe<ExternCOprRunner>();
return make_from_desc_shared(inputs, opr.m_desc, config);
auto dump_name = opr.m_dump_name;
return make_from_desc_shared(dump_name, inputs, opr.m_desc, config);
} }


MGBTensorShape ExternCOprRunner::tensor_shape_to_c(const TensorShape& shape) { MGBTensorShape ExternCOprRunner::tensor_shape_to_c(const TensorShape& shape) {
@@ -481,6 +545,36 @@ TensorShape ExternCOprRunner::tensor_shape_from_c(const MGBTensorShape& shape) {
return ret; return ret;
} }


/*!
 * \brief attach \p param to one ExternCOprRunner in the operator sequence of
 *      \p func
 *
 * The first ExternCOprRunner whose full dump name equals
 * param->extern_c_opr_dump_name is configured; when that field is null the
 * first ExternCOprRunner found is configured.
 *
 * \throw MegBrainError if \p param or \p func is null, or if no operator was
 *      configured
 */
void mgb::config_extern_c_opr_dynamic_param(
        std::unique_ptr<cg::AsyncExecutable>& func,
        std::shared_ptr<ExternCOprParam> param) {
    mgb_throw_if(!param, MegBrainError, "invalid ExternCOprParam param!!");
    mgb_throw_if(!func, MegBrainError, "invalid AsyncExecutable func!!");

    bool find_config_opr = false;

    auto cb = [&](cg::OperatorNodeBase* opr) {
        if (auto c_opr = opr->try_cast_final<opr::ExternCOprRunner>()) {
            const char* dump_name = c_opr->get_dump_name().c_str();
            //! compare the whole name: a prefix comparison bounded by
            //! strlen(dump_name) would let "3516:preprocess2" select the
            //! operator dumped as "3516:preprocess"
            if (!param->extern_c_opr_dump_name ||
                !strcmp(param->extern_c_opr_dump_name, dump_name)) {
                c_opr->set_param(param);
                find_config_opr = true;
                mgb_log_debug("config dynamic param for extern c opr: %s",
                              dump_name);
            }
        }

        //! NOTE(review): presumably a false return stops the traversal, so
        //! only the first matching operator is configured -- confirm against
        //! iter_opr_seq
        return !find_config_opr;
    };

    func->iter_opr_seq(cb);

    mgb_throw_if(!find_config_opr, MegBrainError,
                 "graph do not include a ExternCOprRunner opr or error config "
                 "extern_c_opr_dump_name!!");
}

/* ===================== public APIs ===================== */ /* ===================== public APIs ===================== */
const MGBExternCOprApi* mgb_get_extern_c_opr_api_versioned(int version) { const MGBExternCOprApi* mgb_get_extern_c_opr_api_versioned(int version) {
auto unreg = [](const char* name) -> int { auto unreg = [](const char* name) -> int {


+ 52
- 0
src/serialization/include/megbrain/serialization/extern_c_opr.h View File

@@ -26,6 +26,10 @@ extern "C" {
#define MGB_C_OPR_INIT_FUNC mgb_c_opr_init #define MGB_C_OPR_INIT_FUNC mgb_c_opr_init
#endif #endif


//! two-level stringification: INIT_FUNC lets its argument be macro-expanded
//! first, then INIT_FUNCS turns the expanded tokens into a string literal, so
//! MGB_C_OPR_INIT_FUNC_STR is the name of the init entry point as a string
#define INIT_FUNCS(s) #s
#define INIT_FUNC(s) INIT_FUNCS(s)
#define MGB_C_OPR_INIT_FUNC_STR INIT_FUNC(MGB_C_OPR_INIT_FUNC)

#define MGB_EXTERN_C_OPR_VERSION 0x24 #define MGB_EXTERN_C_OPR_VERSION 0x24
#define MGB_TENSOR_MAX_NDIM 8 #define MGB_TENSOR_MAX_NDIM 8


@@ -54,6 +58,51 @@ typedef struct MGBTensor {
void* data; //!< the tensor value, accessible by caller CPU thread void* data; //!< the tensor value, accessible by caller CPU thread
} MGBTensor; } MGBTensor;


//! extern device tensor struct
typedef struct ExternDeviceTensor {
//! layout of the extern device tensor, used for validity checks against
//! MGBTensor
MGBTensorLayout layout;
//! different NPU APIs define different handle types, so a plain void* is
//! used to be compatible with all of them; the loader and the SDK need to
//! reinterpret_cast it.
//! example for NNIE, device_ptr may be defined as
//! struct MemoryInfo {
//! HI_U64 phy_addr;
//! void* vir_addr;
//! size_t size = 0;
//! }
void* device_ptr;
} ExternDeviceTensor;

//! dynamic param of an extern c opr
typedef struct ExternCOprParam {
//! dump name of the extern c opr in the graph that should be configured
//! example graph:
//! ExternCOpr1(3516:preprocess)->opr->ExternCOpr2(3559)->opr->ExternCOpr3(3516:det_face)...
//! extern_c_opr_dump_name config cases:
//! when set to 3516:preprocess, ExternCOpr1 will be configured.
//! when set to 3559, ExternCOpr2 will be configured.
//! when set to 3516:det_face, ExternCOpr3 will be configured.
//! when set to nullptr, the first ExternCOpr is configured automatically.
const char* extern_c_opr_dump_name;

//! number of input/output tensors, used for indexing and checking
//! nr_input = 0 means no input ExternDeviceTensor is provided
//! nr_output = 0 means no output ExternDeviceTensor is provided
size_t nr_input, nr_output;

//! pointers to the input/output ExternDeviceTensor arrays
ExternDeviceTensor* input;
ExternDeviceTensor* output;

//! device id
size_t device_id;

//! extra info for misc dynamic config
uint8_t* extra_info;
//! size of extra_info
size_t extra_info_size;
} ExternCOprParam;

/*! /*!
* \brief operator descriptor * \brief operator descriptor
* *
@@ -93,6 +142,9 @@ typedef struct MGBOprDesc {


//! custom user data to be associated with this descriptor //! custom user data to be associated with this descriptor
void* user_data; void* user_data;

//! dynamic extern c opr param
ExternCOprParam* dynamic_param;
} MGBOprDesc; } MGBOprDesc;


//! foreach member function of MGBOprDesc to help initialization //! foreach member function of MGBOprDesc to help initialization


+ 18
- 4
src/serialization/include/megbrain/serialization/extern_c_opr_io.h View File

@@ -22,25 +22,30 @@ namespace opr {
MGB_DEFINE_OPR_CLASS(ExternCOprRunner, MGB_DEFINE_OPR_CLASS(ExternCOprRunner,
cg::SingleCNOutshapePureByInshapeOprBase) // { cg::SingleCNOutshapePureByInshapeOprBase) // {
std::shared_ptr<MGBOprDesc> m_desc; std::shared_ptr<MGBOprDesc> m_desc;
//! store ExternCOprRunner opr full dump name
std::string m_dump_name;
//! store dynamic store param
std::shared_ptr<ExternCOprParam> m_param;


void get_output_var_shape(const TensorShapeArray& inp_shape, void get_output_var_shape(const TensorShapeArray& inp_shape,
TensorShapeArray& out_shape) const override; TensorShapeArray& out_shape) const override;
void scn_do_execute() override; void scn_do_execute() override;
void add_input_layout_constraint() override; void add_input_layout_constraint() override;
void init_output_dtype() override; void init_output_dtype() override;
void check_param();


static cg::OperatorNodeBase* make_from_desc_shared( static cg::OperatorNodeBase* make_from_desc_shared(
const VarNodeArray& inputs, std::shared_ptr<MGBOprDesc> desc,
const OperatorNodeConfig& config);
std::string& name, const VarNodeArray& inputs,
std::shared_ptr<MGBOprDesc> desc, const OperatorNodeConfig& config);


public: public:
ExternCOprRunner(const VarNodeArray& inputs,
ExternCOprRunner(std::string& name, const VarNodeArray& inputs,
std::shared_ptr<MGBOprDesc> desc, std::shared_ptr<MGBOprDesc> desc,
const OperatorNodeConfig& config); const OperatorNodeConfig& config);


//! create from MGBOprDesc and steal its reference //! create from MGBOprDesc and steal its reference
static cg::OperatorNodeBase* make_from_desc( static cg::OperatorNodeBase* make_from_desc(
const VarNodeArray& inputs, MGBOprDesc* desc,
std::string& name, const VarNodeArray& inputs, MGBOprDesc* desc,
const OperatorNodeConfig& config = {}); const OperatorNodeConfig& config = {});


/*! /*!
@@ -87,6 +92,15 @@ public:


//! helper for converting MGBTensorShape to TensorShape //! helper for converting MGBTensorShape to TensorShape
static TensorShape tensor_shape_from_c(const MGBTensorShape& shape); static TensorShape tensor_shape_from_c(const MGBTensorShape& shape);

const std::string& get_dump_name() {
return m_dump_name;
}

void set_param(const std::shared_ptr<ExternCOprParam>& param) {
m_param = param;
m_desc->dynamic_param = m_param.get();
}
}; };


} // namespace opr } // namespace opr


+ 220
- 9
src/serialization/test/extern_c_opr.cpp View File

@@ -9,6 +9,8 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ */


#include <memory>
#include "megbrain/graph/extern_copr_api.h"
#include "megbrain/opr/io.h" #include "megbrain/opr/io.h"
#include "megbrain/opr/utility.h" #include "megbrain/opr/utility.h"
#include "megbrain/serialization/extern_c_opr_io.h" #include "megbrain/serialization/extern_c_opr_io.h"
@@ -68,31 +70,50 @@ class MGBOprDescImpl {


static void execute(const MGBOprDesc* self, const MGBTensor* input, static void execute(const MGBOprDesc* self, const MGBTensor* input,
const MGBTensor* output) { const MGBTensor* output) {
if (self->dynamic_param) {
auto device_id = self->dynamic_param->device_id;
mgb_assert(0 == device_id || 8 == device_id);
}
bool use_extern_input =
(self->dynamic_param && self->dynamic_param->nr_input > 0)
? true
: false;
bool use_extern_output =
(self->dynamic_param && self->dynamic_param->nr_output > 0)
? true
: false;

auto&& i = input[0].layout; auto&& i = input[0].layout;
auto&& o = output[0].layout; auto&& o = output[0].layout;
mgb_assert(i.shape.ndim == 1 && o.shape.ndim == 1 && mgb_assert(i.shape.ndim == 1 && o.shape.ndim == 1 &&
i.shape.shape[0] == o.shape.shape[0]); i.shape.shape[0] == o.shape.shape[0]);
mgb_assert(i.dtype == MGB_DTYPE_FLOAT32 && o.dtype == out_dtype); mgb_assert(i.dtype == MGB_DTYPE_FLOAT32 && o.dtype == out_dtype);
auto pi = static_cast<float*>(input[0].data);
auto input_p = static_cast<float*>(input[0].data);
if (use_extern_input)
input_p = static_cast<float*>(
self->dynamic_param->input[0].device_ptr);
auto bias = user_data(self)->bias; auto bias = user_data(self)->bias;
if (out_dtype == MGB_DTYPE_FLOAT32) { if (out_dtype == MGB_DTYPE_FLOAT32) {
auto po = static_cast<float*>(output[0].data);
auto output_p = static_cast<float*>(output[0].data);
if (use_extern_output)
output_p = static_cast<float*>(
self->dynamic_param->output[0].device_ptr);
for (size_t x = 0; x < i.shape.shape[0]; ++x) { for (size_t x = 0; x < i.shape.shape[0]; ++x) {
po[x] = pi[x] + bias;
output_p[x] = input_p[x] + bias;
} }
} else if (MEGDNN_FLOAT16_SELECT(out_dtype == MGB_DTYPE_FLOAT16, } else if (MEGDNN_FLOAT16_SELECT(out_dtype == MGB_DTYPE_FLOAT16,
false)) { false)) {
#if !MEGDNN_DISABLE_FLOAT16 #if !MEGDNN_DISABLE_FLOAT16
auto po = static_cast<dt_float16*>(output[0].data);
auto output_p = static_cast<dt_float16*>(output[0].data);
for (size_t x = 0; x < i.shape.shape[0]; ++x) { for (size_t x = 0; x < i.shape.shape[0]; ++x) {
po[x] = pi[x] + bias;
output_p[x] = input_p[x] + bias;
} }
#endif #endif
} else { } else {
mgb_assert(out_dtype == MGB_DTYPE_INT32); mgb_assert(out_dtype == MGB_DTYPE_INT32);
auto po = static_cast<int32_t*>(output[0].data);
auto output_p = static_cast<int32_t*>(output[0].data);
for (size_t x = 0; x < i.shape.shape[0]; ++x) { for (size_t x = 0; x < i.shape.shape[0]; ++x) {
po[x] = pi[x] + bias;
output_p[x] = input_p[x] + bias;
} }
} }
} }
@@ -185,7 +206,7 @@ std::vector<uint8_t> create_graph_dump(float bias, float extra_scale,
x = opr::ExternCOprRunner::make_placeholder( x = opr::ExternCOprRunner::make_placeholder(
{x}, {TensorShape{1}}, {x}, {TensorShape{1}},
dtype == MGB_DTYPE_FLOAT32 dtype == MGB_DTYPE_FLOAT32
? "bias_adder_dump"
? "bias_adder_dump:test"
: (dtype == MGB_DTYPE_INT32 ? "bias_adder_dump_i32" : (dtype == MGB_DTYPE_INT32 ? "bias_adder_dump_i32"
: "bias_adder_dump_f16"), : "bias_adder_dump_f16"),
&bias, sizeof(bias), {}, {dtype_c2cpp(dtype)}) &bias, sizeof(bias), {}, {dtype_c2cpp(dtype)})
@@ -238,6 +259,65 @@ void check_dump_by_compute(std::unique_ptr<serialization::InputFile> input_file,
MGB_ASSERT_TENSOR_EQ(y_expect, host_y); MGB_ASSERT_TENSOR_EQ(y_expect, host_y);
} }


/*!
 * \brief load a dumped graph, attach \p param to its extern c opr, run it and
 *      compare the result with a host-side reference (input + bias)
 *
 * When \p param supplies an input device_ptr the reference is computed from
 * that buffer; when it supplies outputs, the result is read back from the
 * output device_ptr before the comparison.
 */
void check_dump_by_compute_with_param(
        std::unique_ptr<serialization::InputFile> input_file, CompNode cn,
        MGBDType dtype, float bias, std::shared_ptr<ExternCOprParam> param) {
    GraphLoadConfig config;
    config.comp_node_mapper = [loc = cn.locator()](CompNode::Locator& t) {
        t = loc;
    };
    auto loader = GraphLoader::make(std::move(input_file));
    auto load_ret = loader->load(config);
    load_ret.graph->options().var_sanity_check_first_run = false;
    SymbolVar y;
    unpack_vector(load_ret.output_var_list, y);

    HostTensorGenerator<> gen;
    auto host_x = load_ret.tensor_map.begin()->second;
    *host_x = *gen({23}, cn);
    HostTensorND y_expect;
    y_expect.copy_from(*host_x);
    {
        auto expect_ptr = y_expect.ptr<float>();
        //! this runs before config_extern_c_opr_dynamic_param has validated
        //! the param, so every pointer on the way to device_ptr is checked
        const bool has_extern_input = param->nr_input && param->input &&
                                      param->input->device_ptr;
        const float* src = expect_ptr;
        if (has_extern_input) {
            src = static_cast<float*>(param->input->device_ptr);
        }
        for (int idx = 0; idx < 23; ++idx) {
            float val = src[idx] + bias;
            if (dtype == MGB_DTYPE_INT32) {
                val = int(val);
#if !MEGDNN_DISABLE_FLOAT16
            } else if (dtype == MGB_DTYPE_FLOAT16) {
                val = dt_float16(val);
#endif
            }
            expect_ptr[idx] = val;
        }
    }

    HostTensorND host_y;
    auto func = load_ret.graph->compile({make_callback_copy(y, host_y)});
    config_extern_c_opr_dynamic_param(func, param);
    func->execute();
    if (param->nr_output) {
        //! the opr wrote into the extern output buffer, so fetch the result
        //! from there
        auto result_ptr = host_y.ptr<float>();
        auto extern_out = static_cast<float*>(param->output->device_ptr);
        for (int idx = 0; idx < 23; ++idx) {
            result_ptr[idx] = extern_out[idx];
        }
    }
    MGB_ASSERT_TENSOR_EQ(y_expect, host_y);
}

void run_compute_test(CompNode cn, MGBDType dtype) { void run_compute_test(CompNode cn, MGBDType dtype) {
float bias = 1.2, scale = -2.1; float bias = 1.2, scale = -2.1;
auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype); auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype);
@@ -245,8 +325,138 @@ void run_compute_test(CompNode cn, MGBDType dtype) {
InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn, InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn,
dtype, bias, scale); dtype, bias, scale);
} }
//! dump a graph with fixed bias/scale and check it on \p cn with the given
//! dynamic \p param
void run_compute_test_with_param(CompNode cn, MGBDType dtype,
                                 std::shared_ptr<ExternCOprParam> param) {
    float bias = 1.2, scale = 0;
    auto dumped = create_graph_dump(bias, scale, 0.3, dtype);
    auto input_file = InputFile::make_mem_proxy(dumped.data(), dumped.size());
    check_dump_by_compute_with_param(std::move(input_file), cn, dtype, bias,
                                     param);
}
} // namespace } // namespace


//! exercise config_extern_c_opr_dynamic_param with valid and invalid params
TEST(TestExternCOpr, ExternCOprParam) {
    //! same with check_dump_by_compute_with_param
    constexpr int input_output_size = 23;
    auto c_opr_param = std::make_shared<ExternCOprParam>();
    MGBTensorLayout input_layout, output_layout;
    ExternDeviceTensor input, output;
    //! owned buffers: released automatically, also when a fatal ASSERT_*
    //! leaves the test body early
    auto input_buf = std::make_unique<float[]>(input_output_size);
    auto output_buf = std::make_unique<float[]>(input_output_size);
    float* input_device_ptr = input_buf.get();
    float* output_device_ptr = output_buf.get();

    //! zero the param, layouts, tensors and buffers, then refill the input
    auto reset = [&] {
        memset(c_opr_param.get(), 0, sizeof(ExternCOprParam));
        memset(&input_layout, 0, sizeof(MGBTensorLayout));
        memset(&input, 0, sizeof(ExternDeviceTensor));
        memset(&output_layout, 0, sizeof(MGBTensorLayout));
        memset(&output, 0, sizeof(ExternDeviceTensor));
        memset(input_device_ptr, 0, input_output_size * sizeof(float));
        memset(output_device_ptr, 0, input_output_size * sizeof(float));

        for (size_t i = 0; i < input_output_size; i++) {
            input_device_ptr[i] = i;
        }
    };

    auto run_test = [&] {
        run_compute_test_with_param(CompNode::load("cpux"), MGB_DTYPE_FLOAT32,
                                    c_opr_param);
    };

    //! build a fully valid param with one extern input and one extern output
    auto init_param = [&] {
        reset();
        c_opr_param->nr_input = 1;
        input_layout.shape = {1, {input_output_size}};
        input.layout = input_layout;
        input.device_ptr = input_device_ptr;
        c_opr_param->input = &input;

        c_opr_param->nr_output = 1;
        output_layout.shape = {1, {input_output_size}};
        output.layout = output_layout;
        output.device_ptr = output_device_ptr;
        c_opr_param->output = &output;
    };

    //! run with null param
    reset();
    run_test();

    //! run with full param
    init_param();
    run_test();

    //! run with a right dump name
    init_param();
    c_opr_param->extern_c_opr_dump_name = "bias_adder_dump:test";
    run_test();

    //! set a wrong dump name (only a prefix of the real one)
    init_param();
    c_opr_param->extern_c_opr_dump_name = "bias_adder_dump";
    ASSERT_THROW(run_test(), MegBrainError);

    //! set a wrong dump name (unrelated)
    init_param();
    c_opr_param->extern_c_opr_dump_name = "sdfsdfs";
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong input
    init_param();
    c_opr_param->input = nullptr;
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong nr_input
    init_param();
    c_opr_param->nr_input = 3;
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong input device_ptr
    init_param();
    c_opr_param->input->device_ptr = nullptr;
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong input shape
    init_param();
    c_opr_param->input->layout.shape.shape[0] = input_output_size - 2;
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong output
    init_param();
    c_opr_param->output = nullptr;
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong nr_output
    init_param();
    c_opr_param->nr_output = 3;
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong output device_ptr
    init_param();
    c_opr_param->output->device_ptr = nullptr;
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong output shape
    init_param();
    c_opr_param->output->layout.shape.shape[0] = input_output_size - 2;
    ASSERT_THROW(run_test(), MegBrainError);

    //! set wrong dtype(test MGB_DTYPE_FLOAT32)
    init_param();
    c_opr_param->input[0].layout.dtype = MGB_DTYPE_INT32;
    ASSERT_THROW(run_test(), MegBrainError);

    //! test only device_id
    reset();
    c_opr_param->device_id = 8;
    run_test();
}

TEST(TestExternCOpr, CPUCompute) { TEST(TestExternCOpr, CPUCompute) {
run_compute_test(CompNode::load("cpux"), MGB_DTYPE_FLOAT32); run_compute_test(CompNode::load("cpux"), MGB_DTYPE_FLOAT32);
} }
@@ -280,8 +490,9 @@ TEST(TestExternCOpr, Dedup) {
auto graph = ComputingGraph::make(); auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x); auto x = opr::Host2DeviceCopy::make(*graph, host_x);
auto make_opr = [x](float bias) { auto make_opr = [x](float bias) {
std::string name = "test";
return opr::ExternCOprRunner::make_from_desc( return opr::ExternCOprRunner::make_from_desc(
{x.node()}, MGBOprDescImpl<>::make(bias));
name, {x.node()}, MGBOprDescImpl<>::make(bias));
}; };
auto y0 = make_opr(0.5), y1 = make_opr(0.6), y2 = make_opr(0.5); auto y0 = make_opr(0.5), y1 = make_opr(0.6), y2 = make_opr(0.5);
ASSERT_EQ(y0, y2); ASSERT_EQ(y0, y2);


+ 7
- 0
src/serialization/test/extern_c_opr_v23.h View File

@@ -42,6 +42,10 @@ typedef struct MGBTensor {
void* data; //!< the tensor value, accessible by caller CPU thread void* data; //!< the tensor value, accessible by caller CPU thread
} MGBTensor; } MGBTensor;


//! placeholder definition of ExternCOprParam for this header; it carries no
//! real configuration and only exists so that code using the type builds
typedef struct ExternCOprParam {
//! just for build
size_t _;
} ExternCOprParam;
/*! /*!
* \brief operator descriptor * \brief operator descriptor
* *
@@ -74,6 +78,9 @@ typedef struct MGBOprDesc {


//! custom user data to be associated with this descriptor //! custom user data to be associated with this descriptor
void* user_data; void* user_data;

//! dynamic extern c opr param
ExternCOprParam* dynamic_param;
} MGBOprDesc; } MGBOprDesc;


//! foreach member function of MGBOprDesc to help initialization //! foreach member function of MGBOprDesc to help initialization


Loading…
Cancel
Save