/** * \file sdk/c-opr-loaders/mace/mace_loader.cpp * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") * * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #include #include #include "mace/public/mace.h" #include "extern_c_opr.h" #define ASSERT(x, msg) \ do { \ if (!(x)) { \ printf("error at %s:%d %s\n", __FILE__, __LINE__, __FUNCTION__); \ printf(msg); \ __builtin_trap(); \ } \ } while (0) class MGBOprDescImpl { struct UserData { std::shared_ptr engine; size_t nr_inputs, nr_outputs; std::vector> output_shapes; std::vector input_names, output_names; }; static UserData* user_data(const MGBOprDesc* self) { return static_cast(self->user_data); } static void release(MGBOprDesc* self) { // free all data buffers delete user_data(self); delete self; } static size_t hash(const MGBOprDesc* self) { return reinterpret_cast(self); } static int is_same(const MGBOprDesc* self, const MGBOprDesc* rhs) { return self == rhs; } static void infer_shape(const MGBOprDesc* self, const MGBTensorShape* input, MGBTensorShape* output) { auto ud = user_data(self); // infer output shape from user data for (size_t i = 0; i < ud->nr_outputs; i++) { output[i].ndim = ud->output_shapes[i].size(); for (size_t j = 0; j < output[i].ndim; j++) { output[i].shape[j] = ud->output_shapes[i][j]; } } } static void infer_dtype(const MGBOprDesc*, const MGBDType* input, MGBDType* output) { ASSERT(input[0] == MGB_DTYPE_FLOAT32, "Input dtype is not float32"); output[0] = MGB_DTYPE_FLOAT32; } static void execute(const MGBOprDesc* self, const MGBTensor* input, const MGBTensor* output) { auto ud = user_data(self); // create input and output tensor buffers std::map mace_inputs; std::map mace_outputs; auto mace_data_format = mace::DataFormat::NCHW; char *data_format = getenv("DATAFORMAT"); if (!strcmp(data_format, "NHWC")) { mace_data_format = mace::DataFormat::NHWC; } for (size_t i = 0; i < ud->nr_inputs; ++i) { // allocate input uint32_t ndim = input[i].layout.shape.ndim; auto input_shape = std::vector(input[i].layout.shape.shape, input[i].layout.shape.shape + ndim); int64_t input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); auto buffer_in = std::shared_ptr(new float[input_size], std::default_delete()); memcpy(buffer_in.get(), input[i].data, input_size * sizeof(float)); mace_inputs[ud->input_names[i]] = mace::MaceTensor(input_shape, buffer_in, mace_data_format); } for (size_t i = 0; i < ud->nr_outputs; ++i) { // allocate output uint32_t ndim = output[i].layout.shape.ndim; auto output_shape = std::vector(output[i].layout.shape.shape, output[i].layout.shape.shape + ndim); int64_t output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); auto buffer_out = std::shared_ptr(new float[output_size], std::default_delete()); mace_outputs[ud->output_names[i]] = mace::MaceTensor(output_shape, buffer_out, mace_data_format); } // run the model auto status = (ud->engine)->Run(mace_inputs, &mace_outputs); ASSERT(status == mace::MaceStatus::MACE_SUCCESS, "Error in running mace engine"); // send computed output to MGB int idx = 0; for (auto it = mace_outputs.begin(); it != mace_outputs.end(); it++) { float* to = &((float *)output[idx++].data)[0]; to = (it->second).data().get(); } } public: static MGBOprDesc* make(size_t nr_input, const void *buf, size_t buf_len) { auto ud = std::make_unique(); std::shared_ptr engine; mace::DeviceType device_type; char *runtime_mode = getenv("RUNTIME"); if (!strcmp(runtime_mode, "GPU")) { device_type = mace::DeviceType::GPU; } else { device_type = mace::DeviceType::CPU; } mace::MaceEngineConfig config(device_type); // set gpu context, mainly opencl path if (device_type == mace::DeviceType::GPU) { std::shared_ptr gpu_context; char *opencl_path = getenv("OPENCLPATH"); ASSERT(opencl_path, "Please set opencl library path"); std::string storage_path(opencl_path); gpu_context = mace::GPUContextBuilder() .SetStoragePath(storage_path) .Finalize(); config.SetGPUContext(gpu_context); config.SetGPUHints( static_cast(mace::GPUPerfHint::PERF_HIGH), static_cast(mace::GPUPriorityHint::PRIORITY_HIGH)); } std::vector input_names, output_names; // extract all information from buf void *buffer = const_cast(buf); ud->nr_inputs = *reinterpret_cast(buffer); ud->nr_outputs = *(reinterpret_cast(buffer) + 1); // interpret input names char *name_buf = reinterpret_cast(buffer) + 8; for (size_t i = 0; i < ud->nr_inputs; i++) { size_t ilen = *reinterpret_cast(name_buf); input_names.push_back(std::string(name_buf + 4, ilen)); name_buf += (ilen + 4); } // interpret output names buffer = name_buf; name_buf = reinterpret_cast(buffer); for (size_t i = 0; i < ud->nr_outputs; i++) { size_t olen = *reinterpret_cast(name_buf); output_names.push_back(std::string(name_buf + 4, olen)); name_buf += (olen + 4); } ud->input_names = input_names; ud->output_names = output_names; // interpret output shapes buffer = name_buf; uint32_t *shape_buf = reinterpret_cast(buffer) + 1; for (size_t i = 0; i < ud->nr_outputs; i++) { size_t olen = *reinterpret_cast(shape_buf); ud->output_shapes.push_back( std::vector(shape_buf + 1, shape_buf + olen + 1) ); shape_buf += (olen + 1); } buffer = shape_buf; const size_t model_buf_len = *reinterpret_cast(buffer); unsigned char *model_buf = reinterpret_cast(buffer) + 4; const size_t param_buf_len = *reinterpret_cast(model_buf + model_buf_len); unsigned char *param_buf = model_buf + model_buf_len + 4; // create mace engine auto create_engine_status = mace::CreateMaceEngineFromProto( model_buf, model_buf_len, param_buf, param_buf_len, input_names, output_names, config, &engine ); ASSERT(create_engine_status == mace::MaceStatus::MACE_SUCCESS, "Error in creating mace engine"); ud->engine = engine; auto ret = std::make_unique(); mgb_init_opr_desc(ret.get(), ud->nr_outputs, "mace"); #define a(n) ret->n = &n; MGB_OPR_DESC_FOREACH_MEM_FN(a); a(infer_dtype); #undef a ret->user_data = ud.release(); return ret.release(); } }; class MGBOprLoaderImpl { static MGBOprDesc* create_desc(size_t nr_input, const void *buf, size_t buf_len) { return MGBOprDescImpl::make(nr_input, buf, buf_len); } public: static MGBOprLoader make() { return {"mace", create_desc}; } }; extern "C" { // public interface __attribute__((visibility("default"))) void MGB_C_OPR_INIT_FUNC(const MGBExternCOprApi* (*get_api)(int)) { const MGBExternCOprApi* api = get_api(MGB_EXTERN_C_OPR_VERSION); ASSERT(api, "Create api failed"); MGBOprLoader loader = MGBOprLoaderImpl::make(); api->register_loader(&loader); } } // extern "C"