
mace_loader.cpp 9.6 kB

/**
 * \file sdk/c-opr-loaders/mace/mace_loader.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include "mace/public/mace.h"

#include "extern_c_opr.h"

#define ASSERT(x, msg)                                                       \
    do {                                                                     \
        if (!(x)) {                                                          \
            printf("error at %s:%d %s\n", __FILE__, __LINE__, __FUNCTION__); \
            printf(msg);                                                     \
            __builtin_trap();                                                \
        }                                                                    \
    } while (0)

class MGBOprDescImpl {
    struct UserData {
        std::shared_ptr<mace::MaceEngine> engine;
        size_t nr_inputs, nr_outputs;
        std::vector<std::vector<int64_t>> output_shapes;
        std::vector<std::string> input_names, output_names;
    };

    static UserData* user_data(const MGBOprDesc* self) {
        return static_cast<UserData*>(self->user_data);
    }

    static void release(MGBOprDesc* self) {
        // free all data buffers
        delete user_data(self);
        delete self;
    }

    static size_t hash(const MGBOprDesc* self) {
        return reinterpret_cast<size_t>(self);
    }

    static int is_same(const MGBOprDesc* self, const MGBOprDesc* rhs) {
        return self == rhs;
    }

    static void infer_shape(const MGBOprDesc* self, const MGBTensorShape* input,
                            MGBTensorShape* output) {
        auto ud = user_data(self);
        // infer output shape from user data
        for (size_t i = 0; i < ud->nr_outputs; i++) {
            output[i].ndim = ud->output_shapes[i].size();
            for (size_t j = 0; j < output[i].ndim; j++) {
                output[i].shape[j] = ud->output_shapes[i][j];
            }
        }
    }

    static void infer_dtype(const MGBOprDesc*, const MGBDType* input,
                            MGBDType* output) {
        ASSERT(input[0] == MGB_DTYPE_FLOAT32, "Input dtype is not float32");
        output[0] = MGB_DTYPE_FLOAT32;
    }

    static void execute(const MGBOprDesc* self, const MGBTensor* input,
                        const MGBTensor* output) {
        auto ud = user_data(self);
        // create input and output tensor buffers
        std::map<std::string, mace::MaceTensor> mace_inputs;
        std::map<std::string, mace::MaceTensor> mace_outputs;
        auto mace_data_format = mace::DataFormat::NCHW;
        const char* data_format = getenv("DATAFORMAT");
        if (data_format && !strcmp(data_format, "NHWC")) {
            mace_data_format = mace::DataFormat::NHWC;
        }
        for (size_t i = 0; i < ud->nr_inputs; ++i) {
            // allocate an input buffer and copy the MGB tensor data into it
            uint32_t ndim = input[i].layout.shape.ndim;
            auto input_shape = std::vector<int64_t>(
                    input[i].layout.shape.shape,
                    input[i].layout.shape.shape + ndim);
            int64_t input_size = std::accumulate(
                    input_shape.begin(), input_shape.end(), int64_t{1},
                    std::multiplies<int64_t>());
            auto buffer_in = std::shared_ptr<float>(
                    new float[input_size], std::default_delete<float[]>());
            memcpy(buffer_in.get(), input[i].data, input_size * sizeof(float));
            mace_inputs[ud->input_names[i]] =
                    mace::MaceTensor(input_shape, buffer_in, mace_data_format);
        }
        for (size_t i = 0; i < ud->nr_outputs; ++i) {
            // allocate an output buffer for mace to write into
            uint32_t ndim = output[i].layout.shape.ndim;
            auto output_shape = std::vector<int64_t>(
                    output[i].layout.shape.shape,
                    output[i].layout.shape.shape + ndim);
            int64_t output_size = std::accumulate(
                    output_shape.begin(), output_shape.end(), int64_t{1},
                    std::multiplies<int64_t>());
            auto buffer_out = std::shared_ptr<float>(
                    new float[output_size], std::default_delete<float[]>());
            mace_outputs[ud->output_names[i]] =
                    mace::MaceTensor(output_shape, buffer_out, mace_data_format);
        }
        // run the model
        auto status = (ud->engine)->Run(mace_inputs, &mace_outputs);
        ASSERT(status == mace::MaceStatus::MACE_SUCCESS,
               "Error in running mace engine");
        // copy the computed outputs back into the MGB output tensors
        for (size_t i = 0; i < ud->nr_outputs; ++i) {
            int64_t output_size = std::accumulate(
                    ud->output_shapes[i].begin(), ud->output_shapes[i].end(),
                    int64_t{1}, std::multiplies<int64_t>());
            memcpy(output[i].data,
                   mace_outputs.at(ud->output_names[i]).data().get(),
                   output_size * sizeof(float));
        }
    }

public:
    static MGBOprDesc* make(size_t nr_input, const void* buf, size_t buf_len) {
        auto ud = std::make_unique<UserData>();
        std::shared_ptr<mace::MaceEngine> engine;
        mace::DeviceType device_type;
        // select the mace runtime from the RUNTIME environment variable
        const char* runtime_mode = getenv("RUNTIME");
        if (runtime_mode && !strcmp(runtime_mode, "GPU")) {
            device_type = mace::DeviceType::GPU;
        } else {
            device_type = mace::DeviceType::CPU;
        }
        mace::MaceEngineConfig config(device_type);
        // set gpu context, mainly the opencl storage path
        if (device_type == mace::DeviceType::GPU) {
            std::shared_ptr<mace::GPUContext> gpu_context;
            const char* opencl_path = getenv("OPENCLPATH");
            ASSERT(opencl_path, "Please set opencl library path");
            std::string storage_path(opencl_path);
            gpu_context = mace::GPUContextBuilder()
                                  .SetStoragePath(storage_path)
                                  .Finalize();
            config.SetGPUContext(gpu_context);
            config.SetGPUHints(
                    static_cast<mace::GPUPerfHint>(mace::GPUPerfHint::PERF_HIGH),
                    static_cast<mace::GPUPriorityHint>(
                            mace::GPUPriorityHint::PRIORITY_HIGH));
        }
        std::vector<std::string> input_names, output_names;
        // extract all information from buf
        void* buffer = const_cast<void*>(buf);
        ud->nr_inputs = *reinterpret_cast<uint32_t*>(buffer);
        ud->nr_outputs = *(reinterpret_cast<uint32_t*>(buffer) + 1);
        // interpret input names (uint32 length followed by the name bytes)
        char* name_buf = reinterpret_cast<char*>(buffer) + 8;
        for (size_t i = 0; i < ud->nr_inputs; i++) {
            size_t ilen = *reinterpret_cast<uint32_t*>(name_buf);
            input_names.push_back(std::string(name_buf + 4, ilen));
            name_buf += (ilen + 4);
        }
        // interpret output names
        buffer = name_buf;
        name_buf = reinterpret_cast<char*>(buffer);
        for (size_t i = 0; i < ud->nr_outputs; i++) {
            size_t olen = *reinterpret_cast<uint32_t*>(name_buf);
            output_names.push_back(std::string(name_buf + 4, olen));
            name_buf += (olen + 4);
        }
        ud->input_names = input_names;
        ud->output_names = output_names;
        // interpret output shapes (ndim followed by the dims of each output)
        buffer = name_buf;
        uint32_t* shape_buf = reinterpret_cast<uint32_t*>(buffer) + 1;
        for (size_t i = 0; i < ud->nr_outputs; i++) {
            size_t olen = *reinterpret_cast<uint32_t*>(shape_buf);
            ud->output_shapes.push_back(
                    std::vector<int64_t>(shape_buf + 1, shape_buf + olen + 1));
            shape_buf += (olen + 1);
        }
        // the remaining bytes are the length-prefixed model proto and params
        buffer = shape_buf;
        const size_t model_buf_len = *reinterpret_cast<uint32_t*>(buffer);
        unsigned char* model_buf = reinterpret_cast<unsigned char*>(buffer) + 4;
        const size_t param_buf_len =
                *reinterpret_cast<uint32_t*>(model_buf + model_buf_len);
        unsigned char* param_buf = model_buf + model_buf_len + 4;
        // create mace engine
        auto create_engine_status = mace::CreateMaceEngineFromProto(
                model_buf, model_buf_len, param_buf, param_buf_len, input_names,
                output_names, config, &engine);
        ASSERT(create_engine_status == mace::MaceStatus::MACE_SUCCESS,
               "Error in creating mace engine");
        ud->engine = engine;
        // fill the MGBOprDesc vtable with the static member functions above
        auto ret = std::make_unique<MGBOprDesc>();
        mgb_init_opr_desc(ret.get(), ud->nr_outputs, "mace");
#define a(n) ret->n = &n;
        MGB_OPR_DESC_FOREACH_MEM_FN(a);
        a(infer_dtype);
#undef a
        ret->user_data = ud.release();
        return ret.release();
    }
};

class MGBOprLoaderImpl {
    static MGBOprDesc* create_desc(size_t nr_input, const void* buf,
                                   size_t buf_len) {
        return MGBOprDescImpl::make(nr_input, buf, buf_len);
    }

public:
    static MGBOprLoader make() { return {"mace", create_desc}; }
};

extern "C" {
// public interface
__attribute__((visibility("default"))) void MGB_C_OPR_INIT_FUNC(
        const MGBExternCOprApi* (*get_api)(int)) {
    const MGBExternCOprApi* api = get_api(MGB_EXTERN_C_OPR_VERSION);
    ASSERT(api, "Create api failed");
    MGBOprLoader loader = MGBOprLoaderImpl::make();
    api->register_loader(&loader);
}
}  // extern "C"
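
For reference, the `buf` that `MGBOprDescImpl::make()` decodes is a flat binary blob produced by MegEngine's model-dump tooling. The packer below is only a hypothetical sketch whose layout is read directly off the parsing code above; the helper name `pack_mace_loader_buf` and the interpretation of the 4-byte field the loader skips before the shape list (written here as the shape count) are assumptions, not MegEngine's actual dump implementation.

// Hypothetical packer sketch: builds a byte buffer in the layout that
// MGBOprDescImpl::make() above decodes. Not part of MegEngine; the real
// buffer is produced by the model-dump tooling.
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

static void put_u32(std::vector<unsigned char>& out, uint32_t v) {
    unsigned char b[4];
    memcpy(b, &v, 4);  // assumes the little-endian layout the loader reads
    out.insert(out.end(), b, b + 4);
}

std::vector<unsigned char> pack_mace_loader_buf(
        const std::vector<std::string>& input_names,
        const std::vector<std::string>& output_names,
        const std::vector<std::vector<uint32_t>>& output_shapes,
        const std::vector<unsigned char>& model_proto,
        const std::vector<unsigned char>& model_params) {
    std::vector<unsigned char> out;
    put_u32(out, static_cast<uint32_t>(input_names.size()));   // nr_inputs
    put_u32(out, static_cast<uint32_t>(output_names.size()));  // nr_outputs
    for (auto& n : input_names) {   // length-prefixed input names
        put_u32(out, static_cast<uint32_t>(n.size()));
        out.insert(out.end(), n.begin(), n.end());
    }
    for (auto& n : output_names) {  // length-prefixed output names
        put_u32(out, static_cast<uint32_t>(n.size()));
        out.insert(out.end(), n.begin(), n.end());
    }
    // a 4-byte field the loader skips; written here as the shape count (assumption)
    put_u32(out, static_cast<uint32_t>(output_shapes.size()));
    for (auto& s : output_shapes) {  // ndim followed by the dims of each output
        put_u32(out, static_cast<uint32_t>(s.size()));
        for (auto d : s)
            put_u32(out, d);
    }
    put_u32(out, static_cast<uint32_t>(model_proto.size()));   // mace model graph
    out.insert(out.end(), model_proto.begin(), model_proto.end());
    put_u32(out, static_cast<uint32_t>(model_params.size()));  // mace weights
    out.insert(out.end(), model_params.begin(), model_params.end());
    return out;
}

At runtime the loader is configured through environment variables: RUNTIME selects GPU or CPU, DATAFORMAT switches the tensor layout to NHWC instead of the default NCHW, and OPENCLPATH must point to the OpenCL storage path when the GPU runtime is used.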

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose. To run GPU programs, make sure the machine has a GPU and that its driver is installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.