You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

mace_loader.cpp 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  #include <sys/stat.h>

  #include <cstdint>
  #include <cstdio>
  #include <cstdlib>
  #include <cstring>
  #include <functional>
  #include <iostream>
  #include <map>
  #include <memory>
  #include <numeric>
  #include <string>
  #include <utility>
  #include <vector>

  #include "mace/public/mace.h"
  #include "extern_c_opr.h"
  //! Candidate OpenCL shared-library locations, chosen at compile time per
  //! target platform. Every non-Windows list ends with the bare library name.
  //! NOTE(review): nothing in the visible part of this file reads this table.
  #if defined(__APPLE__) || defined(__MACOSX)
  static const char* default_so_paths[] = {
          "/System/Library/Frameworks/OpenCL.framework/OpenCL",
          "libOpenCL.so"};
  #elif defined(__ANDROID__)
  static const char* default_so_paths[] = {
  #if defined(__aarch64__)
          // 64-bit library directories
          "/system/lib64/libOpenCL.so",
          "/system/lib64/libOpenCL_system.so",
          "/system/lib64/egl/libGLES_mali.so",
          "/system/vendor/lib64/libOpenCL.so",
          "/system/vendor/lib64/egl/libGLES_mali.so",
          "/system/vendor/lib64/libPVROCL.so",
          "/vendor/lib64/libOpenCL.so",
          "/data/data/org.pocl.libs/files/lib64/libpocl.so",
  #else
          // 32-bit library directories
          "/system/lib/libOpenCL.so",
          "/system/lib/libOpenCL_system.so",
          "/system/lib/egl/libGLES_mali.so",
          "/system/vendor/lib/libOpenCL.so",
          "/system/vendor/lib/egl/libGLES_mali.so",
          "/system/vendor/lib/libPVROCL.so",
          "/vendor/lib/libOpenCL.so",
          "/data/data/org.pocl.libs/files/lib/libpocl.so",
  #endif
          "libOpenCL.so"};
  #elif defined(_WIN32)
  static const char* default_so_paths[] = {"OpenCL.dll"};
  #elif defined(__linux__)
  static const char* default_so_paths[] = {
  #if defined(__x86_64__) || defined(__amd64__)
          "/usr/lib64/libOpenCL.so",
          "/usr/local/lib64/libOpenCL.so",
          "/usr/local/cuda/lib64/libOpenCL.so",
          "/opt/intel/opencl/libOpenCL.so",
          //! on some systems (e.g. apex) the driver lives here
          "/usr/lib/libOpenCL.so",
  #else
          "/usr/lib/libOpenCL.so",
          "/usr/lib32/libOpenCL.so",
          "/usr/local/lib/libOpenCL.so",
          "/usr/local/lib/libpocl.so",
          "/usr/local/cuda/lib/libOpenCL.so",
  #endif
          "libOpenCL.so"};
  #endif
  //! Abort with a diagnostic when \p x evaluates to false.
  //!
  //! \p msg must be a C string; it is printed verbatim through "%s" so that
  //! any '%' in it is never interpreted as a conversion specifier. The
  //! diagnostic goes to stderr and every open output stream is flushed before
  //! __builtin_trap(), because the trap terminates the process without
  //! running the normal stdio shutdown that would flush buffered output.
  #define ASSERT(x, msg)                                                  \
      do {                                                                \
          if (!(x)) {                                                     \
              std::fprintf(stderr, "error at %s:%d %s\n", __FILE__,       \
                           __LINE__, __FUNCTION__);                       \
              std::fprintf(stderr, "%s\n", (msg));                        \
              std::fflush(nullptr);                                       \
              __builtin_trap();                                           \
          }                                                               \
      } while (0)
  //! Return true when stat() succeeds on \p name, false otherwise.
  inline bool file_exists(const char* name) {
      struct stat info;
      const int rc = stat(name, &info);
      return rc == 0;
  }
  62. class MGBOprDescImpl {
  63. struct UserData {
  64. std::shared_ptr<mace::MaceEngine> engine;
  65. size_t nr_inputs, nr_outputs;
  66. std::vector<std::vector<int64_t>> output_shapes;
  67. std::vector<std::string> input_names, output_names;
  68. };
  69. static UserData* user_data(const MGBOprDesc* self) {
  70. return static_cast<UserData*>(self->user_data);
  71. }
  72. static void release(MGBOprDesc* self) {
  73. // free all data buffers
  74. delete user_data(self);
  75. delete self;
  76. }
  77. static size_t hash(const MGBOprDesc* self) {
  78. return reinterpret_cast<size_t>(self);
  79. }
  80. static int is_same(const MGBOprDesc* self, const MGBOprDesc* rhs) {
  81. return self == rhs;
  82. }
  83. static void infer_shape(const MGBOprDesc* self, const MGBTensorShape* input,
  84. MGBTensorShape* output) {
  85. auto ud = user_data(self);
  86. // infer output shape from user data
  87. for (size_t i = 0; i < ud->nr_outputs; i++) {
  88. output[i].ndim = ud->output_shapes[i].size();
  89. for (size_t j = 0; j < output[i].ndim; j++) {
  90. output[i].shape[j] = ud->output_shapes[i][j];
  91. }
  92. }
  93. }
  94. static void infer_dtype(const MGBOprDesc*, const MGBDType* input, MGBDType* output) {
  95. ASSERT(input[0] == MGB_DTYPE_FLOAT32, "Input dtype is not float32");
  96. output[0] = MGB_DTYPE_FLOAT32;
  97. }
  98. static void execute(const MGBOprDesc* self, const MGBTensor* input,
  99. const MGBTensor* output) {
  100. auto ud = user_data(self);
  101. // create input and output tensor buffers
  102. std::map<std::string, mace::MaceTensor> mace_inputs;
  103. std::map<std::string, mace::MaceTensor> mace_outputs;
  104. auto mace_data_format = mace::DataFormat::NCHW;
  105. char *data_format = getenv("MGB_MACE_LOADER_FORMAT");
  106. if (data_format != nullptr && !strcmp(data_format, "NHWC")) {
  107. mace_data_format = mace::DataFormat::NHWC;
  108. }
  109. for (size_t i = 0; i < ud->nr_inputs; ++i) {
  110. // allocate input
  111. uint32_t ndim = input[i].layout.shape.ndim;
  112. auto input_shape = std::vector<int64_t>(input[i].layout.shape.shape,
  113. input[i].layout.shape.shape + ndim);
  114. int64_t input_size =
  115. std::accumulate(input_shape.begin(), input_shape.end(), 1,
  116. std::multiplies<uint64_t>());
  117. auto buffer_in = std::shared_ptr<float>(new float[input_size],
  118. std::default_delete<float[]>());
  119. memcpy(buffer_in.get(), input[i].data, input_size * sizeof(float));
  120. mace_inputs[ud->input_names[i]] =
  121. mace::MaceTensor(input_shape, buffer_in, mace_data_format);
  122. }
  123. for (size_t i = 0; i < ud->nr_outputs; ++i) {
  124. // allocate output
  125. uint32_t ndim = output[i].layout.shape.ndim;
  126. auto output_shape = std::vector<int64_t>(output[i].layout.shape.shape,
  127. output[i].layout.shape.shape + ndim);
  128. int64_t output_size =
  129. std::accumulate(output_shape.begin(), output_shape.end(), 1,
  130. std::multiplies<int64_t>());
  131. auto buffer_out = std::shared_ptr<float>(new float[output_size],
  132. std::default_delete<float[]>());
  133. mace_outputs[ud->output_names[i]] =
  134. mace::MaceTensor(output_shape, buffer_out, mace_data_format);
  135. }
  136. // run the model
  137. auto status = (ud->engine)->Run(mace_inputs, &mace_outputs);
  138. ASSERT(status == mace::MaceStatus::MACE_SUCCESS,
  139. "Error in running mace engine");
  140. // send computed output to MGB
  141. int idx = 0;
  142. for (auto it = mace_outputs.begin(); it != mace_outputs.end(); it++) {
  143. float* to = &((float *)output[idx++].data)[0];
  144. to = (it->second).data().get();
  145. }
  146. }
  147. public:
  148. static MGBOprDesc* make(size_t nr_input, const void *buf, size_t buf_len) {
  149. auto ud = std::make_unique<UserData>();
  150. std::shared_ptr<mace::MaceEngine> engine;
  151. mace::DeviceType device_type = mace::DeviceType::CPU;
  152. char *runtime_mode = getenv("MGB_MACE_RUNTIME");
  153. if (runtime_mode != nullptr && !strcmp(runtime_mode, "GPU")) {
  154. device_type = mace::DeviceType::GPU;
  155. }
  156. mace::MaceEngineConfig config(device_type);
  157. // set number of threads for cpu, default 1
  158. if (device_type == mace::DeviceType::CPU) {
  159. int nthread = 1;
  160. char *str_nthread = getenv("MGB_MACE_NR_THREADS");
  161. if (str_nthread != nullptr) {
  162. nthread = atoi(str_nthread);
  163. }
  164. config.SetCPUThreadPolicy(nthread, mace::CPUAffinityPolicy::AFFINITY_NONE);
  165. }
  166. // set gpu context, mainly opencl path
  167. if (device_type == mace::DeviceType::GPU) {
  168. std::shared_ptr<mace::GPUContext> gpu_context;
  169. char *cache_path = getenv("MGB_MACE_OPENCL_CACHE_PATH");
  170. ASSERT(cache_path, "there must be an opencl cache file path");
  171. char *param_path = getenv("MGB_MACE_TUNING_PARAM_PATH");
  172. std::string opencl_param_path("");
  173. if (param_path != nullptr) {
  174. opencl_param_path = std::string(param_path);
  175. }
  176. std::string storage_path(cache_path);
  177. gpu_context = mace::GPUContextBuilder()
  178. .SetStoragePath(storage_path)
  179. .SetOpenCLParameterPath(opencl_param_path)
  180. .Finalize();
  181. config.SetGPUContext(gpu_context);
  182. config.SetGPUHints(
  183. static_cast<mace::GPUPerfHint>(mace::GPUPerfHint::PERF_HIGH),
  184. static_cast<mace::GPUPriorityHint>(mace::GPUPriorityHint::PRIORITY_HIGH));
  185. }
  186. std::vector<std::string> input_names, output_names;
  187. // extract all information from buf
  188. void *buffer = const_cast<void *>(buf);
  189. ud->nr_inputs = *reinterpret_cast<uint32_t*>(buffer);
  190. ud->nr_outputs = *(reinterpret_cast<uint32_t*>(buffer) + 1);
  191. // interpret input names
  192. char *name_buf = reinterpret_cast<char*>(buffer) + 8;
  193. for (size_t i = 0; i < ud->nr_inputs; i++) {
  194. size_t ilen = *reinterpret_cast<uint32_t*>(name_buf);
  195. input_names.push_back(std::string(name_buf + 4, ilen));
  196. name_buf += (ilen + 4);
  197. }
  198. // interpret output names
  199. buffer = name_buf;
  200. name_buf = reinterpret_cast<char*>(buffer);
  201. for (size_t i = 0; i < ud->nr_outputs; i++) {
  202. size_t olen = *reinterpret_cast<uint32_t*>(name_buf);
  203. output_names.push_back(std::string(name_buf + 4, olen));
  204. name_buf += (olen + 4);
  205. }
  206. ud->input_names = input_names;
  207. ud->output_names = output_names;
  208. // interpret output shapes
  209. buffer = name_buf;
  210. uint32_t *shape_buf = reinterpret_cast<uint32_t*>(buffer) + 1;
  211. for (size_t i = 0; i < ud->nr_outputs; i++) {
  212. size_t olen = *reinterpret_cast<int*>(shape_buf);
  213. ud->output_shapes.push_back(
  214. std::vector<int64_t>(shape_buf + 1, shape_buf + olen + 1)
  215. );
  216. shape_buf += (olen + 1);
  217. }
  218. buffer = shape_buf;
  219. const size_t model_buf_len = *reinterpret_cast<int*>(buffer);
  220. unsigned char *model_buf = reinterpret_cast<unsigned char*>(buffer) + 4;
  221. const size_t param_buf_len = *reinterpret_cast<int*>(model_buf + model_buf_len);
  222. unsigned char *param_buf = model_buf + model_buf_len + 4;
  223. // create mace engine
  224. auto create_engine_status = mace::CreateMaceEngineFromProto(
  225. model_buf,
  226. model_buf_len,
  227. param_buf,
  228. param_buf_len,
  229. input_names,
  230. output_names,
  231. config,
  232. &engine
  233. );
  234. ASSERT(create_engine_status == mace::MaceStatus::MACE_SUCCESS,
  235. "Error in creating mace engine");
  236. ud->engine = engine;
  237. auto ret = std::make_unique<MGBOprDesc>();
  238. mgb_init_opr_desc(ret.get(), ud->nr_outputs, "mace");
  239. #define a(n) ret->n = &n;
  240. MGB_OPR_DESC_FOREACH_MEM_FN(a);
  241. a(infer_dtype);
  242. #undef a
  243. ret->user_data = ud.release();
  244. return ret.release();
  245. }
  246. };
  247. class MGBOprLoaderImpl {
  248. static MGBOprDesc* create_desc(size_t nr_input, const void *buf,
  249. size_t buf_len)
  250. {
  251. return MGBOprDescImpl::make(nr_input, buf, buf_len);
  252. }
  253. public:
  254. static MGBOprLoader make() {
  255. return {"mace", create_desc};
  256. }
  257. };
  258. extern "C" {
  259. // public interface
  260. __attribute__((visibility("default")))
  261. void MGB_C_OPR_INIT_FUNC(const MGBExternCOprApi* (*get_api)(int))
  262. {
  263. const MGBExternCOprApi* api = get_api(MGB_EXTERN_C_OPR_VERSION);
  264. ASSERT(api, "Create api failed");
  265. MGBOprLoader loader = MGBOprLoaderImpl::make();
  266. api->register_loader(&loader);
  267. }
  268. } // extern "C"