You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

module.mk 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  # Directory of this makefile. `my-dir` is supplied by the including build
  # system; it is not defined in the visible part of this file.
  LOCAL_PATH := $(call my-dir)

  # Source files shared by every libge_executor module defined in this file.
  # Apart from ge_executor.cc, every entry reaches into a sibling directory
  # via ../.
  # The last entry deliberately has NO trailing backslash: a dangling
  # continuation would fold the next assignment into this list.
  local_ge_executor_src_files := \
      ge_executor.cc \
      ../common/profiling/profiling_manager.cc \
      ../common/dump/dump_properties.cc \
      ../common/dump/dump_manager.cc \
      ../common/dump/dump_op.cc \
      ../common/ge/plugin_manager.cc \
      ../common/ge/op_tiling_manager.cc \
      ../common/profiling/ge_profiling.cc \
      ../graph/load/graph_loader.cc \
      ../graph/execute/graph_execute.cc \
      ../omm/csa_interact.cc \
      ../graph/manager/graph_manager_utils.cc \
      ../graph/manager/graph_var_manager.cc \
      ../graph/manager/rdma_pool_allocator.cc \
      ../graph/manager/graph_mem_allocator.cc \
      ../graph/manager/graph_caching_allocator.cc \
      ../graph/manager/trans_var_data_utils.cc \
      ../graph/manager/util/debug.cc \
      ../model/ge_model.cc \
      ../model/ge_root_model.cc \
      ../graph/load/new_model_manager/davinci_model.cc \
      ../graph/load/new_model_manager/davinci_model_parser.cc \
      ../graph/load/new_model_manager/model_manager.cc \
      ../graph/load/new_model_manager/tbe_handle_store.cc \
      ../graph/load/new_model_manager/cpu_queue_schedule.cc \
      ../graph/load/new_model_manager/model_utils.cc \
      ../graph/load/new_model_manager/aipp_utils.cc \
      ../graph/load/new_model_manager/data_inputer.cc \
      ../graph/load/new_model_manager/data_dumper.cc \
      ../graph/load/new_model_manager/zero_copy_task.cc \
      ../graph/load/new_model_manager/zero_copy_offset.cc \
      ../graph/load/new_model_manager/task_info/task_info.cc \
      ../graph/load/new_model_manager/task_info/event_record_task_info.cc \
      ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \
      ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
      ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
      ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
      ../graph/load/new_model_manager/task_info/kernel_task_info.cc \
      ../graph/load/new_model_manager/task_info/label_set_task_info.cc \
      ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
      ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
      ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
      ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
      ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
      ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \
      ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
      ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
      ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \
      ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \
      ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
      ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
      ../opskernel_manager/ops_kernel_builder_manager.cc \
      ../single_op/single_op_manager.cc \
      ../single_op/single_op_model.cc \
      ../single_op/single_op.cc \
      ../single_op/stream_resource.cc \
      ../single_op/task/op_task.cc \
      ../single_op/task/build_task_utils.cc \
      ../single_op/task/tbe_task_builder.cc \
      ../single_op/task/aicpu_task_builder.cc \
      ../single_op/task/aicpu_kernel_task_builder.cc \
      ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \
      ../graph/common/local_context.cc \
      ../hybrid/common/tensor_value.cc \
      ../hybrid/common/npu_memory_allocator.cc \
      ../hybrid/executor/rt_callback_manager.cc \
      ../hybrid/executor/node_state.cc \
      ../hybrid/executor/node_done_manager.cc \
      ../hybrid/executor/hybrid_profiler.cc \
      ../hybrid/executor/hybrid_model_executor.cc \
      ../hybrid/executor/hybrid_model_async_executor.cc \
      ../hybrid/executor/hybrid_execution_context.cc \
      ../hybrid/executor/subgraph_context.cc \
      ../hybrid/executor/subgraph_executor.cc \
      ../hybrid/executor/worker/task_compile_engine.cc \
      ../hybrid/executor/worker/shape_inference_engine.cc \
      ../hybrid/executor/worker/execution_engine.cc \
      ../hybrid/model/hybrid_model.cc \
      ../hybrid/model/hybrid_model_builder.cc \
      ../hybrid/model/node_item.cc \
      ../hybrid/model/graph_item.cc \
      ../hybrid/node_executor/aicore/aicore_node_executor.cc \
      ../hybrid/node_executor/aicore/aicore_op_task.cc \
      ../hybrid/node_executor/aicore/aicore_task_builder.cc \
      ../hybrid/node_executor/aicpu/aicpu_node_executor.cc \
      ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc \
      ../hybrid/node_executor/ge_local/ge_local_node_executor.cc \
      ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \
      ../hybrid/node_executor/host_cpu/kernel_factory.cc \
      ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \
      ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \
      ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \
      ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \
      ../hybrid/node_executor/controlop/control_op_executor.cc \
      ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
      ../hybrid/node_executor/rts/rts_node_executor.cc \
      ../hybrid/node_executor/node_executor.cc \
      ../hybrid/node_executor/task_context.cc \
      ../hybrid/hybrid_davinci_model.cc \
      ../ge_local_engine/engine/host_cpu_engine.cc \
      ../graph/common/omg_util.cc \
      ../graph/manager/host_mem_manager.cc \
      ../graph/build/memory/var_mem_assign_util.cc \
      ../host_kernels/transpose_kernel.cc \
      ../host_kernels/add_kernel.cc \
      ../host_kernels/broadcast_args_kernel.cc \
      ../host_kernels/broadcast_gradient_args_kernel.cc \
      ../host_kernels/cast_kernel.cc \
      ../host_kernels/concat_offset_kernel.cc \
      ../host_kernels/concat_v2_kernel.cc \
      ../host_kernels/dynamic_stitch_kernel.cc \
      ../host_kernels/identity_kernel.cc \
      ../host_kernels/empty_kernel.cc \
      ../host_kernels/expanddims_kernel.cc \
      ../host_kernels/fill_kernel.cc \
      ../host_kernels/floordiv_kernel.cc \
      ../host_kernels/floormod_kernel.cc \
      ../host_kernels/gather_v2_kernel.cc \
      ../host_kernels/greater_kernel.cc \
      ../host_kernels/kernel_utils.cc \
      ../host_kernels/maximum_kernel.cc \
      ../host_kernels/mul_kernel.cc \
      ../host_kernels/pack_kernel.cc \
      ../host_kernels/permute_kernel.cc \
      ../host_kernels/range_kernel.cc \
      ../host_kernels/rank_kernel.cc \
      ../host_kernels/reduce_prod_kernel.cc \
      ../host_kernels/reshape_kernel.cc \
      ../host_kernels/rsqrt_kernel.cc \
      ../host_kernels/shape_kernel.cc \
      ../host_kernels/shape_n_kernel.cc \
      ../host_kernels/size_kernel.cc \
      ../host_kernels/slice_d_kernel.cc \
      ../host_kernels/slice_kernel.cc \
      ../host_kernels/squeeze_kernel.cc \
      ../host_kernels/unsqueeze_kernel.cc \
      ../host_kernels/ssd_prior_box_kernel.cc \
      ../host_kernels/strided_slice_kernel.cc \
      ../host_kernels/sub_kernel.cc \
      ../host_kernels/transdata_kernel.cc \
      ../host_kernels/unpack_kernel.cc \
      ../graph/passes/pass_utils.cc \
      ../graph/common/bcast.cc \
      ../common/fp16_t.cc \
      ../common/formats/format_transfers/format_transfer_transpose.cc \
      ../common/formats/utils/formats_trans_utils.cc
  # Value assigned to LOCAL_C_INCLUDES by every libge_executor module in this
  # file. It mixes .proto files with header search directories.
  # NOTE(review): presumably the surrounding build system gives the .proto
  # entries special treatment (e.g. generated headers) - confirm.
  # The last entry has no trailing backslash so the list cannot absorb the
  # line that follows it.
  local_ge_executor_c_include := \
      proto/insert_op.proto \
      proto/op_mapping_info.proto \
      proto/dump_task.proto \
      proto/ge_ir.proto \
      proto/task.proto \
      proto/om.proto \
      $(TOPDIR)inc/external \
      $(TOPDIR)metadef/inc/external \
      $(TOPDIR)graphengine/inc/external \
      $(TOPDIR)metadef/inc/external/graph \
      $(TOPDIR)graphengine/inc/framework \
      $(TOPDIR)inc \
      $(TOPDIR)metadef/inc \
      $(TOPDIR)graphengine/inc \
      $(LOCAL_PATH)/../ \
      $(TOPDIR)graphengine/ge \
      $(TOPDIR)libc_sec/include \
      third_party/protobuf/include \
      third_party/json/include
  # Shared libraries referenced through LOCAL_SHARED_LIBRARIES by the device
  # shared-library module in this file.
  # No trailing backslash after the last entry: a dangling continuation would
  # merge the next assignment into this list.
  local_ge_executor_shared_library := \
      libascend_protobuf \
      libc_sec \
      libge_common \
      libruntime \
      libslog \
      libmmpa \
      libgraph \
      libregister \
      liberror_manager
  # Linker flags appended to LOCAL_LDFLAGS by the modules in this file
  # (the device modules use them only when device_os is not android).
  # Single-line value: no line continuation is needed or wanted here.
  local_ge_executor_ldflags := -lrt -ldl
  # Device-side shared library (original note: "compile arm device dynamic lib").
  # Built through $(BUILD_SHARED_LIBRARY); CLEAR_VARS and BUILD_SHARED_LIBRARY
  # come from the including build system.
  include $(CLEAR_VARS)

  LOCAL_MODULE := libge_executor

  LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations
  # -O2 is unconditional for this module; $(DEBUG) is not consulted here.
  LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private

  LOCAL_SRC_FILES := $(local_ge_executor_src_files)
  LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

  LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library)
  LOCAL_SHARED_LIBRARIES += libascend_hal

  # Written on one line on purpose: a trailing backslash here would swallow
  # the ifeq below and leave its else/endif unbalanced.
  LOCAL_STATIC_LIBRARIES := libmsprofiler

  ifeq ($(device_os),android)
  # Android: link -ldl only, and pull -llog from a hard-coded NDK r21
  # aarch64 sysroot directory (API directory 29) under $(PWD).
  LOCAL_LDFLAGS += -ldl
  LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
  else
  LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
  endif

  include $(BUILD_SHARED_LIBRARY)
  # Host-side shared library (original note: "compile x86 host dynamic lib").
  # Built through $(BUILD_HOST_SHARED_LIBRARY), which is supplied by the
  # including build system.
  include $(CLEAR_VARS)

  LOCAL_MODULE := libge_executor

  LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations
  LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private

  # DEBUG=1 selects an unoptimized build with debug info; anything else is -O2.
  ifeq ($(DEBUG), 1)
  LOCAL_CFLAGS += -g -O0
  else
  LOCAL_CFLAGS += -O2
  endif

  LOCAL_SRC_FILES := $(local_ge_executor_src_files)
  LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

  # Same nine libraries as local_ge_executor_shared_library, plus the stub
  # variant of libascend_hal. The last entry carries no trailing backslash so
  # the following assignment stays a separate statement.
  LOCAL_SHARED_LIBRARIES := \
      libascend_protobuf \
      libc_sec \
      libge_common \
      libruntime \
      libslog \
      libmmpa \
      libgraph \
      libregister \
      liberror_manager \
      stub/libascend_hal

  LOCAL_STATIC_LIBRARIES := libmsprofiler

  LOCAL_LDFLAGS += $(local_ge_executor_ldflags)

  include $(BUILD_HOST_SHARED_LIBRARY)
  # Host-side static library (original note: "compile for host static lib").
  # Built through $(BUILD_HOST_STATIC_LIBRARY), which is supplied by the
  # including build system.
  include $(CLEAR_VARS)

  LOCAL_MODULE := libge_executor

  LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations
  LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private

  # DEBUG=1 selects an unoptimized build with debug info; anything else is -O2.
  ifeq ($(DEBUG), 1)
  LOCAL_CFLAGS += -g -O0
  else
  LOCAL_CFLAGS += -O2
  endif

  LOCAL_SRC_FILES := $(local_ge_executor_src_files)
  LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

  # Neither list ends in a backslash, so each assignment stops at its own
  # last entry instead of absorbing the statement after it.
  LOCAL_STATIC_LIBRARIES := \
      libge_common \
      libgraph \
      libregister \
      libascend_protobuf

  LOCAL_SHARED_LIBRARIES := \
      libc_sec \
      libruntime \
      libslog \
      libmmpa

  # NOTE(review): linker flags on a static-library target are probably unused
  # by the build system - confirm before relying on them.
  LOCAL_LDFLAGS += $(local_ge_executor_ldflags)

  include $(BUILD_HOST_STATIC_LIBRARY)
  # Device-side static library (original note: "compile for device static lib").
  # Built through $(BUILD_STATIC_LIBRARY), which is supplied by the including
  # build system.
  include $(CLEAR_VARS)

  LOCAL_MODULE := libge_executor

  LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations
  LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private

  # DEBUG=1 selects an unoptimized build with debug info; anything else is -O2.
  ifeq ($(DEBUG), 1)
  LOCAL_CFLAGS += -g -O0
  else
  LOCAL_CFLAGS += -O2
  endif

  LOCAL_SRC_FILES := $(local_ge_executor_src_files)
  LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

  LOCAL_STATIC_LIBRARIES := \
      libge_common \
      libgraph \
      libregister \
      libascend_protobuf

  # The last entry has no trailing backslash: a continuation here would
  # swallow the ifeq below and leave its else/endif unbalanced.
  LOCAL_SHARED_LIBRARIES := \
      libc_sec \
      libruntime \
      libslog \
      libmmpa

  ifeq ($(device_os),android)
  # Android: link -ldl only, and pull -llog from a hard-coded NDK r21
  # aarch64 sysroot directory (API directory 29) under $(PWD).
  LOCAL_LDFLAGS += -ldl
  LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
  else
  LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
  endif

  include $(BUILD_STATIC_LIBRARY)

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，而用户对此并无感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示：