You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

task.proto 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. /* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
  2. *
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the Apache License Version 2.0. You may not use this file except in compliance with the License.
  5. *
  6. * This program is distributed in the hope that it will be useful,
  7. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. * Apache License for more details at
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. */
  // Schema is written against proto3; every definition in this file belongs
  // to the `domi` package.
  syntax = "proto3";

  package domi;
  // Model-level container: a version string, string-to-string attributes, the
  // list of TaskDef entries, serialized op definitions, and size / count /
  // address scalars.
  //
  // Field numbers 2-8 are not used by this message. Field numbers are the wire
  // identity of each field and must not be renumbered.
  message ModelTaskDef {
    string version = 1;

    // Extended field.
    map<string, string> attr = 9;

    repeated TaskDef task = 10;

    uint64 memory_size = 11;
    uint32 stream_num = 12;
    uint32 event_num = 13;
    uint64 weight_size = 14;

    // Input/output opdef in bytes.
    repeated bytes op = 15;

    // Base addr.
    uint64 base_addr = 16;

    // Weight addr.
    uint64 weight_addr = 17;

    uint32 batch_num = 18;
  }
  // A single task entry. Alongside scalar ids it declares one message-typed
  // field for each of the *Def payload messages defined in this file.
  //
  // NOTE(review): presumably `type` selects which payload field is populated;
  // the schema itself does not enforce this (no oneof) -- confirm with the
  // producer/consumer code.
  //
  // Field numbers are sparse (1-2, 10-11, 20-21, 25-26, 28, 30-40); keep them
  // as they are, since they identify the fields on the wire.
  message TaskDef {
    uint32 id = 1;
    uint32 type = 2;

    uint32 stream_id = 10;
    uint32 event_id = 11;

    KernelDef kernel = 20;
    KernelExDef kernel_ex = 21;

    KernelHcclDef kernel_hccl = 25;
    EventExDef event_ex = 26;

    LogTimeStampDef log_timestamp = 28;

    uint32 label_id = 30;

    MemcpyAsyncDef memcpy_async = 31;
    StreamSwitchDef stream_switch = 32;
    StreamActiveDef stream_active = 33;

    bytes private_def = 34;

    // Adjustments to other fields in the future (original author's note).
    uint64 ops_kernel_store_ptr = 35;

    StreamSwitchNDef stream_switch_n = 36;
    LabelSetDef label_set = 37;
    LabelGotoExDef label_goto_ex = 38;
    LabelSwitchByIndexDef label_switch_by_index = 39;
    KernelDefWithHandle kernel_with_handle = 40;
  }
  // Kernel payload referenced from TaskDef.kernel: a KernelContext plus
  // string identifiers, a block dimension, and several opaque byte buffers,
  // two of which have an explicit companion size field.
  //
  // Field numbers 2-9 are not used by this message.
  message KernelDef {
    KernelContext context = 1;

    string stub_func = 10;
    uint32 block_dim = 11;

    // NOTE(review): args_size presumably mirrors the length of `args`;
    // the schema does not enforce it -- confirm with the writer.
    uint32 args_size = 12;
    bytes args = 13;

    bytes sm_desc = 14;
    bytes flowtable = 15;

    string so_name = 16;
    string kernel_name = 17;

    bytes kernel_ext_info = 18;
    uint32 kernel_ext_info_size = 19;
  }
  // Kernel payload referenced from TaskDef.kernel_with_handle. Shares the
  // context / block_dim / args_size / args / sm_desc field names with
  // KernelDef (at different field numbers) and adds a 64-bit handle, a
  // dev_func string, an original_kernel_key and a node_info string.
  //
  // Field numbers 2-9 are not used by this message.
  message KernelDefWithHandle {
    KernelContext context = 1;

    uint64 handle = 10;
    string dev_func = 11;
    uint32 block_dim = 12;

    uint32 args_size = 13;
    bytes args = 14;

    bytes sm_desc = 15;

    string original_kernel_key = 16;
    string node_info = 17;
  }
  // Context record embedded as field 1 of both KernelDef and
  // KernelDefWithHandle.
  message KernelContext {
    uint32 kernel_type = 1;

    // OP type in CCE.
    uint32 op_id = 2;

    uint32 kernel_func_id = 3;

    // TE/Custom operator.
    uint32 op_index = 4;

    // Identify whether args is a flowtable structure.
    bool is_flowtable = 5;

    // Args offset information.
    bytes args_offset = 6;

    // Args count.
    uint32 args_count = 7;

    repeated uint32 origin_op_index = 8;
  }
  // Payload referenced from TaskDef.kernel_ex: flags, an op index, and three
  // byte buffers (args, task_info, kernel_ext_info), each paired with a
  // uint32 size field.
  //
  // Field numbers 2-3 and 5-11 are not used by this message.
  message KernelExDef {
    uint32 flags = 1;

    uint32 op_index = 4;

    uint32 args_size = 12;
    bytes args = 13;

    // Serialized nodeDef, funcDef, inputoutput.
    bytes task_info = 14;
    uint32 task_info_size = 15;

    bytes kernel_ext_info = 16;
    uint32 kernel_ext_info_size = 17;
  }
  // Payload referenced from TaskDef.kernel_hccl. Only field numbers 8 and 9
  // are declared; numbers 1-7 are not used by this message.
  message KernelHcclDef {
    uint32 op_index = 8;
    string hccl_type = 9;
  }
  // Payload referenced from TaskDef.event_ex: an op index and a numeric
  // event type.
  // NOTE(review): the meaning of event_type values is not defined in this
  // file -- confirm against the consuming code.
  message EventExDef {
    uint32 op_index = 1;
    uint32 event_type = 2;
  }
  // Payload referenced from TaskDef.log_timestamp: a 64-bit log id, a notify
  // switch, and a uint32 named `flat`.
  message LogTimeStampDef {
    uint64 logid = 1;
    bool notify = 2;

    // NOTE(review): `flat` may be a misspelling of `flag`. The name is part of
    // the generated API and JSON mapping, so it is kept unchanged here.
    uint32 flat = 3;
  }
  // Payload referenced from TaskDef.memcpy_async: four 64-bit values
  // (dst, dst_max, src, count) followed by a numeric kind and an op index.
  // NOTE(review): presumably dst/src are addresses and dst_max/count are
  // byte sizes; units are not stated in this file -- confirm.
  message MemcpyAsyncDef {
    uint64 dst = 1;
    uint64 dst_max = 2;
    uint64 src = 3;
    uint64 count = 4;

    uint32 kind = 5;
    uint32 op_index = 6;
  }
  // Payload referenced from TaskDef.stream_switch: an op index, one
  // true_stream_id, a signed 64-bit value, an unsigned 64-bit value_ptr and a
  // numeric data_type.
  message StreamSwitchDef {
    uint32 op_index = 1;
    uint32 true_stream_id = 2;

    int64 value = 3;
    uint64 value_ptr = 4;

    uint32 data_type = 5;
  }
  // Payload referenced from TaskDef.stream_active: an op index and the id
  // stored in active_stream_id.
  message StreamActiveDef {
    uint32 op_index = 1;
    uint32 active_stream_id = 2;
  }
  // Payload referenced from TaskDef.stream_switch_n. Unlike StreamSwitchDef,
  // the target values and true_stream_id entries are repeated fields.
  // NOTE(review): target_value and true_stream_id look like parallel lists
  // matched by index, with `size` as their length -- confirm with the writer.
  message StreamSwitchNDef {
    uint32 op_index = 1;
    uint32 size = 2;

    repeated int64 target_value = 3;
    repeated uint32 true_stream_id = 4;

    uint32 element_size = 5;
    uint32 data_type = 6;
  }
  // Payload referenced from TaskDef.label_set: op index, label id and model
  // id. Declares the same three fields as LabelGotoExDef.
  message LabelSetDef {
    uint32 op_index = 1;
    uint32 label_id = 2;
    uint32 model_id = 3;
  }
  // Payload referenced from TaskDef.label_goto_ex: op index, label id and
  // model id. Declares the same three fields as LabelSetDef.
  message LabelGotoExDef {
    uint32 op_index = 1;
    uint32 label_id = 2;
    uint32 model_id = 3;
  }
  // Payload referenced from TaskDef.label_switch_by_index: an op index and a
  // label_max value.
  // NOTE(review): whether label_max is a count or a highest valid index is
  // not stated in this file -- confirm.
  message LabelSwitchByIndexDef {
    uint32 op_index = 1;
    uint32 label_max = 2;
  }

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示: