
resnet50_train.cc

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <assert.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <chrono>
#include <ctime>
#include <sstream>
#include "common.h"
#include "ge_api.h"
#include "graph.h"
#include "ops/all_ops.h"
#include "types.h"
#include "utils/tensor_utils.h"

using namespace std;
using namespace ge;
using namespace op;

typedef bool (*Func)(Graph &graph);

#define PADDING_MODE 6
#define GRAD_PADDING_MODE 3
vector<int64_t> pad_1{1, 1, 1, 1};
vector<int64_t> pad_0{0, 0, 0, 0};
vector<int64_t> stride_1{1, 1};
vector<int64_t> stride_2{2, 2};
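
// The GENERATE_*/MAKE_* macros below stamp out the per-layer graph nodes. LAYER/BLK/OPNUM are
// token-pasted into every generated identifier (e.g. layer1_blk1_conv1_weight) so that each
// expansion produces a unique set of variables and ops.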
// (int out_channels, int h, int w, vector<int64_t> stride{1,1}, vector<int64_t> pad{1,1,1,1}, op::Data() input)
#define GENERATE_CONV_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \
  auto &LAYER##_##BLK##_##OPNUM##_input = input; \
\
  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \
  auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \
  LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_mom_weight = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight"); \
  LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  LAYER##_##BLK##_##OPNUM##_mom_weight.update_input_desc_x(LAYER##_##BLK##_##OPNUM##_desc); \
\
  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) << "'s weight shape is:" << in_channels << out_channels << h \
       << w << endl; \
  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) \
       << "'s input_x op's shape is:" << input.GetOutputDesc("y").GetShape().GetDim(2) << endl; \
  auto LAYER##_##BLK##_##OPNUM##_tmp_dims = input.GetOutputDesc("y").GetShape().GetDims(); \
  for (auto LAYER##_##BLK##_##OPNUM##_tmp_it = LAYER##_##BLK##_##OPNUM##_tmp_dims.begin(); \
       LAYER##_##BLK##_##OPNUM##_tmp_it != LAYER##_##BLK##_##OPNUM##_tmp_dims.end(); \
       LAYER##_##BLK##_##OPNUM##_tmp_it++) { \
    cout << *LAYER##_##BLK##_##OPNUM##_tmp_it; \
  } \
  cout << endl; \
\
  auto LAYER##_##BLK##_##OPNUM = op::Conv2D(string(#LAYER) + string(#BLK) + string(#OPNUM)) \
                                     .set_input_x(input, "y") \
                                     .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \
                                     .set_attr_strides({1, 1, stride[0], stride[1]}) \
                                     .set_attr_pads(pad) \
                                     .set_attr_data_format("NCHW"); \
  update_op_format(LAYER##_##BLK##_##OPNUM);
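
// Creates a float tensor matching the previously generated LAYER_BLK_OPNUM_desc/_size, fills it
// with 0.01, and wraps it in a Constant op named <...>_<CONSTNAME>_constant as an initial value.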
#define GENERATE_CONSTANT(LAYER, BLK, OPNUM, CONSTNAME) \
  Tensor LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor; \
  float *LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data = new float[LAYER##_##BLK##_##OPNUM##_size]; \
  for (int i = 0; i < (int)LAYER##_##BLK##_##OPNUM##_size; i++) { \
    *(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data + i) = 0.01; \
  } \
  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetData((uint8_t *)LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data, \
                                                         LAYER##_##BLK##_##OPNUM##_size * sizeof(float)); \
  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetTensorDesc(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant = \
      op::Constant().set_attr_value(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor); \
  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  delete[] LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data;

#define GENERATE_CONV_VAR_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \
  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \
  uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \
  auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \
  LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_mom_weight = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight"); \
  LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, weight); \
  auto LAYER##_##BLK##_##OPNUM##_weight_assign = op::Assign() \
                                                     .set_input_ref(LAYER##_##BLK##_##OPNUM##_weight) \
                                                     .set_input_value(LAYER##_##BLK##_##OPNUM##_weight_constant); \
\
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_weight); \
  auto LAYER##_##BLK##_##OPNUM##_mom_weight_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_weight) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_weight_constant); \
\
  input.push_back(LAYER##_##BLK##_##OPNUM##_weight); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_weight);
// (int out_channels, Operator& input)
#define GENERATE_BN_VAR(LAYER, BLK, OPNUM, out_channels, input) \
  auto &LAYER##_##BLK##_##OPNUM##_input = input; \
\
  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \
  auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \
  LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_mom_scale = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \
  LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \
  LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \
  LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \
  LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  auto LAYER##_##BLK##_##OPNUM##_variance = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \
  LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM = op::FusedBatchNorm(string(#LAYER) + string(#BLK) + string(#OPNUM)) \
                                     .set_input_x(input, "y") \
                                     .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \
                                     .set_input_b(LAYER##_##BLK##_##OPNUM##_b) \
                                     .set_input_mean(LAYER##_##BLK##_##OPNUM##_mean) \
                                     .set_input_variance(LAYER##_##BLK##_##OPNUM##_variance) \
                                     .set_attr_mode(1) \
                                     .set_attr_epsilon(1e-5) \
                                     .set_attr_is_training(true);

#define GENERATE_BN_VAR_VAR(LAYER, BLK, OPNUM, out_channels, input) \
  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \
  uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \
  auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \
  LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_mom_scale = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \
  LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \
  LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \
  LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \
  LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  auto LAYER##_##BLK##_##OPNUM##_variance = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \
  LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
\
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, scale); \
\
  auto LAYER##_##BLK##_##OPNUM##_scale_assign = op::Assign() \
                                                    .set_input_ref(LAYER##_##BLK##_##OPNUM##_scale) \
                                                    .set_input_value(LAYER##_##BLK##_##OPNUM##_scale_constant); \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_scale); \
\
  auto LAYER##_##BLK##_##OPNUM##_mom_scale_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_scale) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_scale_constant); \
\
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, b); \
\
  auto LAYER##_##BLK##_##OPNUM##_b_assign = \
      op::Assign().set_input_ref(LAYER##_##BLK##_##OPNUM##_b).set_input_value(LAYER##_##BLK##_##OPNUM##_b_constant); \
\
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_b); \
\
  auto LAYER##_##BLK##_##OPNUM##_mom_b_assign = op::Assign() \
                                                    .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_b) \
                                                    .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_b_constant); \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mean); \
\
  auto LAYER##_##BLK##_##OPNUM##_mean_assign = op::Assign() \
                                                   .set_input_ref(LAYER##_##BLK##_##OPNUM##_mean) \
                                                   .set_input_value(LAYER##_##BLK##_##OPNUM##_mean_constant); \
\
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, variance); \
\
  auto LAYER##_##BLK##_##OPNUM##_variance_assign = op::Assign() \
                                                       .set_input_ref(LAYER##_##BLK##_##OPNUM##_variance) \
                                                       .set_input_value(LAYER##_##BLK##_##OPNUM##_variance_constant); \
\
  input.push_back(LAYER##_##BLK##_##OPNUM##_scale); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_scale); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_b); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_b); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_mean); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_variance);
// (int out_channels, Operator& input)
#define GENERATE_RELU_VAR(LAYER, BLK, OPNUM, input) \
  auto &LAYER##_##BLK##_##OPNUM##_input = input; \
  auto LAYER##_##BLK##_##OPNUM = op::Relu(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x(input, "y");

// (int out_channels, Operator& input)
#define GENERATE_MAXPOOL_VAR(LAYER, BLK, OPNUM, input) \
  auto &LAYER##_##BLK##_##OPNUM##_input = input; \
\
  auto LAYER##_##BLK##_##OPNUM = op::MaxPoolWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM)) \
                                     .set_input_x(input, "y") \
                                     .set_attr_ksize({1, 3, 3, 1}) \
                                     .set_attr_padding("SAME") \
                                     .set_attr_strides({1, 2, 2, 1});

// (int out_channels, Operator& input)
#define GENERATE_ADD_VAR(LAYER, BLK, OPNUM, input_x1, input_x2) \
  auto LAYER##_##BLK##_##OPNUM = \
      op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x1(input_x1, "y").set_input_x2(input_x2, "y");
// (int in_channels, int out_channels, vector<int64_t> stride{1,1}, Operator& input)
#define MAKE_RESIDUAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \
  auto &LAYER##_##BLK##_input = input; \
  auto &LAYER##_##BLK##_stride = stride; \
  int LAYER##_##BLK##_out_chls = out_channels / 4; \
\
  GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \
  GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \
\
  GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
                    LAYER##_##BLK##_relu1); \
  GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \
  GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \
\
  GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \
                    LAYER##_##BLK##_relu2); \
  GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3); \
\
  GENERATE_CONV_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR(LAYER, BLK, bn4, out_channels, LAYER##_##BLK##_conv4); \
\
  GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, LAYER##_##BLK##_bn4); \
  GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \
\
  auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \
  auto &LAYER##_##BLK##_output_label = "y";
#define MAKE_RESIDUAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \
  int LAYER##_##BLK##_out_chls = out_channels / 4; \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \
\
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
                        input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \
\
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input); \
\
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn4, out_channels, input);
// (int in_channels, int out_channels, vector<int64_t> stride{1,1}, Operator& input)
#define MAKE_NORMAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \
  auto &LAYER##_##BLK##_input = input; \
  auto &LAYER##_##BLK##_stride = stride; \
  int LAYER##_##BLK##_out_chls = out_channels / 4; \
\
  GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \
  GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \
\
  GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
                    LAYER##_##BLK##_relu1); \
  GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \
  GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \
\
  GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \
                    LAYER##_##BLK##_relu2); \
  GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3); \
\
  GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, input); \
  GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \
\
  auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \
  auto &LAYER##_##BLK##_output_label = "y";
#define MAKE_NORMAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \
  int LAYER##_##BLK##_out_chls = out_channels / 4; \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \
\
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
                        input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \
\
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input);
// (int in_channels, int out_channels, vector<int64_t> stride{1,1}, Operator& input)
#define MAKE_RESIDUAL_LAYER(LAYER, in_channels, out_channels, stride, input) \
  MAKE_RESIDUAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \
\
  auto &LAYER##_output = LAYER##_blk1_output; \
  auto &LAYER##_output_label = LAYER##_blk1_output_label;

#define MAKE_RESIDUAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \
  MAKE_RESIDUAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input);

// (int in_channels, int out_channels, vector<int64_t> stride{1,1}, Operator& input)
#define MAKE_NORMAL_LAYER(LAYER, in_channels, out_channels, stride, input) \
  MAKE_NORMAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \
\
  auto &LAYER##_output = LAYER##_blk1_output; \
  auto &LAYER##_output_label = LAYER##_blk1_output_label;

#define MAKE_NORMAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \
  MAKE_NORMAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input);
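
// Stacks the 3 + 4 + 6 + 3 = 16 bottleneck blocks of ResNet-50: each stage opens with a
// projection (residual) layer and continues with identity (normal) layers.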
#define MAKE_RESNET50(input) \
  MAKE_RESIDUAL_LAYER(layer1, 64, 256, stride_1, input) \
  MAKE_NORMAL_LAYER(layer2, 256, 256, stride_1, layer1_output) \
  MAKE_NORMAL_LAYER(layer3, 256, 256, stride_1, layer2_output) \
  MAKE_RESIDUAL_LAYER(layer4, 256, 512, stride_2, layer3_output) \
  MAKE_NORMAL_LAYER(layer5, 512, 512, stride_1, layer4_output) \
  MAKE_NORMAL_LAYER(layer6, 512, 512, stride_1, layer5_output) \
  MAKE_NORMAL_LAYER(layer7, 512, 512, stride_1, layer6_output) \
  MAKE_RESIDUAL_LAYER(layer8, 512, 1024, stride_2, layer7_output) \
  MAKE_NORMAL_LAYER(layer9, 1024, 1024, stride_1, layer8_output) \
  MAKE_NORMAL_LAYER(layer10, 1024, 1024, stride_1, layer9_output) \
  MAKE_NORMAL_LAYER(layer11, 1024, 1024, stride_1, layer10_output) \
  MAKE_NORMAL_LAYER(layer12, 1024, 1024, stride_1, layer11_output) \
  MAKE_NORMAL_LAYER(layer13, 1024, 1024, stride_1, layer12_output) \
  MAKE_RESIDUAL_LAYER(layer14, 1024, 2048, stride_2, layer13_output) \
  MAKE_NORMAL_LAYER(layer15, 2048, 2048, stride_1, layer14_output) \
  MAKE_NORMAL_LAYER(layer16, 2048, 2048, stride_1, layer15_output) \
\
  auto &resnet50_output = layer16_output; \
  auto &resnet50_output_label = layer16_output_label;
#define MAKE_RESNET50_VAR(inputs) \
  MAKE_RESIDUAL_LAYER_VAR(layer1, 64, 256, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer2, 256, 256, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer3, 256, 256, stride_1, inputs) \
  MAKE_RESIDUAL_LAYER_VAR(layer4, 256, 512, stride_2, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer5, 512, 512, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer6, 512, 512, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer7, 512, 512, stride_1, inputs) \
  MAKE_RESIDUAL_LAYER_VAR(layer8, 512, 1024, stride_2, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer9, 1024, 1024, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer10, 1024, 1024, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer11, 1024, 1024, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer12, 1024, 1024, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer13, 1024, 1024, stride_1, inputs) \
  MAKE_RESIDUAL_LAYER_VAR(layer14, 1024, 2048, stride_2, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer15, 2048, 2048, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer16, 2048, 2048, stride_1, inputs)

//---------------------------------------------------------------------------------------------
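// Backward-pass macros: each GENERATE_*_GRAD / GENERATE_CONV_PROP_* emits the gradient op for
// the matching forward op, plus an ApplyMomentum update for every trainable variable (the
// label1 variable doubles as the lr and momentum input).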
// (Operator& input)
#define GENERATE_BIASADD_GRAD(LAYER, BLK, OPNUM, input) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::BiasAddGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
          .set_input_x(input, input.name_out_dx());

// (Operator& input)
#define GENERATE_MATMUL_GRAD(LAYER, BLK, OPNUM, input) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::MatMul(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_x1(input);

// (Operator& input)
#define GENERATE_RESHAPE_GRAD(LAYER, BLK, OPNUM, input) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::Reshape(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_tensor(input);

// (Operator& input_grad, Operator& input_maxpool)
#define GENERATE_MAXPOOL_GRAD(LAYER, BLK, OPNUM, input_grad, input_maxpool) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::MaxPoolGradWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
          .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \
          .set_input_grad(input_grad) \
          .set_input_argmax(input_maxpool, input_maxpool.name_out_argmax()) \
          .set_attr_ksize({1, 1, 3, 3}) \
          .set_attr_strides({1, 1, 2, 2}) \
          .set_attr_padding("SAME");

// (Operator& input_dy)
#define GENERATE_RELU_GRAD(LAYER, BLK, OPNUM, input_dy, dy_label) \
  auto LAYER##_##BLK##_##OPNUM##_grad = op::ReluGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
                                            .set_input_gradients(input_dy, dy_label) \
                                            .set_input_features(LAYER##_##BLK##_##OPNUM, "y");

// (Operator& input_dy)
#define GENERATE_BN_GRAD(LAYER, BLK, OPNUM, input_dy) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::FusedBatchNormGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
          .set_input_dy(input_dy, "backprops") \
          .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \
          .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \
          .set_input_save_mean(LAYER##_##BLK##_##OPNUM, "save_mean") \
          .set_input_save_inv_variance(LAYER##_##BLK##_##OPNUM, "save_inv_variance") \
          .set_attr_epsilon(0.0001); \
\
  auto LAYER##_##BLK##_##OPNUM##_momentum_scale = \
      op::ApplyMomentum() \
          .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_scale) \
          .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_scale()) \
          .set_input_lr(label1) \
          .set_input_momentum(label1) \
          .set_input_var(LAYER##_##BLK##_##OPNUM##_scale); \
\
  auto LAYER##_##BLK##_##OPNUM##_momentum_b = \
      op::ApplyMomentum() \
          .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_b) \
          .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_bias()) \
          .set_input_lr(label1) \
          .set_input_momentum(label1) \
          .set_input_var(LAYER##_##BLK##_##OPNUM##_b);

// (Operator& input)
#define GENERATE_CONV_PROP_FILTER(LAYER, BLK, OPNUM, input_bngrad, stride) \
  auto LAYER##_##BLK##_##OPNUM##_propfilter = \
      op::Conv2DBackpropFilterD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propfilter")) \
          .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \
          .set_attr_filter_size(LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetDims()) \
          .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \
          .set_attr_strides(stride) \
          .set_attr_pads({1, 1, 1, 1}); \
\
  update_op_format(LAYER##_##BLK##_##OPNUM##_propfilter); \
  auto LAYER##_##BLK##_##OPNUM##_momentum_weight = op::ApplyMomentum() \
                                                       .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_weight) \
                                                       .set_input_grad(LAYER##_##BLK##_##OPNUM##_propfilter) \
                                                       .set_input_lr(label1) \
                                                       .set_input_momentum(label1) \
                                                       .set_input_var(LAYER##_##BLK##_##OPNUM##_weight);
///.set_attr_input_size({input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(0),LAYER##_##BLK##_##OPNUM##_weight.GetOutputDesc().GetShape().GetDim(1),
///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(2)*stride[2],
///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(3)*stride[3]})
#define GENERATE_CONV_PROP_INPUT(LAYER, BLK, OPNUM, input_bngrad, stride) \
  auto LAYER##_##BLK##_##OPNUM##_propinput = \
      op::Conv2DBackpropInputD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propinput")) \
          .set_attr_input_size(LAYER##_##BLK##_##OPNUM##_input.GetOutputDesc("y").GetShape().GetDims()) \
          .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \
          .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \
          .set_attr_strides(stride) \
          .set_attr_pads({1, 1, 1, 1}); \
  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \
       << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(3) * stride[3] << endl; \
  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \
       << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(2) * stride[2] << endl; \
\
  update_op_format(LAYER##_##BLK##_##OPNUM##_propinput); \
  auto &LAYER##_##BLK##_##OPNUM##_propinput_label = "y"
// (int out_channels, Operator& input)
#define GENERATE_ADD_GRAD(LAYER, BLK, OPNUM, input_x1, input_x1_label, input_x2, input_x2_label) \
  auto LAYER##_##BLK##_##OPNUM##_grad = op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
                                            .set_input_x1(input_x1, input_x1_label) \
                                            .set_input_x2(input_x2, input_x2_label);

// (Operator& input)
#define MAKE_RESIDUAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \
  GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); \
\
  GENERATE_BN_GRAD(LAYER, BLK, bn4, LAYER##_##BLK##_relu5_grad); \
  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \
  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \
\
  GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \
  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \
  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \
\
  GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \
  GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \
  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \
  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \
\
  GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \
  GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \
  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \
  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \
\
  GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \
                    LAYER##_##BLK##_conv4_propinput, LAYER##_##BLK##_conv4_propinput_label); \
\
  auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \
  auto &LAYER##_##BLK##_grad_output_label = "y"

// (Operator& input)
#define MAKE_NORMAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \
  GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); \
\
  GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \
  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \
  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \
\
  GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \
  GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \
  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \
  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \
\
  GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \
  GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \
  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \
  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \
\
  GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \
                    input_dy, dy_label); \
\
  auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \
  auto &LAYER##_##BLK##_grad_output_label = "y"
// (Operator& input_dy)
#define MAKE_RESIDUAL_LAYER_GRAD(LAYER, input_dy, dy_label) \
  MAKE_RESIDUAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \
\
  auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \
  auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label;

// (Operator& input_dy)
#define MAKE_NORMAL_LAYER_GRAD(LAYER, input_dy, dy_label) \
  MAKE_NORMAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \
\
  auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \
  auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label;

#define MAKE_RESNET50_GRAD(input_dy, dy_label) \
  MAKE_NORMAL_LAYER_GRAD(layer16, input_dy, dy_label) \
  MAKE_NORMAL_LAYER_GRAD(layer15, layer16_grad_output, layer16_grad_output_label) \
  MAKE_RESIDUAL_LAYER_GRAD(layer14, layer15_grad_output, layer15_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer13, layer14_grad_output, layer14_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer12, layer13_grad_output, layer13_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer11, layer12_grad_output, layer12_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer10, layer11_grad_output, layer11_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer9, layer10_grad_output, layer10_grad_output_label) \
  MAKE_RESIDUAL_LAYER_GRAD(layer8, layer9_grad_output, layer9_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer7, layer8_grad_output, layer8_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer6, layer7_grad_output, layer7_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer5, layer6_grad_output, layer6_grad_output_label) \
  MAKE_RESIDUAL_LAYER_GRAD(layer4, layer5_grad_output, layer5_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer3, layer4_grad_output, layer4_grad_output_label) \
  MAKE_NORMAL_LAYER_GRAD(layer2, layer3_grad_output, layer3_grad_output_label) \
  MAKE_RESIDUAL_LAYER_GRAD(layer1, layer2_grad_output, layer2_grad_output_label) \
\
  auto &resnet50_grad_output = layer1_grad_output; \
  auto &resnet50_grad_output_label = layer1_grad_output_label;
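
// Forward graph: stem Conv2D (7x7, stride 2) + FusedBatchNorm + Relu + MaxPoolWithArgmax,
// followed by the 16-block ResNet-50 body from MAKE_RESNET50.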
bool resnet50(Graph &graph) {
  auto data = op::Data().set_attr_index(0);
  auto data1 = op::Data().set_attr_index(1);
  TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT);
  data.update_output_desc_y(shape_desc);

  TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT);
  auto var = op::Variable("conv2d_var");
  var.update_output_desc_y(desc);
  var.update_input_desc_x(desc);
  auto varw1 = op::Variable("conv2d_varw1");
  varw1.update_output_desc_y(desc);

  auto conv2d = op::Conv2D("Translate")
                    .set_input_x(data)
                    .set_input_filter(var)
                    .set_attr_strides({1, 1, 2, 2})
                    .set_attr_pads({2, 3, 2, 3})
                    .set_attr_data_format("NCHW");
  TensorDesc desc_y;
  desc_y.SetFormat(FORMAT_NCHW);  // shape: 32 64 112 112
  conv2d.update_output_desc_y(desc_y);

  TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT);
  auto var1 = op::Variable("bn_var1");
  var1.update_output_desc_y(desc1);
  auto var2 = op::Variable("bn_var2");
  var2.update_output_desc_y(desc1);
  auto var3 = op::Variable("bn_var3");
  var3.update_output_desc_y(desc1);
  auto var4 = op::Variable("bn_var4");
  var4.update_output_desc_y(desc1);

  TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT);
  auto var5 = op::Variable("var5");
  var5.update_output_desc_y(desc2);
  auto var6 = op::Variable("var6");
  var6.update_output_desc_y(desc2);

  TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT);
  auto label1 = op::Variable("label1");
  label1.update_output_desc_y(desclabel);

  TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT);
  auto matvar = op::Variable("matvar");
  matvar.update_output_desc_y(descmatlabel);
  auto matvar1 = op::Variable("matvar1");
  matvar1.update_output_desc_y(descmatlabel);

  auto bn = op::FusedBatchNorm()
                .set_input_x(conv2d, "y")
                .set_input_scale(var1)
                .set_input_b(var2)
                .set_input_mean(var3)
                .set_input_variance(var4)
                .set_attr_mode(1)
                .set_attr_epsilon(1e-5)
                .set_attr_is_training(true)
                .set_attr_is_training_fusion(true)
                .set_attr_moving_average_fraction(994352128);

  auto relu = op::Relu().set_input_x(bn, "y");

  auto maxpool = op::MaxPoolWithArgmax()
                     .set_input_x(relu, "y")
                     .set_attr_ksize({1, 3, 3, 1})
                     .set_attr_padding("SAME")
                     .set_attr_strides({1, 2, 2, 1});

  MAKE_RESNET50(maxpool);
  std::vector<Operator> inputs{data};  //,var,var1,layer1_blk1_bn1_b,var3,var4};
  std::vector<Operator> outputs{};

  graph.SetInputs(inputs).SetOutputs(outputs);
  return true;
}
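
// Helpers for the variable-initialization graph: build a Constant filled with the given value
// and an Assign op that writes it into a same-shaped Variable.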
#define GENERATE_CONSTANT_USE_DESC(OPNUM, desc, val) \
  uint32_t OPNUM##_size = desc.GetShape().GetShapeSize(); \
  Tensor OPNUM##_tensor; \
  OPNUM##_tensor.SetTensorDesc(desc); \
  if (desc.GetDataType() == DT_FLOAT) { \
    float *OPNUM##_data = new float[OPNUM##_size]; \
    for (int i = 0; i < (int)OPNUM##_size; i++) { \
      *(OPNUM##_data + i) = val; \
    } \
    OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(float)); \
    delete[] OPNUM##_data; \
  } \
  if (desc.GetDataType() == DT_INT64) { \
    int64_t *OPNUM##_data = new int64_t[OPNUM##_size]; \
    for (int i = 0; i < (int)OPNUM##_size; i++) { \
      *(OPNUM##_data + i) = val; \
    } \
    OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(int64_t)); \
    delete[] OPNUM##_data; \
  } \
  auto OPNUM##_constant = op::Constant().set_attr_value(OPNUM##_tensor); \
  OPNUM##_constant.update_output_desc_y(desc);

#define GENERATE_VAR_LAYER(OPNUM, desc, input) \
  auto OPNUM##_weight = op::Variable(string(#OPNUM)); \
  OPNUM##_weight.update_output_desc_y(desc); \
  auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \
\
  input.push_back(OPNUM##_weight);

#define GENERATE_VAR_LAYER_1(OPNUM, desc, var_format, input, name) \
  auto OPNUM##_weight = op::Variable(string(name)); \
  OPNUM##_weight.update_output_desc_y(desc); \
  auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \
\
  input.push_back(OPNUM##_weight);
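
// Builds the one-shot init graph that assigns starting values to every variable used by
// resnet50(), including the npu_runconfig/* control variables.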
int BuildInitVarGraph(Graph &graph) {
  std::vector<Operator> inputs{};
  std::vector<Operator> outputs{};

  TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT);
  GENERATE_CONSTANT_USE_DESC(conv2d_var, desc, 0.01);
  GENERATE_VAR_LAYER(conv2d_var, desc, inputs);
  GENERATE_CONSTANT_USE_DESC(conv2d_varw1, desc, 0.01);
  GENERATE_VAR_LAYER(conv2d_varw1, desc, inputs);

  TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT);
  GENERATE_CONSTANT_USE_DESC(bn_var1, desc1, 0.01);
  GENERATE_VAR_LAYER(bn_var1, desc1, inputs);
  GENERATE_CONSTANT_USE_DESC(bn_var2, desc1, 0.01);
  GENERATE_VAR_LAYER(bn_var2, desc1, inputs);
  GENERATE_CONSTANT_USE_DESC(bn_var3, desc1, 0.01);
  GENERATE_VAR_LAYER(bn_var3, desc1, inputs);
  GENERATE_CONSTANT_USE_DESC(bn_var4, desc1, 0.01);
  GENERATE_VAR_LAYER(bn_var4, desc1, inputs);

  TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT);
  GENERATE_CONSTANT_USE_DESC(var5, desc2, 0.01);
  GENERATE_VAR_LAYER(var5, desc2, inputs);
  GENERATE_CONSTANT_USE_DESC(var6, desc2, 0.01);
  GENERATE_VAR_LAYER(var6, desc2, inputs);

  TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT);
  GENERATE_CONSTANT_USE_DESC(label1, desclabel, 0.1);
  GENERATE_VAR_LAYER(label1, desclabel, inputs);

  TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT);
  GENERATE_CONSTANT_USE_DESC(matvar, descmatlabel, 0.01);
  GENERATE_VAR_LAYER(matvar, descmatlabel, inputs);
  GENERATE_CONSTANT_USE_DESC(matvar1, descmatlabel, 0.01);
  GENERATE_VAR_LAYER(matvar1, descmatlabel, inputs);

  MAKE_RESNET50_VAR(inputs);

  TensorDesc ctrl(ge::Shape({1, 1, 1, 1}), FORMAT_NCHW, DT_INT64);
  GENERATE_CONSTANT_USE_DESC(iterations_per_loop, ctrl, 100);
  GENERATE_VAR_LAYER_1(iterations_per_loop, ctrl, "4D", inputs, "npu_runconfig/iterations_per_loop");
  GENERATE_CONSTANT_USE_DESC(loop_cond, ctrl, 0);
  GENERATE_VAR_LAYER_1(loop_cond, ctrl, "4D", inputs, "npu_runconfig/loop_cond");
  GENERATE_CONSTANT_USE_DESC(one, ctrl, 1);
  GENERATE_VAR_LAYER_1(one, ctrl, "4D", inputs, "npu_runconfig/one");
  GENERATE_CONSTANT_USE_DESC(zero, ctrl, 0);
  GENERATE_VAR_LAYER_1(zero, ctrl, "4D", inputs, "npu_runconfig/zero");

  graph.SetInputs(inputs).SetOutputs(outputs);
  return 0;
}
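
// Runs the graph-builder callback and pushes an all-ones 32x3x224x224 input tensor; the second
// tensor is prepared but never added to inputs.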
int TestBuildGraphTest(Func fun, Graph &graph, vector<ge::Tensor> &inputs, vector<ge::Tensor> &outputs) {
  bool graph_ret = fun(graph);

  ge::Tensor shapeTensor;
  TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT);
  uint32_t sizeshape = shape_desc.GetShape().GetShapeSize();
  printf("[test] desc size filter shape:%u\n", sizeshape);
  shapeTensor.SetTensorDesc(shape_desc);
  vector<float> dataValuec;
  for (uint32_t i = 0; i < sizeshape; i++) {
    dataValuec.push_back(1);
  }
  shapeTensor.SetData((uint8_t *)dataValuec.data(), sizeof(float) * sizeshape);
  inputs.push_back(shapeTensor);

  ge::Tensor shapeTensor1;
  TensorDesc shape_desc1(ge::Shape({1, 32, 1, 1}), FORMAT_NCHW, DT_FLOAT);
  uint32_t sizeshape1 = shape_desc1.GetShape().GetShapeSize();
  printf("[test] desc size filter shape:%u\n", sizeshape1);
  shapeTensor1.SetTensorDesc(shape_desc1);
  vector<int32_t> dataValuec1;
  for (uint32_t i = 0; i < sizeshape1; i++) {
    dataValuec1.push_back(1);
  }
  shapeTensor1.SetData((uint8_t *)dataValuec1.data(), sizeof(int32_t) * sizeshape1);
  return 0;
}
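
// End-to-end driver: initialize GE, create a session, run the init graph once, then run the
// training graph loopCount times (99999 means loop forever).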
int runTrainGraph(Func fun, int loopCount) {
  printf("GE BBIT begin...\n");
  std::chrono::system_clock::time_point start = std::chrono::system_clock::now();

  std::map<std::string, std::string> ge_options = {
      {"device_id", "0"}, {"rank_table_file", ""}, {"graphType", "1"}, {"ge.graphRunMode", "2"}};
  std::map<std::string, std::string> session_options = {{"a", "b"}, {TRAIN_FLAG, "1"}};
  ge::Status ret;

  // init ge
  ret = GEInitialize_api_new("train", "fe,plugin");
  printf("ge::GEInitialize ret:%d\n", ret);

  // init session
  ge::Session session(session_options);
  int graphId_initvar = 1;
  ge::Graph graph_initvar("initVarGraph");
  int graph_ret = BuildInitVarGraph(graph_initvar);

  // session addgraph
  int graphId = 0;

  // build graph
  ge::Graph graph("bigGraph");
  std::vector<ge::Tensor> inputs;
  ge::Tensor outputTensor;
  std::vector<ge::Tensor> outputs;
  graph_ret = TestBuildGraphTest(fun, graph, inputs, outputs);
  printf("TestBuildGraphTest ret:%d\n", graph_ret);

  ret = session.AddGraph(graphId_initvar, graph_initvar);
  printf("session.AddVarGraph ret:%d\n", ret);
  if (ret) return ret;
  ret = session.AddGraph(graphId, graph);
  printf("session.AddGraph ret:%d\n", ret);
  if (ret) return ret;

  std::vector<ge::Tensor> inputs1;
  std::vector<ge::Tensor> outputs1;
  ret = session.RunGraph(graphId_initvar, inputs1, outputs1);
  if (ret != SUCCESS) {
    return ret;
  }

  // add loop for test of stability:
  for (int i = 0; i < loopCount; i++) {
    // session rungraph
    printf("loopCount:%d\n", loopCount);
    ret = session.RunGraph(graphId, inputs, outputs);
    printf("session.RunGraph ret:%d\n", ret);
    if (ret) return ret;
    // define 99999 as loop forever
    if (loopCount == 99999) i = 0;
  }

  std::chrono::system_clock::time_point end = std::chrono::system_clock::now();
  auto millisecondsduration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
  auto ms = millisecondsduration.count();
  std::stringstream ss;
  ss << ms << "ms";
  std::string run_time = ss.str();
  printf("run time is : %s \n", run_time.c_str());
  return 0;
}
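
// Usage: resnet50_train [loopCount]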
int main(int argc, char *argv[]) {
  // add loop for test of stability:
  int loopCount = 1;
  if (argc >= 2) loopCount = atoi(argv[1]);

  Status ret = SUCCESS;
  ret = runTrainGraph(resnet50, loopCount);
  if (ret == SUCCESS) {
    std::cout << "[train resnet50 success]" << std::endl;
  } else {
    std::cout << "!!! train resnet50 fail !!!" << std::endl;
  }
  return ret;
}

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module (ME) and the underlying hardware and acts as the bridge between them: it takes the graph delivered by ME as input, applies a series of deep graph-optimization passes, and outputs a graph that runs efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor so as to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists of two main parts: the GE API and GE Core.
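The training driver above shows the GE API half of that split: a client initializes GE, creates a Session, adds graphs, and runs them, while optimization and compilation happen inside GE Core. Below is a minimal sketch of that call sequence, assuming the public ge::GEInitialize / ge::GEFinalize entry points declared in ge_api.h (the test above instead uses a GEInitialize_api_new helper from its local common.h); the option maps are illustrative only.

// Minimal GE API driver sketch (hedged: option keys/values are placeholders).
#include <map>
#include <string>
#include <vector>
#include "ge_api.h"

int RunGraphOnce(ge::Graph &graph, std::vector<ge::Tensor> &inputs, std::vector<ge::Tensor> &outputs) {
  std::map<std::string, std::string> ge_options;      // global GE options
  if (ge::GEInitialize(ge_options) != ge::SUCCESS) {  // bring up GE Core
    return -1;
  }
  {
    std::map<std::string, std::string> session_options;
    ge::Session session(session_options);   // a session owns the graphs added to it
    session.AddGraph(0, graph);             // hand the ME-built graph to GE Core
    session.RunGraph(0, inputs, outputs);   // optimize + compile on first run, then execute
  }                                         // session destroyed before finalize
  ge::GEFinalize();                         // tear down GE Core
  return 0;
}

In this split, everything the caller touches (GEInitialize, Session, AddGraph, RunGraph, GEFinalize) is GE API surface; the graph optimization and compilation work described above happens inside GE Core when the graph is first run.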