You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

ssd_prior_box_kernel.cc 15 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "host_kernels/ssd_prior_box_kernel.h"
  17. #include <cfloat>
  18. #include <algorithm>
  19. #include <memory>
  20. #include <utility>
  21. #include "common/math/math_util.h"
  22. #include "common/math_util.h"
  23. #include "framework/common/types.h"
  24. #include "framework/common/util.h"
  25. #include "graph/debug/ge_attr_define.h"
  26. #include "graph/passes/pass_utils.h"
  27. #include "graph/utils/attr_utils.h"
  28. #include "inc/kernel_factory.h"
  29. namespace ge {
  namespace {
  // Tolerance used when two aspect ratios are compared for equality.
  constexpr float kMinistBias = 1e-6;
  // Aspect ratio of the square prior; ratios equal to it are skipped in the per-ratio loop.
  constexpr float kAspectRationBase = 1.0;

  // Number of coordinates stored per prior box (xmin, ymin, xmax, ymax).
  constexpr size_t kBoundarySize = 4;
  // Index of the tensor desc the layer size is read from.
  constexpr size_t kOutputDescFirstIndex = 0;

  // Dimension indices used for the feature-map shape and for the output shape.
  constexpr size_t kDimIndexZero = 0;
  constexpr size_t kDimIndexOne = 1;
  constexpr size_t kDimIndexTwo = 2;
  constexpr size_t kDimIndexThree = 3;

  // Number of variance values written per prior box.
  constexpr int kNumVariance = 4;

  // Small named integers / floats used for the output shape and for halving box extents.
  constexpr int32_t kNumOne = 1;
  constexpr int32_t kNumTwo = 2;
  constexpr float kFloatNumTwo = 2.0;
  }  // namespace
  44. Status SsdPriorboxKernel::GetPriorSizeParam(const OpDescPtr &op_desc, int &img_width, int &img_height, float &step_w,
  45. float &step_h, int &layer_width, int &layer_height) {
  46. if (op_desc == nullptr) {
  47. GELOGE(PARAM_INVALID, "input opdescptr is nullptr.");
  48. return PARAM_INVALID;
  49. }
  50. const GeTensorDesc tensor_desc = op_desc->GetInputDesc(kOutputDescFirstIndex);
  51. layer_width = tensor_desc.GetShape().GetDim(kDimIndexThree);
  52. layer_height = tensor_desc.GetShape().GetDim(kDimIndexTwo);
  53. if (layer_height == 0 || layer_width == 0) {
  54. GELOGE(PARAM_INVALID, "op:%s NCHW_DIM_H or NCHW_DIM_W is 0", op_desc->GetName().c_str());
  55. return PARAM_INVALID;
  56. }
  57. int32_t img_h = 0;
  58. int32_t img_w = 0;
  59. if (!AttrUtils::GetInt(op_desc, SSD_PRIOR_BOX_ATTR_IMG_H, img_h)) {
  60. GELOGE(PARAM_INVALID, "op:%s img_h attr is null", op_desc->GetName().c_str());
  61. return PARAM_INVALID;
  62. }
  63. if (!AttrUtils::GetInt(op_desc, SSD_PRIOR_BOX_ATTR_IMG_W, img_w)) {
  64. GELOGE(PARAM_INVALID, "op:%s img_w attr is null", op_desc->GetName().c_str());
  65. return PARAM_INVALID;
  66. }
  67. if (img_h == 0 || img_w == 0) {
  68. GELOGE(PARAM_INVALID, "op:%s Either img_h or img_w is null", op_desc->GetName().c_str());
  69. return PARAM_INVALID;
  70. } else {
  71. img_width = static_cast<int>(img_w);
  72. img_height = static_cast<int>(img_h);
  73. }
  74. float step_height = 0.0;
  75. float step_width = 0.0;
  76. if (!AttrUtils::GetFloat(op_desc, SSD_PRIOR_BOX_ATTR_STEP_H, step_height)) {
  77. GELOGE(PARAM_INVALID, "op:%s step_height attr is null", op_desc->GetName().c_str());
  78. return PARAM_INVALID;
  79. }
  80. if (!AttrUtils::GetFloat(op_desc, SSD_PRIOR_BOX_ATTR_STEP_W, step_width)) {
  81. GELOGE(PARAM_INVALID, "op:%s step_width attr is null", op_desc->GetName().c_str());
  82. return PARAM_INVALID;
  83. }
  84. if ((fabs(step_height) < FLT_EPSILON) || (fabs(step_width) < FLT_EPSILON)) {
  85. step_w = static_cast<float>(img_width) / layer_width;
  86. step_h = static_cast<float>(img_height) / layer_height;
  87. } else {
  88. step_w = step_width;
  89. step_h = step_height;
  90. }
  91. return SUCCESS;
  92. }
  93. Status SsdPriorboxKernel::GetPriorListParam(const OpDescPtr &op_desc, vector<float> &min_size_list,
  94. vector<float> &max_size_list, vector<float> &aspect_ratio_list,
  95. vector<float> &variance_list) {
  96. if (!AttrUtils::GetListFloat(op_desc, SSD_PRIOR_BOX_ATTR_MIN_SIZE, min_size_list)) {
  97. GELOGE(PARAM_INVALID, "op:%s min_size() attr is null", op_desc->GetName().c_str());
  98. return PARAM_INVALID;
  99. }
  100. if (!AttrUtils::GetListFloat(op_desc, SSD_PRIOR_BOX_ATTR_MAX_SIZE, max_size_list)) {
  101. GELOGE(PARAM_INVALID, "op:%s max_size() attr is null", op_desc->GetName().c_str());
  102. return PARAM_INVALID;
  103. }
  104. if (!AttrUtils::GetListFloat(op_desc, SSD_PRIOR_BOX_ATTR_VARIANCE, variance_list)) {
  105. GELOGE(PARAM_INVALID, "op:%s variance() attr is null", op_desc->GetName().c_str());
  106. return PARAM_INVALID;
  107. }
  108. if (!AttrUtils::GetListFloat(op_desc, SSD_PRIOR_BOX_ATTR_ASPECT_RATIO, aspect_ratio_list)) {
  109. GELOGE(PARAM_INVALID, "op:%s aspect_ratio() attr is null", op_desc->GetName().c_str());
  110. return PARAM_INVALID;
  111. }
  112. // if flip is true,aspect_ratio_list need add reciprocal
  113. bool flip = false;
  114. if (!AttrUtils::GetBool(op_desc, SSD_PRIOR_BOX_ATTR_FLIP, flip)) {
  115. GELOGE(PARAM_INVALID, "op:%s flip() attr is null", op_desc->GetName().c_str());
  116. return PARAM_INVALID;
  117. }
  118. vector<float> aspect_ratios;
  119. aspect_ratios.push_back(SSD_PRIORBOX_ASPECT_RATIO_VALUE);
  120. for (size_t i = 0; i < aspect_ratio_list.size(); i++) {
  121. float ar = aspect_ratio_list.at(i);
  122. bool already_exist =
  123. std::any_of(aspect_ratios.begin(), aspect_ratios.end(), [&ar](float x) { return fabs(ar - x) < kMinistBias; });
  124. if (!already_exist) {
  125. aspect_ratios.push_back(ar);
  126. if (flip) {
  127. aspect_ratios.push_back(1. / ar); // 1. reciprocal
  128. }
  129. }
  130. }
  131. aspect_ratio_list = std::move(aspect_ratios);
  132. return SUCCESS;
  133. }
  134. Status SsdPriorboxKernel::GetPriorOtherParam(const OpDescPtr &op_desc, float &offset, bool &clip) {
  135. if (!AttrUtils::GetBool(op_desc, SSD_PRIOR_BOX_ATTR_CLIP, clip)) {
  136. GELOGE(PARAM_INVALID, "op:%s clip() attr is null", op_desc->GetName().c_str());
  137. return PARAM_INVALID;
  138. }
  139. if (!AttrUtils::GetFloat(op_desc, SSD_PRIOR_BOX_ATTR_OFFSET, offset)) {
  140. GELOGE(PARAM_INVALID, "op:%s offset() attr is null", op_desc->GetName().c_str());
  141. return PARAM_INVALID;
  142. }
  143. return SUCCESS;
  144. }
  145. Status SsdPriorboxKernel::SetVariance(const vector<float> &variance, const int dim, const int32_t layer_height,
  146. const int32_t layer_width, const int num_priors, float *output_data) {
  147. if (output_data == nullptr) {
  148. GELOGE(PARAM_INVALID, "output_data is null");
  149. return PARAM_INVALID;
  150. }
  151. output_data += dim;
  152. if (variance.size() == 1) {
  153. if (NnSet(dim, variance[0], output_data) != SUCCESS) {
  154. GELOGE(PARAM_INVALID, "NnSet failed.");
  155. return PARAM_INVALID;
  156. }
  157. } else {
  158. size_t count = 0;
  159. for (int i = 0; i < layer_height * layer_width * num_priors; ++i) {
  160. for (size_t j = 0; j < 4; ++j) { // 4 variance
  161. output_data[count] = variance[j];
  162. ++count;
  163. }
  164. }
  165. }
  166. return SUCCESS;
  167. }
  168. Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size,
  169. uint32_t min_sizes_size,
  170. uint32_t max_sizes_size,
  171. int layer_width,
  172. int layer_height,
  173. int &num_priors,
  174. int &dim_size) const {
  175. if (ge::CheckUint32MulOverflow(min_sizes_size, aspect_ratios_size) != SUCCESS) {
  176. return PARAM_INVALID;
  177. }
  178. uint32_t tmp_value = aspect_ratios_size * min_sizes_size;
  179. if (ge::CheckUint32AddOverflow(tmp_value, max_sizes_size) != SUCCESS) {
  180. GELOGW("Failed to get list param.");
  181. return PARAM_INVALID;
  182. }
  183. tmp_value += max_sizes_size;
  184. if (tmp_value > INT32_MAX) {
  185. GELOGE(PARAM_INVALID, "Failed to get list param.");
  186. return PARAM_INVALID;
  187. }
  188. num_priors = static_cast<int>(tmp_value);
  189. if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) {
  190. GELOGW("Failed to get list param.");
  191. return PARAM_INVALID;
  192. }
  193. if (ge::CheckIntMulOverflow(layer_width * layer_height, num_priors) != SUCCESS) {
  194. GELOGW("Failed to get list param.");
  195. return PARAM_INVALID;
  196. }
  197. if (ge::CheckIntMulOverflow(layer_width * layer_height * num_priors, kNumVariance) != SUCCESS) {
  198. GELOGW("Failed to get list param.");
  199. return PARAM_INVALID;
  200. }
  201. dim_size = layer_width * layer_height * num_priors * kNumVariance; // 4 variance
  202. return SUCCESS;
  203. }
  204. void SsdPriorboxKernel::DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y,
  205. vector<float> &result) {
  206. result.clear();
  207. // xmin
  208. result.push_back((x - box_x / kFloatNumTwo) / static_cast<float>(img_x));
  209. // ymin
  210. result.push_back((y - box_y / kFloatNumTwo) / static_cast<float>(img_y));
  211. // xmax
  212. result.push_back((x + box_x / kFloatNumTwo) / static_cast<float>(img_x));
  213. // ymax
  214. result.push_back((y + box_y / kFloatNumTwo) / static_cast<float>(img_y));
  215. }
  216. std::unique_ptr<float[]> SsdPriorboxKernel::BoundaryCalulate(int dim_size, int layer_width, int layer_height,
  217. float step_width, float step_height, int img_width,
  218. int img_height, float offset, vector<float> min_sizes,
  219. vector<float> max_sizes, vector<float> aspect_ratios) {
  220. // output two channel.First channel stores the mean of each prior coordinate.
  221. // Second channel stores the variance of each prior coordinate.
  222. unique_ptr<float[]> output_data(new (std::nothrow) float[dim_size * kNumTwo]());
  223. if (output_data == nullptr) {
  224. GELOGE(PARAM_INVALID, "Failed to create output_data ptr.");
  225. return nullptr;
  226. }
  227. int idx = 0;
  228. vector<float> boundaries;
  229. for (int height_index = 0; height_index < layer_height; ++height_index) {
  230. for (int width_index = 0; width_index < layer_width; ++width_index) {
  231. float center_x = (width_index + offset) * step_width;
  232. float center_y = (height_index + offset) * step_height;
  233. for (size_t size_index = 0; size_index < min_sizes.size(); ++size_index) {
  234. int min_size = min_sizes[size_index];
  235. // first prior: aspect_ratio = 1, size = min_size
  236. float box_width = min_size;
  237. float box_height = min_size;
  238. DataCalulate(center_x, center_y, box_width, box_height, img_width, img_height, boundaries);
  239. size_t index = 0;
  240. while (index < kBoundarySize) {
  241. output_data[idx++] = boundaries[index++];
  242. }
  243. if (!max_sizes.empty()) {
  244. int max_size = max_sizes[size_index];
  245. // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
  246. box_width = sqrt(min_size * max_size);
  247. DataCalulate(center_x, center_y, box_width, box_width, img_width, img_height, boundaries);
  248. index = 0;
  249. while (index < kBoundarySize) {
  250. output_data[idx++] = boundaries[index++];
  251. }
  252. }
  253. // rest of priors
  254. for (size_t ratio_index = 0; ratio_index < aspect_ratios.size(); ++ratio_index) {
  255. float aspect_ratio = aspect_ratios[ratio_index];
  256. if (fabs(aspect_ratio - kAspectRationBase) < kMinistBias) { // aspect ration base:1.
  257. continue;
  258. }
  259. box_width = min_size * sqrt(aspect_ratio);
  260. box_height = min_size / sqrt(aspect_ratio);
  261. DataCalulate(center_x, center_y, box_width, box_height, img_width, img_height, boundaries);
  262. index = 0;
  263. while (index < kBoundarySize) {
  264. output_data[idx++] = boundaries[index++];
  265. }
  266. }
  267. }
  268. }
  269. }
  270. return output_data;
  271. }
  272. Status SsdPriorboxKernel::Compute(const NodePtr &node, std::vector<GeTensorPtr> &v_output) {
  273. GELOGD("SsdPriorboxKernel in");
  274. OpDescPtr op_desc = node->GetOpDesc();
  275. if (op_desc == nullptr) {
  276. GELOGE(PARAM_INVALID, "node:%s opdesc is null", node->GetName().c_str());
  277. return PARAM_INVALID;
  278. }
  279. int img_width = 0;
  280. int img_height = 0;
  281. int layer_width = 0;
  282. int layer_height = 0;
  283. float step_width = 0.0;
  284. float step_height = 0.0;
  285. Status ret = GetPriorSizeParam(op_desc, img_width, img_height, step_width, step_height, layer_width, layer_height);
  286. if (ret != SUCCESS) {
  287. GELOGE(PARAM_INVALID, "Failed to get size param.");
  288. return PARAM_INVALID;
  289. }
  290. float offset = 0.0;
  291. bool clip = false;
  292. ret = GetPriorOtherParam(op_desc, offset, clip);
  293. if (ret != SUCCESS) {
  294. GELOGE(PARAM_INVALID, "Failed to get other param.");
  295. return PARAM_INVALID;
  296. }
  297. vector<float> min_sizes;
  298. vector<float> aspect_ratios;
  299. vector<float> variances;
  300. vector<float> max_sizes;
  301. if (GetPriorListParam(op_desc, min_sizes, max_sizes, aspect_ratios, variances) != SUCCESS) {
  302. GELOGE(PARAM_INVALID, "Failed to get list param.");
  303. return PARAM_INVALID;
  304. }
  305. int num_priors = 0;
  306. int dim_size = 0;
  307. ret = GetNumPriorAndDimSize(aspect_ratios.size(), min_sizes.size(), max_sizes.size(), layer_width, layer_height,
  308. num_priors, dim_size);
  309. if (ret != SUCCESS) {
  310. GELOGE(PARAM_INVALID, "Failed to get other param.");
  311. return PARAM_INVALID;
  312. }
  313. auto output_data = BoundaryCalulate(dim_size, layer_width, layer_height, step_width, step_height, img_width,
  314. img_height, offset, min_sizes, max_sizes, aspect_ratios);
  315. if (output_data == nullptr) {
  316. GELOGE(PARAM_INVALID, "Failed to create output_data ptr.");
  317. return PARAM_INVALID;
  318. }
  319. if (clip) {
  320. for (int d = 0; d < dim_size; ++d) {
  321. // clip the prior's coordidate such that it is within [0.0 1.0]
  322. output_data[d] = std::min<float>(std::max<float>(output_data[d], 0.), 1.);
  323. }
  324. }
  325. // set the variance.
  326. if (SetVariance(variances, dim_size, layer_height, layer_width, num_priors, output_data.get()) != SUCCESS) {
  327. GELOGE(PARAM_INVALID, "Failed to set variance.");
  328. return PARAM_INVALID;
  329. }
  330. GeTensorDesc output_tensor_desc = op_desc->GetOutputDesc(0);
  331. std::vector<int64_t> v_dims(3, 1); // 3 dims
  332. v_dims[kDimIndexZero] = kNumOne;
  333. v_dims[kDimIndexOne] = kNumTwo;
  334. v_dims[kDimIndexTwo] = dim_size;
  335. DataType data_type = output_tensor_desc.GetDataType();
  336. output_tensor_desc.Update(GeShape(v_dims), FORMAT_NCHW, data_type);
  337. // make TensorDesc
  338. GeTensorPtr output_ptr = MakeShared<GeTensor>(output_tensor_desc);
  339. if (output_ptr == nullptr) {
  340. GELOGW("Create shared ptr for GeTensor failed");
  341. return NOT_CHANGED;
  342. }
  343. GE_IF_BOOL_EXEC(output_ptr->SetData(reinterpret_cast<uint8_t *>(output_data.get()),
  344. static_cast<size_t>(dim_size * kNumTwo * sizeof(data_type))) != GRAPH_SUCCESS,
  345. GELOGE(INTERNAL_ERROR, "set data failed");
  346. return INTERNAL_ERROR);
  347. v_output.push_back(output_ptr);
  348. return SUCCESS;
  349. }
  // Associates SsdPriorboxKernel with the SSDPRIORBOX key through the REGISTER_KERNEL macro.
  // NOTE(review): the macro is presumably provided by inc/kernel_factory.h - confirm.
  350. REGISTER_KERNEL(SSDPRIORBOX, SsdPriorboxKernel);
  351. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示