diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 556652be..445ed12f 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -57,9 +57,6 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource -static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit -static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty -static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index 1a8dc3e9..ef7e2ec7 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -58,9 +58,6 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource -static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit -static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty -static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 
a3621d8b..6decaaea 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -128,6 +128,7 @@ struct OmgContext { bool fuzz_compile_flag = false; std::string atc_cmdline; bool user_attr_index_valid = false; + bool is_online_model = false; }; } // namespace ge diff --git a/metadef b/metadef index eb9262ab..f011a4c7 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit eb9262abf88b9d85d9fdc25055a2682dfd402ced +Subproject commit f011a4c7ad36e1ec80990e659abefc78b0aa7543 diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h index a2d805fb..a7eb727e 100644 --- a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h +++ b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h @@ -52,9 +52,6 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no str static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource -static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit -static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty -static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index 32454d27..90a60b3d 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -2336,12 +2336,14 @@ REG_OP(CacheAllIndexToLocal) /** *@brief LRUCacheV2, aicore LRUCache. 
+ *@par Inputs: *index_list: exchange index list *data: host data *cache: gm cache *tag: cache's tag *is_last_call: if is last call write all cache to data + *@par Outputs: *data: output data *cache: gm cache @@ -2349,8 +2351,11 @@ REG_OP(CacheAllIndexToLocal) *index_offset_list: index_offset_list *not_in_cache_index_list: output not in cache's index_list *not_in_cache_number: scalar + *@par Attributes: *pre_route_count: types of all outputs + +*@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LRUCacheV2) diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 2c5afbe7..b5251cd6 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -480,7 +480,7 @@ REG_OP(HistogramFixedWidth) .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .INPUT(nbins, TensorType({DT_INT32})) .OUTPUT(y, TensorType({DT_INT32})) - .ATTR(dtype, String, "int32") + .ATTR(dtype, Int, 3) .OP_END_FACTORY_REG(HistogramFixedWidth) /** @@ -511,7 +511,7 @@ REG_OP(HistogramFixedWidthD) .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_INT32})) .REQUIRED_ATTR(nbins, Int) - .ATTR(dtype, String, "int32") + .ATTR(dtype, Int, 3) .OP_END_FACTORY_REG(HistogramFixedWidthD) /** diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 55199962..0d9a8424 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -532,6 +532,36 @@ REG_OP(ScatterAdd) .OP_END_FACTORY_REG(ScatterAdd) /** +*@brief Adds sparse "updates" to a variable reference . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor . +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@li indices: An ND Tensor of type int32 or int64 + +*@li updates: An ND Tensor . 
+*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Attributes: +* axis: A required int. The axis along which to index. \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the pytorch operator ScatterAdd. +*/ +REG_OP(ScatterAddWithAxis) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(ScatterAddWithAxis) + +/** *@brief Divides a variable reference by sparse updates . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 9629976e..1ecc6e91 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -142,6 +142,29 @@ REG_OP(BatchNorm) .OP_END_FACTORY_REG(BatchNorm) /** +*@brief part of SyncBatchNormBackward . \n + +*@par Inputs: +* Four inputs, including: +*@li sum_dy: A Tensor. Must be one of the following types: float16, float32 . +*@li sum_dy_dx_pad: A Tensor. Must be one of the following types: float16, float32 . +*@li mean: A Tensor. Must be one of the following types: float16, float32 . +*@li invert_std: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*@li sum_dy_xmu: A Tensor. Has the same type and format as input "sum_dy" +*@li y: A Tensor. Has the same type and format as input "sum_dy" .
\n +*/ +REG_OP(SyncBatchNormBackwardReduce) + .INPUT(sum_dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(sum_dy_dx_pad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(invert_std, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(sum_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(SyncBatchNormBackwardReduce) + +/** *@brief Performs batch normalization . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index b14cc49d..a12a4f94 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -135,7 +135,8 @@ REG_OP(CheckValid) * the value "4" refers to "x0", "x1", "y0", and "y1" . \n *@par Attributes: -*mode: Computation mode, a character string with the value range of [iou, iof] . \n +*@li mode: Computation mode, a character string with the value range of [iou, iof] +*@li eps: An optional float, prevent division by 0, default value is 1.0 . \n *@par Outputs: *overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying @@ -150,6 +151,7 @@ REG_OP(Iou) .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(mode, String, "iou") + .ATTR(eps, Float, 1.0) .OP_END_FACTORY_REG(Iou) /** @@ -580,6 +582,172 @@ REG_OP(Yolo) .ATTR(background, Bool, false) .ATTR(softmaxtree, Bool, false) .OP_END_FACTORY_REG(Yolo) + +/** +*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n + +*@par Inputs: +*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W), +where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged +as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . 
\n + +*@par Attributes: +*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3. +*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h). +*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024]. +*@li yolo_version: A string, specifying the YOLO version, such as "V2", "V3" or "V5". Defaults to "V5". +*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false". +*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false". +*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n + +*@par Outputs: +*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. +*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. +*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n + +*@attention Constraints: +*@li This operator applies to YOLO v2,v3 and v5 networks. +*@li The succeeding layer of the Yolo operator must be operator Yolov5DetectionOutput. +*@par Third-party framework compatibility +* It is a custom operator.
+*/ +REG_OP(YoloPreDetection) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(yolo_version, String, "V5") + .ATTR(softmax, Bool, false) + .ATTR(background, Bool, false) + .ATTR(softmaxtree, Bool, false) + .OP_END_FACTORY_REG(YoloPreDetection) + +/** +*@brief Performs YOLO V5 detection . \n + +*@par Inputs: +*Ten inputs, including: +*@li Operator Yolov5DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n +There are three Yolo operators at Yolov5DetectionOutput's preceding layer on Yolo v5. For details, see the description of operator Yolo. +*@li img_info: A float16 or float32, describing the image information including the required image height and width \n +* and the actual image height and width. + +*@par Attributes: +*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. + +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. 
+ +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n + +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". + +*@par Outputs: +*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. +* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 + +*@attention Constraints:\n +*@li This operator applies only to the YOLO v5 network. +*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators. + +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(YoloV5DetectionOutput) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(biases, ListFloat) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .ATTR(N, Int, 10) + .ATTR(resize_origin_img_to_net, Bool, false) + .ATTR(out_box_dim, Int, 3) + .ATTR(alpha, Float, 2.0) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV5DetectionOutput) + +/** +*@brief Performs YOLO V5 detection.
+ +*@par Inputs: +*16 Input, including: +*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v5) are used as the inputs of operator Yolov5DetectionOutput. +* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +*@li imginfo: A float16, describing the image information including the required image height and width +* and the actual image height and width. +*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. +* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] +* is formed for the three Yolo outputs, respectively .It's a dynamic input. \n + +*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n +*@par Attributes: +*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. 
+*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". +* +*@par Outputs: +*@li box_out: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), +* describing the information of each output box. +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li box_out_num: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. +* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 +* +*@attention Constraints: +*@li This operator applies only to the YOLO v5 network. +*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators. +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. +*/ +REG_OP(YoloV5DetectionOutputD) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(biases, ListFloat) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .ATTR(N, Int, 10) + .ATTR(resize_origin_img_to_net, Bool, false) + .ATTR(out_box_dim, Int, 3) + .ATTR(alpha, Float, 2.0) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV5DetectionOutputD) /** *@brief Performs YOLO V2 detection .
\n diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 9dd502cd..bc75cfb7 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2645,6 +2645,19 @@ REG_OP(SparseApplyAdadeltaD) REG_OP(AtomicAddrClean) .ATTR(automic_add_mem_size, ListInt, {}) .OP_END_FACTORY_REG(AtomicAddrClean) + +/** +*@brief Clean memory of workspace list . \n + +*@par Attributes: +* @li automic_add_mem_size: A list of ints, sizes of workspaces . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicAtomicAddrClean) + .ATTR(automic_add_mem_size, ListInt, {}) + .OP_END_FACTORY_REG(DynamicAtomicAddrClean) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 08fb25a3..580df4d8 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1381,7 +1381,7 @@ REG_OP(InplaceUpdate) .INPUT(x, TensorType::BasicType()) .INPUT(indices, TensorType({DT_INT32})) .INPUT(v, TensorType::BasicType()) - .OUTPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(InplaceUpdate) /** diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 525f60e9..2bbab7a2 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -847,7 +847,11 @@ with the same setting for this option.
Default: False \n selected indices from the boxes tensor, where M <= max_output_size. \n *@attention Constraints: -*Input theta must be float16 or float, output_size must be int32 type . \n +*Input theta must be float16 or float, output_size must be int32 type . +The current implementation of AffineGrid operator AiCore adopts +BatchMatMul's FP16 fusion operator scheme, and the accuracy will +decrease when the theta range exceeds [-10,10].If the model requires +high accuracy of AffineGrid, it is recommended to use AICPU. \n *@par Third-party framework compatibility *Compatible with Pytorch affine_grid operator. diff --git a/third_party/fwkacllib/inc/toolchain/plog.h b/third_party/fwkacllib/inc/toolchain/plog.h index 6134c3e6..6fdbbb43 100644 --- a/third_party/fwkacllib/inc/toolchain/plog.h +++ b/third_party/fwkacllib/inc/toolchain/plog.h @@ -1,59 +1,59 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _PLOG_H_ -#define _PLOG_H_ - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -#ifndef LINUX -#define LINUX 0 -#endif // LINUX - -#ifndef WIN -#define WIN 1 -#endif - -#ifndef OS_TYPE -#define OS_TYPE 0 -#endif // OS_TYPE - -#if (OS_TYPE == LINUX) -#define DLL_EXPORT __attribute__((visibility("default"))) -#else -#define DLL_EXPORT _declspec(dllexport) -#endif - -/** - * @ingroup plog - * @brief DlogReportInitialize: init log in service process before all device setting. 
- * @return: 0: SUCCEED, others: FAILED - */ -DLL_EXPORT int DlogReportInitialize(); - -/** - * @ingroup plog - * @brief DlogReportFinalize: release log resource in service process after all device reset. - * @return: 0: SUCCEED, others: FAILED - */ -DLL_EXPORT int DlogReportFinalize(); - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // D_PLOG_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _PLOG_H_ +#define _PLOG_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifndef LINUX +#define LINUX 0 +#endif // LINUX + +#ifndef WIN +#define WIN 1 +#endif + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE == LINUX) +#define DLL_EXPORT __attribute__((visibility("default"))) +#else +#define DLL_EXPORT _declspec(dllexport) +#endif + +/** + * @ingroup plog + * @brief DlogReportInitialize: init log in service process before all device setting. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportInitialize(void); + +/** + * @ingroup plog + * @brief DlogReportFinalize: release log resource in service process after all device reset. 
+ * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportFinalize(void); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // _PLOG_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index 09a35c5d..10be2b93 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -77,18 +77,14 @@ #define PROF_MODEL_LOAD_MASK 0x8000000000000000 -#ifndef OS_TYPE -#define OS_TYPE 0 -#endif // OS_TYPE - -#if (OS_TYPE != LINUX) +#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) #define MSVP_PROF_API __declspec(dllexport) #else #define MSVP_PROF_API __attribute__((visibility("default"))) #endif #include -#include +#include namespace Msprofiler { namespace Api { diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h index e6ac64bf..53a5f64b 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_callback.h +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -24,7 +24,7 @@ extern "C" { #endif // __cplusplus -#if (OS_TYPE != LINUX) +#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) #define MSVP_PROF_API __declspec(dllexport) #else #define MSVP_PROF_API __attribute__((visibility("default"))) diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index d5ed7569..f0747833 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -16,11 +16,8 @@ #ifndef MSPROF_ENGINE_PROF_REPORTER_H_ #define MSPROF_ENGINE_PROF_REPORTER_H_ -#ifndef OS_TYPE -#define OS_TYPE 0 -#endif // OS_TYPE -#if (OS_TYPE != LINUX) +#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) #define MSVP_PROF_API __declspec(dllexport) #else #define MSVP_PROF_API __attribute__((visibility("default")))